# Create Monthly Task Reports in Smartsheet


In [1]:
# Load the nb_black IPython extension for this kernel session.
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import os
import json
import logging
from datetime import datetime, timedelta
from dateutil import parser
from typing import Dict, List

import jinja2
import pandas as pd
import numpy as np
import prefect
from box import Box

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import smartsheet
import scrapbook as sb
import dotenv

from prefect import Flow, Parameter, task, unmapped
from prefect.executors import LocalDaskExecutor, LocalExecutor

# Attach a handler to the root logger if none is configured yet. Without one,
# records fall through to logging's last-resort handler, which only emits
# WARNING and above, so this notebook's INFO messages would never be shown.
logging.basicConfig()

# Notebook-level logger. INFO is enabled on this logger only; the root logger
# keeps its default WARNING threshold for everything else.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

<IPython.core.display.Javascript object>

In [3]:
# Smartsheet sheet IDs.
# NOTE(review): unposted_sheet_id is not referenced in the cells visible here —
# confirm it is still needed.
unposted_sheet_id = 4818113414883204
# Sheet that is fetched below and whose row attachments are downloaded.
posted_sheet_id = 3567675495475076

<IPython.core.display.Javascript object>

In [4]:
# Selects which dotenv file supplies the secrets: ".env-<environment>".
environment = "dev"

# Reporting period. Change these, or set them as papermill parameters, to
# report on a different year and month (month is the calendar month number).
year_for_report = 2022
month_for_report = 5

<IPython.core.display.Javascript object>

In [5]:
# Secrets are read from a per-environment dotenv file (".env-<environment>")
# and loaded into os.environ.

env_file = f".env-{environment}"
logger.info("Loading the .env file from %s", env_file)

# locate the file first, then load it
env_file_path = dotenv.find_dotenv(env_file)
dotenv.load_dotenv(env_file_path)

# stop here if the Smartsheet API key is missing or empty
assert os.environ.get("SMARTSHEET_KEY"), f"SMARTSHEET_KEY not found in {env_file}"

<IPython.core.display.Javascript object>

In [6]:
# Make sure the local "_cache" directory (where the rendered reports are
# written) exists. makedirs(exist_ok=True) creates it when missing and is a
# no-op when it is already there, avoiding a separate exists() check that can
# race with another process creating the directory.
os.makedirs("_cache", exist_ok=True)

<IPython.core.display.Javascript object>

In [7]:
# connect smartsheet client using the API key taken from the environment
ss_client = smartsheet.Smartsheet(os.environ.get("SMARTSHEET_KEY"))
# ask the client to raise exceptions on API errors
ss_client.errors_as_exceptions(True)

<IPython.core.display.Javascript object>

In [8]:
def month_end_date(year, month):
    """Calculate the month end date given a year and month.

    Args:
        year: Four-digit calendar year of the month in question.
        month: Calendar month number, 1 (January) through 12 (December).

    Returns:
        The last day of that month formatted as a "YYYY-MM-DD" string.

    Raises:
        ValueError: If ``month`` is not in the range 1-12.
    """
    if not 1 <= month <= 12:
        # Without this guard an out-of-range month such as 0 silently produced
        # Dec 31 of the previous year instead of an error.
        raise ValueError(f"month must be between 1 and 12, got {month!r}")

    # Step to the first day of the following month, rolling December over
    # into January of the next year.
    next_month = month + 1
    next_month_year = year
    if next_month == 13:
        next_month = 1
        next_month_year = year + 1

    first_of_next_month = datetime.strptime(
        f"{next_month_year}-{next_month}-1", "%Y-%m-%d"
    )
    # One day before the first of next month is the last day of this month,
    # which handles month lengths and leap years without a lookup table.
    return (first_of_next_month - timedelta(days=1)).strftime("%Y-%m-%d")

<IPython.core.display.Javascript object>

In [9]:
# Date the report is prepared (the current local date) as "YYYY-MM-DD".
prepared_date = datetime.now().strftime("%Y-%m-%d")
prepared_date

'2022-06-28'

<IPython.core.display.Javascript object>

In [10]:
# Last day of the reporting month as a "YYYY-MM-DD" string; compared against
# the sheet's "Month-end Date" column when the rows are filtered below.
month_ending_date = month_end_date(year_for_report, month_for_report)
month_ending_date

'2022-05-31'

<IPython.core.display.Javascript object>

In [11]:
# Fetch the sheet identified by posted_sheet_id; its columns and rows are
# unpacked in the following cells.
posted_time_sheet = ss_client.Sheets.get_sheet(posted_sheet_id)

<IPython.core.display.Javascript object>

In [12]:
# Column headers for the dataframe built later: a leading "Row ID" (each
# record starts with the row's id), followed by every sheet column's title in
# sheet order.
posted_cell_ids = ["Row ID"]
posted_cell_ids.extend(
    column.to_dict()["title"] for column in posted_time_sheet.columns
)
posted_cell_ids

['Row ID',
 'Month-end Date',
 'Project Title',
 'Account/Client',
 'Resource',
 'Completed Hours',
 'Notes',
 'Grant Proposal #',
 'Estimated Only',
 'MDC Account ID',
 'MDC Project ID']

<IPython.core.display.Javascript object>

In [13]:
# break down the cells into a list of lists for a later dataframe


def _cell_content(cell):
    """Return the value to store for one sheet cell.

    Prefers the cell's ``display_value``, falls back to its raw ``value``, and
    returns NaN only when both are missing (``None`` or an empty string).
    """
    for candidate in (cell.display_value, cell.value):
        # Compare against None / "" explicitly: a plain truthiness test would
        # also discard legitimate falsy data such as 0, 0.0 or False.
        if candidate is not None and candidate != "":
            return candidate
    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
    return np.nan


# One record per sheet row: the row id first, then one entry per cell.
rows_list = [
    [row.id] + [_cell_content(cell) for cell in row.cells]
    for row in posted_time_sheet.rows
]

<IPython.core.display.Javascript object>

In [14]:
# put it together: one dataframe row per sheet row, with "Row ID" plus the
# sheet's column titles as headers
df = pd.DataFrame(rows_list, columns=posted_cell_ids)
# Missing "Estimated Only" cells count as False. The result is assigned back
# to the column rather than calling fillna(inplace=True) on df["Estimated Only"]:
# that chained in-place form acts on an intermediate object and does not
# update df when pandas Copy-on-Write is in effect.
df["Estimated Only"] = df["Estimated Only"].fillna(False)
df

Unnamed: 0,Row ID,Month-end Date,Project Title,Account/Client,Resource,Completed Hours,Notes,Grant Proposal #,Estimated Only,MDC Account ID,MDC Project ID
0,3498357644519300,2021-07-01,240 Hour Intro Package Hours,HealthAI: Greene Lab,,-240.0,,,False,1882681138,
1,683607877412740,2021-07-01,240 Hour Intro Package Hours,HealthAI: Sean Davis,,-240.0,,,False,1907269862,
2,5750157458204548,2021-07-01,240 Hour Intro Package Hours,HealthAI: TISLab,,-240.0,,,False,1882681714,
3,8001957271889796,2021-07-01,80 Hour Intro Package Hours,HealthAI: Dwork Lab,,-80.0,,,False,2246385174,
4,1246557830834052,2021-07-01,80 Hour Intro Package Hours,HealthAI: Way Lab,,-80.0,,,False,1883649981,
5,790638730864516,2021-09-30,TISLab: Monarch UI (3.0) Redesign,HealthAI: TISLab,Vincent Rubinetti,56.0,Vince's 20% from Sept-end of March covered thi...,213359.0,False,1882681714,1882442059.0
6,6414467834505092,2021-10-31,TISLab: Monarch UI (3.0) Redesign,HealthAI: TISLab,Vincent Rubinetti,152.0,To Discuss with MH - 3/30/22 HH,213359.0,False,1882681714,1882442059.0
7,1956310034868100,2021-11-30,Greenelab: lab-website-template and related si...,HealthAI: Greene Lab,Vincent Rubinetti,18.0,,,False,1882681138,1882712838.0
8,7585809569081220,2021-11-30,Greenelab: Preprint Single Page App,HealthAI: Greene Lab,Vincent Rubinetti,5.0,,,False,1882681138,1977980999.0
9,6459909662238596,2021-11-30,TISLab: Monarch UI (3.0) Redesign,HealthAI: TISLab,Vincent Rubinetti,4.30694,To Discuss with MH - 3/30/22 HH,213359.0,False,1882681714,1882442059.0


<IPython.core.display.Javascript object>

In [15]:
# only need this month's info and no estimates
is_report_month = df["Month-end Date"] == month_ending_date
is_not_estimate = df["Estimated Only"] == False

# keep the matching rows, drop the two filter columns (no longer needed once
# the rows are selected) and make sure the hours are numeric
monthly_df = (
    df.loc[is_report_month & is_not_estimate]
    .drop(columns=["Month-end Date", "Estimated Only"])
    .assign(
        **{"Completed Hours": lambda frame: frame["Completed Hours"].astype(float)}
    )
)
monthly_df

Unnamed: 0,Row ID,Project Title,Account/Client,Resource,Completed Hours,Notes,Grant Proposal #,MDC Account ID,MDC Project ID
41,4278815547385732,"Greenelab: Biomedical Literature ""Word Lapse"" ...",HealthAI: Greene Lab,Faisal Alquaddoomi,11.0,,213269.0,1882681138,1969468997
42,8782415174756228,"Greenelab: Biomedical Literature ""Word Lapse"" ...",HealthAI: Greene Lab,Vincent Rubinetti,8.0,,213269.0,1882681138,1969468997
43,4701028012451716,Greenelab: mygeneset.info,HealthAI: Greene Lab,Vincent Rubinetti,16.0,,,1882681138,1882738595
44,197428385081220,Greenelab: Staffing/Support FY2022,HealthAI: Greene Lab,Vincent Rubinetti,12.0,,,1882681138,2303324267
45,2449228198766468,TISLab: Monarch UI (3.0) Redesign,HealthAI: TISLab,Vincent Rubinetti,0.0,,213359.0,1882681714,1882442059
46,6952827826136964,Way Lab: Staffing/Support FY2022,HealthAI: Way Lab,Dave Bunten,32.0,,,1883649981,2334955423
47,1323328291923844,Way Lab: Staffing/Support FY2022,HealthAI: Way Lab,Vincent Rubinetti,8.0,,,1883649981,2334955423


<IPython.core.display.Javascript object>

In [16]:
# Roll the monthly rows up to one row per (account, project): descriptive
# columns take the group's first value, completed hours are summed.
project_keys = ["MDC Account ID", "MDC Project ID"]
project_aggregations = {
    "Account/Client": "first",
    "Project Title": "first",
    "Notes": "first",
    "Grant Proposal #": "first",
    "Completed Hours": "sum",
}

projects_df = (
    monthly_df[[*project_keys, *project_aggregations]]
    .groupby(project_keys)
    .agg(project_aggregations)
    .reset_index()
)
projects_df

Unnamed: 0,MDC Account ID,MDC Project ID,Account/Client,Project Title,Notes,Grant Proposal #,Completed Hours
0,1882681138,1882738595,HealthAI: Greene Lab,Greenelab: mygeneset.info,,,16.0
1,1882681138,1969468997,HealthAI: Greene Lab,"Greenelab: Biomedical Literature ""Word Lapse"" ...",,213269.0,19.0
2,1882681138,2303324267,HealthAI: Greene Lab,Greenelab: Staffing/Support FY2022,,,12.0
3,1882681714,1882442059,HealthAI: TISLab,TISLab: Monarch UI (3.0) Redesign,,213359.0,0.0
4,1883649981,2334955423,HealthAI: Way Lab,Way Lab: Staffing/Support FY2022,,,40.0


<IPython.core.display.Javascript object>

In [17]:
# Roll the projects up to one row per account (hours summed across the
# account's projects) and stamp each row with the report dates.
report_dates = {
    "Month-ending Date": month_ending_date,
    "Report Prepared Date": prepared_date,
}

accounts_df = (
    projects_df[["MDC Account ID", "Account/Client", "Completed Hours"]]
    .groupby(["MDC Account ID"])
    .agg({"Account/Client": "first", "Completed Hours": "sum"})
    .reset_index()
    .assign(**report_dates)
)
accounts_df

Unnamed: 0,MDC Account ID,Account/Client,Completed Hours,Month-ending Date,Report Prepared Date
0,1882681138,HealthAI: Greene Lab,47.0,2022-05-31,2022-06-28
1,1882681714,HealthAI: TISLab,0.0,2022-05-31,2022-06-28
2,1883649981,HealthAI: Way Lab,40.0,2022-05-31,2022-06-28


<IPython.core.display.Javascript object>

In [18]:
# Done with most of the columns (they now live in projects_df), so drop them
# to keep things straight. Rebinding monthly_df with errors="ignore" instead
# of dropping in place keeps this cell re-runnable: running it a second time
# no longer raises KeyError because the columns are already gone.
monthly_df = monthly_df.drop(
    columns=[
        "Account/Client",
        "Project Title",
        "Notes",
        "Grant Proposal #",
    ],
    errors="ignore",
)
monthly_df

Unnamed: 0,Row ID,Resource,Completed Hours,MDC Account ID,MDC Project ID
41,4278815547385732,Faisal Alquaddoomi,11.0,1882681138,1969468997
42,8782415174756228,Vincent Rubinetti,8.0,1882681138,1969468997
43,4701028012451716,Vincent Rubinetti,16.0,1882681138,1882738595
44,197428385081220,Vincent Rubinetti,12.0,1882681138,2303324267
45,2449228198766468,Vincent Rubinetti,0.0,1882681714,1882442059
46,6952827826136964,Dave Bunten,32.0,1883649981,2334955423
47,1323328291923844,Vincent Rubinetti,8.0,1883649981,2334955423


<IPython.core.display.Javascript object>

In [19]:
# Convert the account rows into a list of plain dicts (one per account) that
# the following cells extend with nested projects, resources and tasks.
accounts_dict = accounts_df.to_dict(orient="records")
accounts_dict

[{'MDC Account ID': '1882681138',
  'Account/Client': 'HealthAI: Greene Lab',
  'Completed Hours': 47.0,
  'Month-ending Date': '2022-05-31',
  'Report Prepared Date': '2022-06-28'},
 {'MDC Account ID': '1882681714',
  'Account/Client': 'HealthAI: TISLab',
  'Completed Hours': 0.0,
  'Month-ending Date': '2022-05-31',
  'Report Prepared Date': '2022-06-28'},
 {'MDC Account ID': '1883649981',
  'Account/Client': 'HealthAI: Way Lab',
  'Completed Hours': 40.0,
  'Month-ending Date': '2022-05-31',
  'Report Prepared Date': '2022-06-28'}]

<IPython.core.display.Javascript object>

In [20]:
# start breaking down the projects: nest each account's project rows under
# its "Projects" key
for account in accounts_dict:
    in_this_account = projects_df["MDC Account ID"] == account["MDC Account ID"]
    account["Projects"] = projects_df.loc[in_this_account].to_dict("records")

<IPython.core.display.Javascript object>

In [21]:
# nest the per-resource rows of every project under its "Resources" key
for account in accounts_dict:
    for project in account["Projects"]:
        # a row belongs to the project when both the account and project ids match
        same_account = monthly_df["MDC Account ID"] == project["MDC Account ID"]
        same_project = monthly_df["MDC Project ID"] == project["MDC Project ID"]
        project["Resources"] = monthly_df.loc[same_account & same_project].to_dict(
            "records"
        )

<IPython.core.display.Javascript object>

In [22]:
# finally get the task attachment for each resource

for account in accounts_dict:
    print(account["Account/Client"])
    for project in account["Projects"]:
        print(project["Project Title"])
        for resource in project["Resources"]:
            row_id = resource["Row ID"]
            print(resource["Resource"])
            # the task list is looked up as an attachment named "<row id>.csv"
            file_name = f"{row_id}.csv"
            # get the attachments
            response = ss_client.Attachments.list_row_attachments(
                posted_sheet_id, row_id, include_all=True
            )

            for file in response.data or []:
                if file.name != file_name:
                    # not the task list for this row
                    continue

                attachment = ss_client.Attachments.get_attachment(
                    posted_sheet_id, file.id
                )
                print(f"Downloading {file_name}...")
                # read the attachment straight from its URL into a dataframe
                tasks_df = pd.read_csv(attachment.url)
                # blank out missing text so no NaN ends up in the task records
                for text_column in ("Notes", "Pull Request URL", "Issue URL"):
                    tasks_df[text_column] = tasks_df[text_column].fillna("")
                resource["Tasks"] = tasks_df.to_dict("records")

HealthAI: Greene Lab
Greenelab: mygeneset.info
Vincent Rubinetti
Downloading 4701028012451716.csv...
Greenelab: Biomedical Literature "Word Lapse" Single Page App
Faisal Alquaddoomi
Downloading 4278815547385732.csv...
Vincent Rubinetti
Downloading 8782415174756228.csv...
Greenelab: Staffing/Support FY2022
Vincent Rubinetti
Downloading 197428385081220.csv...
HealthAI: TISLab
TISLab: Monarch UI (3.0) Redesign
Vincent Rubinetti
Downloading 2449228198766468.csv...
HealthAI: Way Lab
Way Lab: Staffing/Support FY2022
Dave Bunten
Downloading 6952827826136964.csv...
Vincent Rubinetti
Downloading 1323328291923844.csv...


<IPython.core.display.Javascript object>

In [23]:
# Inspect the nested structure (accounts -> "Projects" -> "Resources" ->
# "Tasks") that is handed to the report template in the next cell.
accounts_dict

[{'MDC Account ID': '1882681138',
  'Account/Client': 'HealthAI: Greene Lab',
  'Completed Hours': 47.0,
  'Month-ending Date': '2022-05-31',
  'Report Prepared Date': '2022-06-28',
  'Projects': [{'MDC Account ID': '1882681138',
    'MDC Project ID': '1882738595',
    'Account/Client': 'HealthAI: Greene Lab',
    'Project Title': 'Greenelab: mygeneset.info',
    'Notes': None,
    'Grant Proposal #': None,
    'Completed Hours': 16.0,
    'Resources': [{'Row ID': 4701028012451716,
      'Resource': 'Vincent Rubinetti',
      'Completed Hours': 16.0,
      'MDC Account ID': '1882681138',
      'MDC Project ID': '1882738595',
      'Tasks': [{'task_id': 2595067032,
        'Task Name': 'Incorporate login/signup/oauth endpoints into frontend',
        'Resource': 'Vincent Rubinetti',
        'Notes': 'The associated PR is closed, however it was not successful. There were problems once deployed. So this task is not completed.',
        'Pull Request URL': 'https://github.com/biothings/myg

<IPython.core.display.Javascript object>

In [24]:
# Load the report template from the current working directory.
templateLoader = jinja2.FileSystemLoader(searchpath="./")
templateEnv = jinja2.Environment(loader=templateLoader)
template_file = "monthly_report_template.md"
template = templateEnv.get_template(template_file)

# Render one markdown report per account into the _cache directory.
for account in accounts_dict:
    output_text = template.render(account=account)

    output_file_name = (
        f"{account['MDC Account ID']}-monthly_report-{month_ending_date}.md"
    )
    # Write explicitly as UTF-8 so the output does not depend on the platform's
    # default encoding, which can fail on non-ASCII text.
    with open(
        os.path.join("_cache", output_file_name), "w", encoding="utf-8"
    ) as fh:
        fh.write(output_text)

<IPython.core.display.Javascript object>