# Create Monthly Task Reports in Smartsheet


In [None]:
%load_ext nb_black

In [None]:
import os
import json
import logging
from datetime import datetime, timedelta
from dateutil import parser
from typing import Dict, List

import jinja2
import pandas as pd
import numpy as np
import prefect
from box import Box

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import smartsheet
import scrapbook as sb
import dotenv

from prefect import Flow, Parameter, task, unmapped
from prefect.executors import LocalDaskExecutor, LocalExecutor

# Module-level logger for this notebook, with INFO enabled on the logger itself.
# NOTE(review): no handler is attached in the cells shown here, so whether INFO
# records are actually emitted depends on logging configuration elsewhere.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

In [None]:
# Smartsheet sheet IDs. `posted_sheet_id` is the sheet fetched and reported on
# below; `unposted_sheet_id` is not referenced in the cells shown here.
unposted_sheet_id = 4818113414883204
posted_sheet_id = 3567675495475076

In [None]:
# Selects which secrets file is loaded below (".env-<environment>").
environment = "dev"

# Change these, or set them as papermill parameters, to pick the year and month
# being reported. Rows are selected by the last calendar day of this month.
year_for_report = 2022
month_for_report = 5

In [None]:
# Load secrets from the environment-specific dotenv file (e.g. ".env-dev").
env_file = f".env-{environment}"
logger.info("Loading the .env file from %s", env_file)
dotenv.load_dotenv(dotenv.find_dotenv(env_file))

# Fail fast with a real exception rather than an `assert`: assertions are
# stripped under `python -O`, which would let a missing key surface later as an
# opaque API failure.
if not os.environ.get("SMARTSHEET_KEY"):
    raise RuntimeError(f"SMARTSHEET_KEY not found in {env_file}")

In [None]:
# Make sure the output directory for rendered reports exists. `exist_ok=True`
# replaces the separate exists-then-mkdir check, which is race-prone.
os.makedirs("_cache", exist_ok=True)

In [None]:
# connect smartsheet client using the SMARTSHEET_KEY value from the environment
ss_client = smartsheet.Smartsheet(os.environ.get("SMARTSHEET_KEY"))
# presumably makes API errors raise exceptions instead of being returned as
# error objects — confirm against the Smartsheet SDK documentation
ss_client.errors_as_exceptions(True)

In [None]:
def month_end_date(year, month):
    """Return the last day of the given month as a ``YYYY-MM-DD`` string.

    Args:
        year: Calendar year as an integer (e.g. ``2022``).
        month: Calendar month as an integer, 1 (January) through 12 (December).

    Returns:
        The date of the final day of that month, formatted ``YYYY-MM-DD``.

    Raises:
        ValueError: If ``month`` is outside 1..12, or the resulting date
            cannot be represented by ``datetime``.
    """
    # Reject bad months up front. Without this check month 0 would be accepted
    # and silently yield December 31 of the *previous* year.
    if not 1 <= month <= 12:
        raise ValueError(f"month must be in 1..12, got {month!r}")

    # The last day of a month is one day before the first of the next month;
    # December rolls over into January of the following year.
    if month == 12:
        first_of_next = datetime(year + 1, 1, 1)
    else:
        first_of_next = datetime(year, month + 1, 1)

    return (first_of_next - timedelta(days=1)).strftime("%Y-%m-%d")

In [None]:
# Stamp the report with the local date on which the notebook runs (YYYY-MM-DD).
prepared_date = f"{datetime.today():%Y-%m-%d}"
prepared_date

In [None]:
# Last calendar day of the reporting month as "YYYY-MM-DD"; compared against the
# sheet's "Month-end Date" column further down to select this month's rows.
month_ending_date = month_end_date(year_for_report, month_for_report)
month_ending_date

In [None]:
# Fetch the posted-time sheet from Smartsheet; its columns and rows are read below.
posted_time_sheet = ss_client.Sheets.get_sheet(posted_sheet_id)

In [None]:
# Column headers for the dataframe built below: a leading "Row ID" slot,
# followed by the title of every sheet column in the order the sheet lists them.
posted_cell_ids = ["Row ID"] + [
    sheet_column.to_dict()["title"] for sheet_column in posted_time_sheet.columns
]
posted_cell_ids

In [None]:
# Flatten the sheet into one list per row for the dataframe built below: the
# Smartsheet row id first, then one entry per cell in the order the row returns them.
rows_list = []
for row in posted_time_sheet.rows:
    row_list = [row.id]
    for cell in row.cells:
        # Prefer the display value, fall back to the raw value, and use NaN when
        # both are falsy (None, "", 0, False) so pandas treats the cell as
        # missing. `np.nan` is used because the `np.NaN` alias was removed in
        # NumPy 2.0.
        row_list.append(cell.display_value or cell.value or np.nan)

    rows_list.append(row_list)

In [None]:
# put it together
df = pd.DataFrame(rows_list, columns=posted_cell_ids)
# Missing "Estimated Only" cells are treated as False (not an estimate).
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the chained in-place form warns on pandas 2.x and does not update
# `df` at all under copy-on-write.
df["Estimated Only"] = df["Estimated Only"].fillna(False)
df

In [None]:
# Keep only this month's rows, and only those that are not estimates.
is_report_month = df["Month-end Date"] == month_ending_date
# Explicit comparison on purpose: the column may be object dtype, where `~`
# would not behave like a boolean negation.
is_not_estimate = df["Estimated Only"] == False
monthly_df = df.loc[is_report_month & is_not_estimate].copy()
# The two filter columns have done their job; everything left is report data.
monthly_df = monthly_df.drop(columns=["Month-end Date", "Estimated Only"])
# Cell values were taken from display values where available, so coerce the
# hours to float before they are summed.
monthly_df["Completed Hours"] = monthly_df["Completed Hours"].astype(float)
monthly_df

In [None]:
# Roll the monthly rows up to one row per (account, project): descriptive
# columns take the group's "first" value and completed hours are summed.
# NOTE (original author): something about how this frame is copied made agg()
# misbehave when summing the completed hours.
project_keys = ["MDC Account ID", "MDC Project ID"]
project_columns = [
    "MDC Account ID",
    "Account/Client",
    "MDC Project ID",
    "Project Title",
    "Notes",
    "Grant Proposal #",
    "Completed Hours",
]
project_aggregations = {
    "Account/Client": "first",
    "Project Title": "first",
    "Notes": "first",
    "Grant Proposal #": "first",
    "Completed Hours": "sum",
}
projects_df = (
    monthly_df[project_columns]
    .groupby(project_keys)
    .agg(project_aggregations)
    .reset_index()
)
projects_df

In [None]:
# Collapse the per-project totals to one row per account, then stamp every
# account with the reporting period and the date the report was prepared.
account_hours = projects_df[["MDC Account ID", "Account/Client", "Completed Hours"]]
accounts_df = (
    account_hours.groupby(["MDC Account ID"])
    .agg({"Account/Client": "first", "Completed Hours": "sum"})
    .reset_index()
    .assign(
        **{
            "Month-ending Date": month_ending_date,
            "Report Prepared Date": prepared_date,
        }
    )
)
accounts_df

In [None]:
# The descriptive columns are already captured on projects_df, so trim
# monthly_df down to the remaining fields to keep things straight.
project_level_columns = ["Account/Client", "Project Title", "Notes", "Grant Proposal #"]
monthly_df = monthly_df.drop(columns=project_level_columns)
monthly_df

In [None]:
# One dict per account row (despite the name, this is a list of dicts); the
# cells below nest "Projects", "Resources" and "Tasks" into each entry.
accounts_dict = accounts_df.to_dict("records")
accounts_dict

In [None]:
# Nest each account's project rows under a "Projects" key.
for account in accounts_dict:
    belongs_to_account = projects_df["MDC Account ID"] == account["MDC Account ID"]
    account["Projects"] = projects_df.loc[belongs_to_account].to_dict("records")

In [None]:
# Nest under each project the monthly rows that share its account and project IDs.
for account in accounts_dict:
    for project in account["Projects"]:
        same_account = monthly_df["MDC Account ID"] == project["MDC Account ID"]
        same_project = monthly_df["MDC Project ID"] == project["MDC Project ID"]
        matching_rows = monthly_df.loc[same_account & same_project]
        project["Resources"] = matching_rows.to_dict("records")

In [None]:
# Finally, attach the task list to each resource. The task list is looked up as
# a CSV attachment on the resource's sheet row, named "<row id>.csv"; resources
# without such an attachment get no "Tasks" key.
task_text_columns = ("Notes", "Pull Request URL", "Issue URL")

for account in accounts_dict:
    print(account["Account/Client"])
    for project in account["Projects"]:
        print(project["Project Title"])
        for resource in project["Resources"]:
            row_id = resource["Row ID"]
            print(resource["Resource"])
            file_name = f"{row_id}.csv"

            # list everything attached to this resource's row
            response = ss_client.Attachments.list_row_attachments(
                posted_sheet_id, row_id, include_all=True
            )

            for attached_file in response.data or []:
                # only the attachment named after the row is the task list
                if attached_file.name != file_name:
                    continue

                attachment = ss_client.Attachments.get_attachment(
                    posted_sheet_id, attached_file.id
                )
                print(f"Downloading {file_name}...")
                # read the CSV straight from the attachment URL and blank out
                # missing text fields
                tasks_df = pd.read_csv(attachment.url)
                for text_column in task_text_columns:
                    tasks_df[text_column] = tasks_df[text_column].fillna("")
                resource["Tasks"] = tasks_df.to_dict("records")

In [None]:
# Display the nested accounts structure for inspection.
accounts_dict

In [None]:
# Render one Markdown report per account from the Jinja2 template found in the
# current working directory, and write each report into the _cache directory.
templateLoader = jinja2.FileSystemLoader(searchpath="./")
templateEnv = jinja2.Environment(loader=templateLoader)
template_file = "monthly_report_template.md"
template = templateEnv.get_template(template_file)

for account in accounts_dict:
    output_text = template.render(account=account)

    output_file_name = (
        f"{account['MDC Account ID']}-monthly_report-{month_ending_date}.md"
    )
    # Write UTF-8 explicitly. The platform default encoding may not be able to
    # represent non-ASCII characters in names, notes or task text.
    with open(
        os.path.join("_cache", output_file_name), "w", encoding="utf-8"
    ) as fh:
        fh.write(output_text)