# Create Monthly Task Reports in Smartsheet


In [None]:
#%load_ext nb_black

In [None]:
import os
import json
import logging
import jinja2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import smartsheet
import scrapbook as sb
import dotenv

from datetime import datetime, timedelta
from dateutil import parser
from typing import Dict, List
from pprint import pprint
from pathlib import Path
from slugify import slugify
from jsonschema import validate, ValidationError, RefResolver
from jsonschema.exceptions import RefResolutionError
from docxtpl import (
    DocxTemplate,
    InlineImage,
)  # For this you'll need to `pip install docxtpl`
# This comes in with the templating library
from docx.shared import Inches
from prefect import task, flow

# Module-level logger for this notebook, with its threshold set to INFO.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

In [None]:
# Smartsheet sheet IDs used by this notebook.
# NOTE(review): unposted_sheet_id is not referenced by any cell visible here.
unposted_sheet_id = 4818113414883204
# sheet the posted hours and the per-resource task-file attachments are read from
posted_sheet_id = 3567675495475076
# sheet that receives one row (plus the rendered .docx) per client report
client_reports_id = 8413910635177860

In [None]:
# selects which ".env-<environment>" file the secrets are loaded from
environment = "dev"

# change these or set as papermill parameters to report on year and month
year_for_report = 2022
month_for_report = 7

# set to False to build the reports locally without posting rows or
# attachments to smartsheet... useful for testing
post_records = True

In [None]:
# check the environment vars for secrets

env_file = f".env-{environment}"
logger.info("Loading the .env file from %s", env_file)
dotenv.load_dotenv(dotenv.find_dotenv(env_file))

# Raise explicitly instead of using `assert`: assertions are stripped when
# Python runs with -O, which would let a missing key slip through to the
# first API call.
if not os.environ.get("SMARTSHEET_KEY"):
    raise RuntimeError(f"SMARTSHEET_KEY not found in {env_file}")

In [None]:
# Make sure the local cache directory (downloaded task files, rendered
# reports) exists. exist_ok avoids the check-then-create race of a separate
# os.path.exists() test.
os.makedirs("_cache", exist_ok=True)

In [None]:
# connect smartsheet client
# the API token is read from the SMARTSHEET_KEY environment variable
ss_client = smartsheet.Smartsheet(os.environ.get("SMARTSHEET_KEY"))
# NOTE(review): presumably makes the SDK raise on API errors instead of
# returning error objects -- confirm against the smartsheet SDK docs
ss_client.errors_as_exceptions(True)

In [None]:
def validate_json(schema, json_data):
    """Validate ``json_data`` against ``schema``.

    Every file in the local ``schema`` directory is loaded into a store keyed
    by its ``$id`` and handed to the resolver, so ``$ref``s between the schema
    files can be looked up from that store.

    Returns a ``(valid, message)`` tuple: ``(True, None)`` when the data
    validates, ``(False, <validation error message>)`` when it does not.
    """

    # mostly from https://stackoverflow.com/questions/25145160/json-schema-ref-does-not-work-for-relative-path
    schema_store = {}
    for source in Path("schema").iterdir():
        # sub-directories cannot be opened as schema files
        if not source.is_file():
            continue
        # context manager so each schema file handle is closed right away
        with open(source) as schema_file:
            loaded = json.load(schema_file)
        schema_store[loaded["$id"]] = loaded

    resolver = RefResolver.from_schema(schema, store=schema_store)

    try:
        validate(instance=json_data, schema=schema, resolver=resolver)
    except ValidationError as err:
        return False, err.message
    return True, None

In [None]:
def month_end_date(year, month):
    """Return the last day of the given year/month as a ``YYYY-MM-DD`` string."""
    # Step to the first day of the following month (rolling December over
    # into January of the next year) ...
    if month + 1 == 13:
        next_year, next_month = year + 1, 1
    else:
        next_year, next_month = year, month + 1

    first_of_next_month = datetime.strptime(
        f"{next_year}-{next_month}-1", "%Y-%m-%d"
    )

    # ... then back up one day to land on the month end.
    last_day = first_of_next_month - timedelta(days=1)
    return last_day.strftime("%Y-%m-%d")

In [None]:
# date the report is prepared: today's date formatted as YYYY-MM-DD
prepared_date = datetime.today().strftime("%Y-%m-%d")
prepared_date

In [None]:
# last calendar day of the reporting month, as a YYYY-MM-DD string
month_ending_date = month_end_date(year_for_report, month_for_report)
month_ending_date

In [None]:
# fetch the posted hours sheet; its columns and rows are read in later cells
posted_time_sheet = ss_client.Sheets.get_sheet(posted_sheet_id)

In [None]:
# dataframe column names: a leading "Row ID" column (filled with each row's
# id) followed by the sheet's column titles in sheet order
posted_cell_ids = ["Row ID"] + [
    column.to_dict()["title"] for column in posted_time_sheet.columns
]

posted_cell_ids

In [None]:
# break down the cells into a list of lists for a later dataframe
rows_list = []
for row in posted_time_sheet.rows:
    # the row id goes first to line up with the leading "Row ID" column name
    row_list = [row.id]
    for cell in row.cells:
        # prefer the display value, fall back to the raw value, and use NaN
        # when both are empty. Spelled np.nan because the np.NaN alias was
        # removed in NumPy 2.0.
        row_list.append(cell.display_value or cell.value or np.nan)

    rows_list.append(row_list)

In [None]:
# put it together as a dataframe
df = pd.DataFrame(rows_list, columns=posted_cell_ids)
# Cells with no value arrive as NaN; treat them as "not an estimate".
# Assign the result back rather than calling fillna(inplace=True) on the
# column selection: with pandas Copy-on-Write the chained inplace form
# updates a temporary and leaves df untouched.
df["Estimated Only"] = df["Estimated Only"].fillna(False)
df

In [None]:
# only need this month's info and no estimates
in_report_month = df["Month-end Date"] == month_ending_date
not_estimated = df["Estimated Only"] == False

# copy so the edits below never touch df; blank out the remaining NaNs
monthly_df = df.loc[in_report_month & not_estimated].copy().fillna("")
monthly_df["Completed Hours"] = monthly_df["Completed Hours"].astype(float)

# the two filter columns carry no further information once filtered
monthly_df = monthly_df.drop(columns=["Month-end Date", "Estimated Only"])
monthly_df

## With the posted hours dataframe, start breaking it down into a dict

In [None]:
# one row per account: first client name seen, total completed hours
account_aggregations = {"Account/Client": "first", "Completed Hours": "sum"}
accounts_df = (
    monthly_df.groupby(["MDC Account ID"]).agg(account_aggregations).reset_index()
)
accounts_df

In [None]:
# one row per (account, project): descriptive fields taken from the first
# row of each group, completed hours summed
project_aggregations = {
    "Project Title": "first",
    "Notes": "first",
    "Grant Proposal #": "first",
    "Completed Hours": "sum",
}
project_groups = monthly_df.groupby(["MDC Account ID", "MDC Project ID"])
projects_df = project_groups.agg(project_aggregations).reset_index()
projects_df

In [None]:
# one row per (account, project, resource): hours summed, and the first
# sheet row id of each group kept so its attachment can be fetched later
resource_aggregations = {
    "Resource": "first",
    "Completed Hours": "sum",
    "Row ID": "first",
}
resource_groups = monthly_df.groupby(
    ["MDC Account ID", "MDC Project ID", "MDC Resource ID"]
)
resources_df = resource_groups.agg(resource_aggregations).reset_index()

resources_df

In [None]:
def get_task_file(project_id, resource_id, row_id):
    """
    Fetch the attachment named
    ``{project_id}_{resource_id}_{month_ending_date}.json`` from the given
    row_id of the posted sheet and download it into ``_cache``.

    Returns a ``(task_dict, completed_hours)`` tuple: the parsed JSON with its
    "Tasks" sorted by "Task Complete Date", and the sum of the tasks'
    "Completed Hours". A schema validation failure is logged, not raised.

    Returns None when the row has no attachment with the expected name.
    """

    with open(os.path.join("schema", "effort_hours-resource.json"), "r") as schema_file:
        schema = json.load(schema_file)

    # the expected name does not depend on the attachment, so build it once
    filename = f"{project_id}_{resource_id}_{month_ending_date}.json"

    # get the attachments
    response = ss_client.Attachments.list_row_attachments(
        posted_sheet_id, row_id, include_all=True
    )

    for attached in response.data or []:
        if attached.name != filename:
            continue

        # the file name matches what we know the task list is
        attachment = ss_client.Attachments.get_attachment(
            posted_sheet_id, attached.id
        )
        logger.info("Downloading %s...", filename)
        ss_client.Attachments.download_attachment(attachment, "_cache")

        with open(os.path.join("_cache", filename), "r") as f:
            json_result = json.load(f)

        # need to sum hours, and sort
        tasks_df = pd.DataFrame(json_result["Tasks"])
        completed_hours = tasks_df["Completed Hours"].sum()
        json_result["Tasks"] = tasks_df.sort_values(
            by="Task Complete Date", ascending=True
        ).to_dict("records")

        # the files _must_ match the schema
        valid, msg = validate_json(schema, json_result)
        if not valid:
            logger.error("validation error %s", msg)

        return json_result, completed_hours

    # make the "nothing matched" outcome explicit and visible in the log
    logger.warning("No attachment named %s found on row %s", filename, row_id)
    return None


# _ = get_task_file("1969468997", "25815853", "2292815046043524")

In [None]:
def get_client(client_id):
    """Build the client section of a report for one account.

    Looks the account up in ``accounts_df`` by "MDC Account ID". Returns the
    client dict when exactly one row matches, otherwise an empty dict.
    """

    matches = accounts_df.loc[accounts_df["MDC Account ID"] == client_id]
    records = matches.to_dict("records")

    # should only be one
    if len(records) != 1:
        return {}

    (account,) = records
    return {
        "MDC Client ID": account["MDC Account ID"],
        "Client Name": account["Account/Client"],
        "Notes": "",
        "No Bill": False,
        "Completed Hours": account["Completed Hours"],
    }


# get_client("1882681138")

In [None]:
def get_projects(client_id):
    """Build the list of project dicts for one account.

    For every project of ``client_id`` in ``projects_df``, the project's
    resources are looked up in ``resources_df`` and each resource's task file
    is fetched with ``get_task_file``. Each project dict is validated against
    the project schema; a validation failure is logged, not raised.

    Returns a list of project dicts, each with a "Resources" list.

    Raises LookupError when a resource has no task file attached.
    """

    with open(os.path.join("schema", "effort_hours-project.json"), "r") as file:
        schema = json.load(file)

    projects = []

    rows = projects_df.loc[projects_df["MDC Account ID"] == client_id].to_dict(
        "records"
    )

    for row in rows:

        project_dict = {
            "MDC Project ID": row["MDC Project ID"],
            "Project Name": row["Project Title"],
            "Notes": row["Notes"],
            "Completed Hours": row["Completed Hours"],
            "Resources": [],
        }

        # resources_df is grouped by account AND project, so match on both;
        # matching on the project id alone could pull in another account's rows
        project_resources = resources_df.loc[
            (resources_df["MDC Account ID"] == client_id)
            & (resources_df["MDC Project ID"] == row["MDC Project ID"])
        ].to_dict("records")

        # go through each resource
        for resource in project_resources:

            task_result = get_task_file(
                row["MDC Project ID"], resource["MDC Resource ID"], resource["Row ID"]
            )

            # fail with a message that names the missing file's owner rather
            # than an opaque "cannot unpack NoneType" error
            if task_result is None:
                raise LookupError(
                    "No task file found for project "
                    f"{row['MDC Project ID']}, resource "
                    f"{resource['MDC Resource ID']} on row {resource['Row ID']}"
                )

            resource_dict, completed_hours = task_result

            # report the hours summed from the task file itself
            resource_dict["Completed Hours"] = completed_hours
            project_dict["Resources"].append(resource_dict)

        valid, msg = validate_json(schema, project_dict)
        if not valid:
            logger.error("validation error %s", msg)

        projects.append(project_dict)

    return projects


# get_projects("1882681138")

## Build a report for each account

In [None]:
def build_report(account_id):
    """Assemble the full report dict for one account.

    The report carries the month-ending and prepared dates plus the client
    dict from ``get_client``, with the output of ``get_projects`` under its
    "Projects" key. The result is validated against the report schema; a
    validation failure is logged, not raised.
    """

    schema_path = os.path.join("schema", "effort_hours-report.json")
    with open(schema_path, "r") as file:
        schema = json.load(file)

    logger.info(f"Creating report for account {account_id}.")

    # client section first, then hang its projects off it
    client = get_client(account_id)
    client["Projects"] = get_projects(client["MDC Client ID"])

    # primary report dict
    report = {
        "Month-ending Date": month_ending_date,
        "Report Prepared Date": prepared_date,
        "Notes": "",
        "Client": client,
    }

    is_valid, reason = validate_json(schema, report)
    if not is_valid:
        logger.error("The report is not valid, %s", reason)

    return report


# build_report("1882681138")

In [None]:
# distinct account IDs that have rows in accounts_df, as a plain list
account_ids = accounts_df["MDC Account ID"].unique().tolist()
account_ids

In [None]:
# build them all, keyed by account id
account_reports = {
    account_id: build_report(account_id) for account_id in account_ids
}

In [None]:
def slugged_filename(month_ending_date, client_name):
    """Return the report's .docx file name built from the month-ending date
    and the slugified client name."""
    client_slug = slugify(client_name)
    return f"se_hours-{month_ending_date}-{client_slug}.docx"

In [None]:
account_keys = account_reports.keys()

for account_id in account_keys:

    account_report = account_reports[account_id]

    # Create a fresh docx template instance from the template Word file for
    # every report, so each render starts from the untouched template and
    # never from a document that has already been rendered.
    template = DocxTemplate("client_report_template.docx")

    # render the object in memory
    template.render({"v": account_report})

    # save the object to the file system
    filename = slugged_filename(
        month_ending_date, account_report["Client"]["Client Name"]
    )
    # lazy %-formatting; the previous message lacked its f prefix and logged
    # the literal text "{filename}"
    logger.info("Creating file %s", filename)
    template.save(os.path.join("_cache", filename))

In [None]:
# fetch the client reports sheet; its columns are used to map titles to column ids
client_reports = ss_client.Sheets.get_sheet(client_reports_id)

In [None]:
# quick lookup from column title to column id for the client reports sheet
cell_ids = {
    column_info["title"]: column_info["id"]
    for column_info in (column.to_dict() for column in client_reports.columns)
}
cell_ids

In [None]:
def append_ss_row(account_reports, account_id):
    """Append one row for ``account_id``'s report to the client reports sheet.

    The row carries the client name, month-end date, completed hours and
    client id; a Notes cell is added only when the client notes are non-empty.

    Returns the id of the newly created row.
    """

    report = account_reports[account_id]
    client = report["Client"]

    new_row = ss_client.models.row.Row()

    # (column title, value) pairs in the order the cells are appended
    cell_values = [
        ("Client Name", client["Client Name"]),
        ("Month-end Date", report["Month-ending Date"]),
        ("Completed Hours", client["Completed Hours"]),
        ("MDC Client ID", client["MDC Client ID"]),
    ]
    if client["Notes"]:
        cell_values.append(("Notes", client["Notes"]))

    for title, value in cell_values:
        new_row.cells.append({"column_id": cell_ids[title], "value": value})

    new_row.to_bottom = True

    response = ss_client.Sheets.add_rows(client_reports_id, new_row)

    return response.to_dict()["data"][0]["id"]

In [None]:
account_keys = account_reports.keys()

if not post_records:

    logger.info("Not posting records.")

else:

    for account_id in account_keys:

        # add the summary row first; the rendered report is attached to it
        row_id = append_ss_row(account_reports, account_id)
        print(row_id)

        client_name = account_reports[account_id]["Client"]["Client Name"]
        filename = slugged_filename(month_ending_date, client_name)
        attachment_path = os.path.join("_cache", filename)

        with open(attachment_path, "rb") as f:
            ss_client.Attachments.attach_file_to_row(client_reports_id, row_id, f)