# Create Monthly Task Exports in Smartsheet


In [None]:
#%load_ext nb_black

In [None]:
import os
import json
import logging
import copy
from datetime import datetime, timedelta
from dateutil import parser
from typing import Dict, List
from pprint import pprint
from pathlib import Path

import pandas as pd
import numpy as np

from box import Box

import smartsheet

# uses the pretty okay SDK here: https://github.com/ProdPerfect/monday
from monday import MondayClient

from mondaydotcom_utils.formatted_value import (
    FormattedValue,
    get_col_defs,
    get_items_by_board,
)

import scrapbook as sb
import dotenv

from prefect import task, flow

from jsonschema import validate, ValidationError, RefResolver
from jsonschema.exceptions import RefResolutionError

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

In [None]:
# monday.com board IDs; the tasks, projects and accounts boards are read
# with get_items_by_board() further down
TASKS_BOARD_ID = "1883170887"
# not referenced in the visible part of this notebook
AGREEMENTS_BOARD_ID = "1882423671"
PROJECTS_BOARD_ID = "1882404316"
ACCOUNTS_BOARD_ID = "1882424009"

# not referenced in the visible part of this notebook
PROJECT_TASK_TIME_BOARD_ID = "2398200403"

# Smartsheet sheet ID passed to Sheets.delete_rows() in the last cell
# NOTE(review): same value as time_sheet_id defined later — confirm both are
# meant to point at the same sheet
posted_tasks_id = 3567675495475076

In [None]:
# make sure the local folder for the generated JSON exports exists;
# exist_ok avoids the check-then-create race of testing for the path first
os.makedirs("_cache", exist_ok=True)

In [None]:
# selects which ".env-<environment>" file the secrets are loaded from
environment = "dev"

# change these or set as papermill parameters to report on year and month
year_for_report = 2022
month_for_report = 7

# it is possible to run this without posting to smartsheet... useful for testing
# (the monday.com update and the estimate deletion below check the same flag)
post_records = True

In [None]:
# check the environment vars for secrets

env_file = f".env-{environment}"
logger.info("Loading the .env file from %s", env_file)

# find_dotenv() returns an empty string when the file cannot be located;
# report that instead of continuing silently
env_path = dotenv.find_dotenv(env_file)
if not env_path:
    logger.warning("Could not locate %s", env_file)
dotenv.load_dotenv(env_path)

# raise explicitly rather than assert: asserts are stripped under `python -O`
for required_key in ("MONDAY_KEY", "SMARTSHEET_KEY"):
    if not os.environ.get(required_key):
        raise RuntimeError(f"{required_key} not found in {env_file}")

In [None]:
def month_end_date(year, month):
    """Return the last day of the given month as a ``YYYY-MM-DD`` string.

    The date is found by taking the first day of the following month and
    stepping back one day, which handles month lengths and leap years.
    """
    # December rolls over into January of the next year
    if month + 1 == 13:
        next_year, next_month = year + 1, 1
    else:
        next_year, next_month = year, month + 1

    first_of_next_month = datetime.strptime(f"{next_year}-{next_month}-1", "%Y-%m-%d")
    last_of_month = first_of_next_month - timedelta(days=1)
    return last_of_month.strftime("%Y-%m-%d")

In [None]:
# last day of the reporting month as "YYYY-MM-DD"; stamped on every task
# and used in the names of the exported JSON files
month_ending_date = month_end_date(year_for_report, month_for_report)
month_ending_date

In [None]:
# today's date as "YYYY-MM-DD"; written to the "Posted Date" cell of each
# Smartsheet row built below
prepared_date = datetime.today().strftime("%Y-%m-%d")
prepared_date

In [None]:
# connect monday client
# (the API key is read from the MONDAY_KEY environment variable)
conn = MondayClient(os.environ.get("MONDAY_KEY"))

In [None]:
# connect smartsheet client
# (the API key is read from the SMARTSHEET_KEY environment variable)
ss_client = smartsheet.Smartsheet(os.environ.get("SMARTSHEET_KEY"))
# ask the SDK to raise on API errors rather than hand back an error result
ss_client.errors_as_exceptions(True)

In [None]:
def validate_json(schema, json_data):
    """
    Validate ``json_data`` against ``schema``.

    Every file in the local ``schema`` folder is loaded and registered under
    its ``$id`` so that ``$ref`` links between schema files can be resolved.

    Returns a ``(is_valid, message)`` tuple: ``(True, None)`` when the data
    validates, otherwise ``(False, <validation error message>)``.
    """

    # mostly from https://stackoverflow.com/questions/25145160/json-schema-ref-does-not-work-for-relative-path
    schema_store = {}
    for source in Path("schema").iterdir():
        # context manager so each schema file is closed as soon as it is parsed
        with open(source, encoding="utf-8") as schema_file:
            stored_schema = json.load(schema_file)
        schema_store[stored_schema["$id"]] = stored_schema

    resolver = RefResolver.from_schema(schema, store=schema_store)

    try:
        validate(instance=json_data, schema=schema, resolver=resolver)
    except ValidationError as err:
        return False, err.message
    return True, None

## Start fetching MDC data

In [None]:
# bug between ProdPerfect and MDC's API: https://github.com/ProdPerfect/monday/issues/57
# so the users query is written by hand and sent through BaseResource
from monday.resources.base import BaseResource

# GraphQL query asking for the id, name, email and enabled flag of the users
query = """query
    {
        users () {
            id
            name
            email
            enabled
        }
    }"""
query

In [None]:
# send the hand-written query; note that _query is an underscore-prefixed
# (non-public) method of the SDK's BaseResource
base_resource = BaseResource(os.environ.get("MONDAY_KEY"))
# keep only the list of users from the response payload
users = base_resource._query(query)["data"]["users"]

In [None]:
# index the users by id so later cells can look one up with users_df.loc[<id>]
users_df = pd.DataFrame(users).set_index("id")
users_df

In [None]:
# accounts board column -> report column
account_column_names = {
    "monday_id": "MDC Client ID",
    "monday_name": "Client Name",
    "No Bill__checked": "No Bill",
    "Notes": "Client Notes",
}

# accounts board columns the report has no use for
account_columns_to_drop = [
    "Contacts",
    "Subitems",
    "Customer Projects",
    "Type__text",
    "Type__changed_at",
    "No Bill__changed_at",
]

# fetch the accounts board, blank out missing values, then rename and trim
accounts_df = (
    get_items_by_board(conn, ACCOUNTS_BOARD_ID)
    .fillna("")
    .rename(columns=account_column_names)
    .drop(columns=account_columns_to_drop)
)

accounts_df

In [None]:
# projects board column -> report column
project_column_names = {
    "monday_id": "MDC Project ID",
    "monday_name": "Project Name",
    "Project Lifecycle__text": "Project Lifecycle",
    "Account": "MDC Client ID",
    "Notes": "Project Notes",
}

# projects board columns the report has no use for
project_columns_to_drop = [
    "Project Tasks",
    "Subitems",
    "Project Contacts",
    "Timeline",
    "Customer Source",
    "Dependency",
    "Date Added",
    "Timeline Days",
    "Date Added__default_formatter",
    "Tasks Status__mirror",
    "Project Lifecycle__changed_at",
    "Repo Description__mirror",
    "Timeline__to",
    "Timeline__from",
    "Timeline__changed_at",
]

# fetch the projects board, blank out missing values, rename and trim, then
# explode list-valued client IDs into one row each
projects_df = (
    get_items_by_board(conn, PROJECTS_BOARD_ID)
    .fillna("")
    .rename(columns=project_column_names)
    .drop(columns=project_columns_to_drop)
    .explode(["MDC Client ID"], ignore_index=True)
)
projects_df

In [None]:
# attach the account columns to every project row; projects without a
# matching account are kept (left join)
projects_df = projects_df.merge(accounts_df, how="left", on="MDC Client ID")
projects_df

In [None]:
# tasks board column -> report column
task_column_names = {
    "monday_id": "MDC Task ID",
    "monday_name": "Task Name",
    "Customer Project": "MDC Project ID",
    "Notes": "Task Notes",
}

# tasks board columns the report has no use for
task_columns_to_drop = [
    "Subtasks",
    "Customer Repos",
    "Timeline Hours (Estimated)__formula",
    "Timeline__visualization_type",
    "Actual Time__running",
    "Timeline__to",
    "Timeline__from",
    "Timeline__changed_at",
    "Timeline Days",
    "Total Actual Hours__formula",
    "Date Added__default_formatter",
    "Timeline",
    "Actual Time__startDate",
    "Actual Time__changed_at",
    "Actual Time",
    "Date Added",
    "Dependencies",
    "Project Status__mirror",
    "Project Closed Date__mirror",
    "Extended Hours",
    "Reported Month-end Date",
]

# only tasks whose status is "Done" are requested from the board
done_tasks_df = get_items_by_board(conn, TASKS_BOARD_ID, "status", "Done")

# of those, keep the ones whose integration message starts with "Ready"
# (rows with no message are excluded)
logger.info("Selecting Ready tasks")
is_ready = done_tasks_df["Integration Message"].str.startswith("Ready", na=False)

tasks_df = (
    done_tasks_df.loc[is_ready]
    .rename(columns=task_column_names)
    .drop(columns=task_columns_to_drop)
)
tasks_df.head()

In [None]:
# Keep only the tasks completed in the reporting year and month.
# The completion dates are parsed once and compared directly, so no helper
# columns have to be added to (and later dropped from) the frame.
completed_on = pd.DatetimeIndex(pd.to_datetime(tasks_df["Date Completed"]))

in_report_year = completed_on.year == year_for_report
in_report_month = completed_on.month == month_for_report

# only interested in this month's tasks
tasks_df = tasks_df.loc[in_report_year & in_report_month].copy()

# one row per linked project
tasks_df = tasks_df.explode(["MDC Project ID"], ignore_index=True)

# every remaining task is reported against the same month-end date
tasks_df["Month Ending Date"] = month_ending_date

tasks_df

## Validate each record against the rules, and create multiple task records where required.

If a task has recorded time-tracking sessions, one journal record is created per session. Otherwise, if actual hours are used, the number of owners dictates the number of journal records. E.g., actual hours = 15 with 3 owners yields three journal entries of 5 hours each (actual hours / owner count).

In [None]:
def breakout_sessions(record, sessions_list):
    """
    run through sessions and
    create new journal tasks for each one

    Each session yields a deep copy of ``record`` with the resource taken
    from the session's ``ended_user_id`` (name looked up in the module-level
    ``users_df``), the hours computed from ``started_at`` to ``ended_at``,
    and the task ID suffixed with the 1-based session number.

    return a list of dictionaries (empty when there are no sessions)
    """
    task_id = record["MDC Task ID"]

    session_records = []

    # multiply the number of tasks by sessions; numbering starts at 1
    for session_number, session in enumerate(sessions_list, start=1):

        new_rec = copy.deepcopy(record)

        ended_user_id = session["ended_user_id"]
        new_rec["MDC Resource ID"] = ended_user_id
        new_rec["Resource Name"] = users_df.loc[ended_user_id]["name"]

        start_date = parser.parse(session["started_at"])
        end_date = parser.parse(session["ended_at"])
        new_rec["Session Completed"] = end_date.strftime("%Y-%m-%d")

        # take the difference between the two dates as hours
        difference = end_date - start_date
        new_rec["Completed Hours"] = difference.total_seconds() / 60 / 60

        # show each task ID with an index
        new_rec["MDC Task ID"] = f"{task_id}-{session_number}"

        new_rec["integration_state_rule"] = "hours_from_session_records"
        session_records.append(new_rec)

    return session_records

In [None]:
def breakdown_owners(record, owners_list):
    """
    Split a task's "Actual Hours" evenly across its owners.

    Every owner gets a deep copy of ``record`` carrying that owner's ID and
    name (looked up in the module-level ``users_df``), an equal share of the
    hours, and the task's completion date as the session date.

    return a list of dictionaries, one per owner
    """
    total_hours = record["Actual Hours"]
    owner_count = len(owners_list)
    completed_on = record["Date Completed"]

    def _owner_record(owner):
        # start from a full copy so the caller's record is left untouched
        owner_rec = copy.deepcopy(record)

        owner_id = owner["id"]
        owner_rec["MDC Resource ID"] = owner_id
        owner_rec["Resource Name"] = users_df.loc[owner_id]["name"]

        # equal share of the task time
        owner_rec["Completed Hours"] = total_hours / owner_count

        parsed_completion = parser.parse(f"{completed_on}")
        owner_rec["Session Completed"] = parsed_completion.strftime("%Y-%m-%d")

        owner_rec["integration_state_rule"] = "hours_split_between_owners"
        return owner_rec

    return [_owner_record(owner) for owner in owners_list]

In [None]:
records = tasks_df.to_dict(orient="records")

# journal records produced from every task, in task order
calcd_records = []

for record in records:
    task_id = record["MDC Task ID"]

    # anything that is not a list counts as "none"
    owners_list = record["Owner"] if isinstance(record["Owner"], list) else []
    sessions_list = (
        record["Actual Time__additional_value"]
        if isinstance(record["Actual Time__additional_value"], list)
        else []
    )

    # recorded sessions take precedence over an even split between owners
    if sessions_list:
        logger.info("Breaking down %s by sessions", task_id)
        calcd_records.extend(breakout_sessions(record, sessions_list))
    elif owners_list:
        logger.info("Breaking out %s by owners", task_id)
        calcd_records.extend(breakdown_owners(record, owners_list))
    else:
        # the task is left out of the report, so say which one it was
        logger.error("Neither owners nor sessions have lists. (task %s)", task_id)

In [None]:
# source columns that were only needed to work out the journal records
journal_columns_to_drop = [
    "Status__changed_at",
    "Actual Time__duration",
    "Actual Time__additional_value",
    "Owner",
    "Actual Hours",
]

journal_task_df = (
    pd.DataFrame(calcd_records)
    .drop(columns=journal_columns_to_drop)
    .rename(columns={"Status__text": "Status"})
)
journal_task_df

Finally merge the tasks and projects together for a final task list.

In [None]:
# the final dataframe includes the clients, projects and the tasks;
# journal records without a matching project are kept (left join)
df = journal_task_df.merge(projects_df, how="left", on="MDC Project ID")
df

In [None]:
# create a group by report for posting to SE Project/Grant Time smartsheet

# one report row per client / project / resource combination
report_keys = ["MDC Client ID", "MDC Project ID", "MDC Resource ID"]

# hours are totalled; the descriptive columns take the group's first value
report_aggregations = {
    "Completed Hours": "sum",
    "Month Ending Date": "first",
    "Grant Number": "first",
    "No Bill": "first",
    "Client Name": "first",
    "Project Name": "first",
    "Resource Name": "first",
}

grouped_tasks = df.groupby(report_keys)
report_df = grouped_tasks.agg(report_aggregations).reset_index()
report_df

In [None]:
# only need billable
# report_df = report_df.loc[report_df["No Bill"] == False].reset_index(drop=True)
# report_df

## Create JSON objects

In [None]:
def get_resource(users_df, resource_id):
    """
    Given the users DF, lookup resource by resource ID

    ``users_df`` is expected to be indexed by user ID and to have a ``name``
    column. It is only read, never modified.

    Return a dict with "MDC Resource ID" (as a string) and "Resource Name".
    Raises KeyError when ``resource_id`` is not in the index.
    """
    user_row = users_df.loc[resource_id]

    return {
        # the .name attribute of a single row is its index label (the user
        # ID), which is distinct from the "name" column read on the next line
        "MDC Resource ID": str(user_row.name),
        "Resource Name": user_row["name"],
    }


# mini test
# get_resource(users_df, resource_id=25815860)

In [None]:
def get_tasks(users_df, tasks_df, project_id, resource_id):
    """
    Collect one resource's tasks on one project.

    Rows of ``tasks_df`` matching both ``project_id`` and ``resource_id`` are
    turned into task blocks and attached, under "Tasks", to the resource dict
    returned by ``get_resource``.

    Return the resource dict with its list of task dicts
    """
    on_project = tasks_df["MDC Project ID"] == project_id
    by_resource = tasks_df["MDC Resource ID"] == resource_id
    matching_tasks = tasks_df.loc[on_project & by_resource].to_dict("records")

    resource = get_resource(users_df, resource_id)

    # one task block per matching row, in frame order
    resource["Tasks"] = [
        {
            "MDC Task ID": str(task["MDC Task ID"]),
            "Task Name": task["Task Name"],
            "Notes": task["Task Notes"],
            "Pull Request URL": task["Pull Request URL"],
            "Issue URL": task["Issue URL"],
            "Completed Hours": task["Completed Hours"],
            "Task Complete Date": task["Date Completed"],
            "Integration Message": task["integration_state_rule"],
            "Session Complete Date": task["Session Completed"],
        }
        for task in matching_tasks
    ]

    return resource


# mini test
# pprint(get_tasks(users_df, df, project_id=2303324267, resource_id=25815853))

In [None]:
# open the schema that every exported resource/task document is validated against
# (explicit UTF-8 so decoding does not depend on the platform's default encoding)
with open(
    os.path.join("schema", "effort_hours-resource.json"), "r", encoding="utf-8"
) as file:
    schema = json.load(file)

In [None]:
# loop through the summary report and create files
for _, row in report_df.iterrows():
    project_id = row["MDC Project ID"]
    resource_id = row["MDC Resource ID"]
    resource_task_dict = get_tasks(
        users_df,
        df,
        project_id=project_id,
        resource_id=resource_id,
    )

    # raise explicitly (asserts are stripped under `python -O`) and include
    # the validator's message so the offending record can be tracked down
    is_valid, validation_message = validate_json(schema, resource_task_dict)
    if not is_valid:
        raise ValueError(
            f"Export for project {project_id}, resource {resource_id} "
            f"failed schema validation: {validation_message}"
        )

    # the attachment step later rebuilds this exact file name from report_df
    filename = f"{project_id}_{resource_id}_{month_ending_date}.json"
    with open(os.path.join("_cache", filename), "w") as f:
        f.write(json.dumps(resource_task_dict, indent=4))

Now, Smartsheet's turn?

In [None]:
# smartsheet can have duplicate sheet names,
# so best not to rely on them
# (the sheet is fetched by its numeric ID instead)
time_sheet_id = 3567675495475076
time_sheet = ss_client.Sheets.get_sheet(time_sheet_id)

In [None]:
# break down the cell IDs into a quick lookup box: column title -> column id
sheet_columns = [sheet_column.to_dict() for sheet_column in time_sheet.columns]
cell_ids = {col["title"]: col["id"] for col in sheet_columns}
cell_ids

Add the records to Smartsheet

In [None]:
# (Smartsheet column title, report_df column) pairs, in the order the cells
# are added to each row
report_cell_sources = [
    ("Account/Client", "Client Name"),
    ("Grant Proposal #", "Grant Number"),
    ("Project Title", "Project Name"),
    ("Month-end Date", "Month Ending Date"),
    ("Completed Hours", "Completed Hours"),
    ("Resource", "Resource Name"),
    ("MDC Account ID", "MDC Client ID"),
    ("MDC Project ID", "MDC Project ID"),
    ("MDC Resource ID", "MDC Resource ID"),
]

rows = []
for report_row in report_df.to_dict("records"):

    row = ss_client.models.row.Row()

    for column_title, report_key in report_cell_sources:
        if column_title == "Grant Proposal #":
            # the grant number is optional; NaN is truthy, so a plain
            # truthiness test would let a missing value through as a cell
            grant_number = report_row.get(report_key)
            if pd.isna(grant_number) or not grant_number:
                continue

        row.cells.append(
            {"column_id": cell_ids[column_title], "value": report_row[report_key]}
        )

    # stamp the row with the date this report was prepared
    row.cells.append({"column_id": cell_ids["Posted Date"], "value": prepared_date})

    row.to_bottom = True
    rows.append(row)

In [None]:
# add all prepared rows to the time sheet in one call; skipped when there is
# nothing to add or posting is switched off, in which case result stays None
result = None
if rows and post_records:
    logger.info("Adding %s posted rows to SmartSheet", len(rows))
    result = ss_client.Sheets.add_rows(time_sheet_id, rows)

result

In [None]:
# get the row ids and update the dataframe so we know where to attach a file
if not (result and post_records):
    logger.warning("Not posting")
else:
    # ids of the newly added rows, taken from the add_rows response
    posted_row_ids = [posted_row["id"] for posted_row in result.to_dict()["data"]]
    row_series = pd.Series(posted_row_ids, name="row_id", dtype=np.int64)

    # joined column-wise on the index, next to the report rows
    report_df = pd.concat([report_df, row_series], axis=1)

report_df

In [None]:
# attach the json we created way above in the _cache folder
if post_records:
    for index, row in report_df.iterrows():
        # same naming pattern that was used when the export was written to _cache
        filename = f"{row['MDC Project ID']}_{row['MDC Resource ID']}_{row['Month Ending Date']}.json"
        logger.info(f"Writing file {filename} to smartsheet...")

        # row_id is the Smartsheet row id added to report_df from the add_rows response
        # NOTE(review): the file is passed as a text-mode handle — confirm this is
        # the form the Smartsheet SDK expects for attachments
        with open(os.path.join("_cache", filename), "r") as f:
            ss_client.Attachments.attach_file_to_row(time_sheet_id, row["row_id"], f)

In [None]:
# mark the reported tasks as posted on the monday.com tasks board
if post_records:
    # Session breakouts carry "<task id>-<n>" IDs and owner splits repeat the
    # same ID, so collapse to the distinct parent item IDs (order preserved)
    # and update each task once instead of once per journal row.
    posted_item_ids = dict.fromkeys(
        str(task_id).split("-")[0] for task_id in df["MDC Task ID"]
    )
    for item_id in posted_item_ids:
        # "text01" is the board column receiving the message
        # (presumably the integration message column — TODO confirm)
        result = conn.items.change_item_value(
            TASKS_BOARD_ID,
            item_id,
            "text01",
            f"Posted - {datetime.now()}",
        )
else:
    logger.warning("Not posting")

Delete any estimates

In [None]:
# delete the estimates only
# result holds the delete_rows response, or stays None when nothing was deleted
result = None
# only do this if we had some monthly tasks to report
if post_records and len(report_df) > 0:
    # fetch the sheet through the saved filter; rows not filtered out are the
    # ones to remove (presumably the filter selects the estimate rows — confirm)
    estimates_sheet = ss_client.Sheets.get_sheet(
        time_sheet_id, filter_id=5850658663360388
    )
    rows_to_delete = [
        sheet_row["id"]
        for sheet_row in estimates_sheet.to_dict()["rows"]
        if not sheet_row["filteredOut"]
    ]
    if rows_to_delete:
        # delete from the same sheet the row IDs were read from
        result = ss_client.Sheets.delete_rows(time_sheet_id, rows_to_delete)
else:
    logger.warning("Not writing estimates")

result