# Create Monthly Task Reports in Smartsheet

Takes records from MDC and creates a JSON object for later reporting.

In [1]:
#%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import os
import json
import logging
from datetime import datetime, timedelta
from dateutil import parser
from typing import Dict, List
from pprint import pprint
import pandas as pd
import numpy as np
from pathlib import Path
import smartsheet

# uses the pretty okay SDK here: https://github.com/ProdPerfect/monday
from monday import MondayClient

from mondaydotcom_utils.formatted_value import (
    FormattedValue,
    get_col_defs,
    get_items_by_board,
)

import scrapbook as sb
import dotenv

from jsonschema import validate, ValidationError, RefResolver
from jsonschema.exceptions import RefResolutionError

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

<IPython.core.display.Javascript object>

In [3]:
# MDC boards
TASKS_BOARD_ID = "1883170887"
AGREEMENTS_BOARD_ID = "1882423671"
PROJECTS_BOARD_ID = "1882404316"
ACCOUNTS_BOARD_ID = "1882424009"

# SS tables
posted_tasks_id = 3567675495475076

<IPython.core.display.Javascript object>

In [4]:
environment = "dev"

# change these or set as papermill parameters to report on year and month
year_for_report = 2021
month_for_report = 9

# if entering custom report notes
notes = ""

# it is possible to run this without posting to smartsheet... useful for testing
post_records = False

# filter for "ready" tasks
filter_tasks_by = "Posted"  # or also "Posted"

<IPython.core.display.Javascript object>

In [5]:
# check the environment vars for secrets

env_file = f".env-{environment}"
logger.info("Loading the .env file from %s", env_file)
dotenv.load_dotenv(dotenv.find_dotenv(env_file))

assert os.environ.get("MONDAY_KEY"), f"MONDAY_KEY not found in {env_file}"
assert os.environ.get("SMARTSHEET_KEY"), f"SMARTSHEET_KEY not found in {env_file}"

<IPython.core.display.Javascript object>

In [6]:
# connect monday client
conn = MondayClient(os.environ.get("MONDAY_KEY"))

<IPython.core.display.Javascript object>

In [7]:
# connect smartsheet client
ss_client = smartsheet.Smartsheet(os.environ.get("SMARTSHEET_KEY"))
ss_client.errors_as_exceptions(True)

<IPython.core.display.Javascript object>

In [8]:
def month_end_date(year, month):
    """Calculate the month end date given a year and month."""
    month += 1
    if month == 13:
        month = 1
        year += 1

    tempdate = datetime.strptime(f"{year}-{month}-1", "%Y-%m-%d")
    return (tempdate - timedelta(days=1)).strftime("%Y-%m-%d")

<IPython.core.display.Javascript object>

In [9]:
month_ending_date = month_end_date(year_for_report, month_for_report)
month_ending_date

'2021-09-30'

<IPython.core.display.Javascript object>

In [10]:
prepared_date = datetime.today().strftime("%Y-%m-%d")
prepared_date

'2022-07-05'

<IPython.core.display.Javascript object>

In [11]:
# bug between ProdPerfect and MDC's API: https://github.com/ProdPerfect/monday/issues/57
from monday.resources.base import BaseResource

query = """query
    {
        users () {
            id
            name
            email
            enabled
        }
    }"""
query

'query\n    {\n        users () {\n            id\n            name\n            email\n            enabled\n        }\n    }'

<IPython.core.display.Javascript object>

In [12]:
base_resource = BaseResource(os.environ.get("MONDAY_KEY"))
users = base_resource._query(query)["data"]["users"]

<IPython.core.display.Javascript object>

In [13]:
users_df = pd.DataFrame(users).set_index("id")
users_df.head()

Unnamed: 0_level_0,name,email,enabled
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
25810257,Steve Taylor,stephen.taylor@cuanschutz.edu,True
25815853,Faisal Alquaddoomi,faisal.alquaddoomi@cuanschutz.edu,True
25815860,Vincent Rubinetti,vincent.rubinetti@cuanschutz.edu,True
26327954,Audrey Wen,audrey.wen@cuanschutz.edu,True
28137107,Holly Heilman,holly.heilman@cuanschutz.edu,True


<IPython.core.display.Javascript object>

In [14]:
accounts_df = get_items_by_board(conn, ACCOUNTS_BOARD_ID).fillna("")

accounts_df.rename(
    columns={
        "monday_id": "MDC Client ID",
        "monday_name": "Client Name",
        "No Bill__checked": "No Bill",
        "Notes": "Client Notes",
    },
    inplace=True,
)

accounts_df.drop(
    columns=[
        "Contacts",
        "Subitems",
        "Customer Projects",
        "No Bill__changed_at",
        "Type__text",
        "Type__changed_at",
    ],
    inplace=True,
)

accounts_df

Unnamed: 0,MDC Client ID,Client Name,Client Notes,No Bill
0,1882439999,HealthAI: Admin & Operations,A catch-all account for any work we do for Aud...,True
1,1883644776,HealthAI: Bennett Lab,,False
2,1882681138,HealthAI: Greene Lab,,False
3,1883648098,HealthAI: Hunter Lab,,False
4,1882681714,HealthAI: TISLab,,False
5,1883649981,HealthAI: Way Lab,,False
6,1882462147,CU SOM: IT Department,,False
7,1882588856,CIDA: Center for Innovative Design & Analysis,,False
8,1907269862,HealthAI: Sean Davis,,False
9,2246385174,HealthAI: Dwork Lab,,False


<IPython.core.display.Javascript object>

In [15]:
projects_df = get_items_by_board(conn, PROJECTS_BOARD_ID).fillna("")

projects_df.rename(
    columns={
        "monday_id": "MDC Project ID",
        "monday_name": "Project Name",
        "Project Lifecycle__text": "Project Lifecycle",
        "Account": "MDC Client ID",
        "Notes": "Project Notes",
    },
    inplace=True,
)

projects_df.drop(
    columns=[
        "Project Tasks",
        "Subitems",
        "Project Contacts",
        "Timeline",
        "Customer Source",
        "Dependency",
        "Date Added",
        "Timeline Days",
        "Date Added__default_formatter",
        "Tasks Status__mirror",
        "Project Lifecycle__changed_at",
        "Repo Description__mirror",
        "Timeline__to",
        "Timeline__from",
        "Timeline__changed_at",
    ],
    inplace=True,
)

projects_df = projects_df.explode(["MDC Client ID"], ignore_index=True)
projects_df

Unnamed: 0,MDC Project ID,Project Name,Project Lifecycle,MDC Client ID,Grant Number,Project Notes,Project Closed Date
0,1888314634,CHAI: Admin Technology Foundation,Open,1882439999,,,
1,2208602434,CHAI: Department Financials,Open,1882439999,,,
2,2303312548,TISLab: Staffing/Support FY2022,Open,1882681714,,General non-grant staff support,
3,2303324267,Greenelab: Staffing/Support FY2022,Open,1882681138,,General non-grant staff support,
4,2334955423,Way Lab: Staffing/Support FY2022,Open,1883649981,,General non-grant staff support,
5,1882738595,Greenelab: mygeneset.info,Open,1882681138,,Collaboration with BioThings organization. Web...,
6,2831164270,CHAI: CY2022 Prof. Dev. Goal/Tasks,Open,1882439999,,For tracking tasks against professional develo...,
7,1882739627,CHAI: Manubot next-gen,,1882439999,213269.0,Pending scoping and schedule; Issue resolution...,
8,2249818009,TISLab: Monarch UI (3.0) Extended 2022,,1882681714,,Pending: Enhancements and extended Monarch UI ...,2022-05-12
9,2265215456,Way: pyctytominer (Python),,1883649981,221151.0,Pending: Python and build support,


<IPython.core.display.Javascript object>

In [16]:
# add the account to the projects
account_projects_df = pd.merge(projects_df, accounts_df, how="left", on="MDC Client ID")
account_projects_df

Unnamed: 0,MDC Project ID,Project Name,Project Lifecycle,MDC Client ID,Grant Number,Project Notes,Project Closed Date,Client Name,Client Notes,No Bill
0,1888314634,CHAI: Admin Technology Foundation,Open,1882439999,,,,HealthAI: Admin & Operations,A catch-all account for any work we do for Aud...,True
1,2208602434,CHAI: Department Financials,Open,1882439999,,,,HealthAI: Admin & Operations,A catch-all account for any work we do for Aud...,True
2,2303312548,TISLab: Staffing/Support FY2022,Open,1882681714,,General non-grant staff support,,HealthAI: TISLab,,False
3,2303324267,Greenelab: Staffing/Support FY2022,Open,1882681138,,General non-grant staff support,,HealthAI: Greene Lab,,False
4,2334955423,Way Lab: Staffing/Support FY2022,Open,1883649981,,General non-grant staff support,,HealthAI: Way Lab,,False
5,1882738595,Greenelab: mygeneset.info,Open,1882681138,,Collaboration with BioThings organization. Web...,,HealthAI: Greene Lab,,False
6,2831164270,CHAI: CY2022 Prof. Dev. Goal/Tasks,Open,1882439999,,For tracking tasks against professional develo...,,HealthAI: Admin & Operations,A catch-all account for any work we do for Aud...,True
7,1882739627,CHAI: Manubot next-gen,,1882439999,213269.0,Pending scoping and schedule; Issue resolution...,,HealthAI: Admin & Operations,A catch-all account for any work we do for Aud...,True
8,2249818009,TISLab: Monarch UI (3.0) Extended 2022,,1882681714,,Pending: Enhancements and extended Monarch UI ...,2022-05-12,HealthAI: TISLab,,False
9,2265215456,Way: pyctytominer (Python),,1883649981,221151.0,Pending: Python and build support,,HealthAI: Way Lab,,False


<IPython.core.display.Javascript object>

In [17]:
# only getting done tasks
tasks_df = get_items_by_board(conn, TASKS_BOARD_ID, "status", "Done")

# Only include Ready tasks
if filter_tasks_by == "Ready":
    tasks_df = tasks_df.loc[
        tasks_df["Integration Message"].str.startswith("Ready", na=False)
    ]
elif filter_tasks_by == "Posted":
    # had to add this to redo the old stuff into a new json format
    tasks_df = tasks_df.loc[
        tasks_df["Integration Message"].str.startswith("Posted", na=False)
    ]
else:
    # don't filter at all
    pass

tasks_df.rename(
    columns={
        "monday_id": "MDC Task ID",
        "monday_name": "Task Name",
        "Customer Project": "MDC Project ID",
        "Notes": "Task Notes",
    },
    inplace=True,
)

tasks_df.drop(
    columns=[
        "Subtasks",
        "Customer Repos",
        "Timeline Hours (Estimated)__formula",
        "Timeline__visualization_type",
        "Actual Time__running",
        "Timeline__to",
        "Timeline__from",
        "Timeline__changed_at",
        "Timeline Days",
        "Total Actual Hours__formula",
        "Date Added__default_formatter",
        "Timeline",
        "Actual Time__startDate",
        "Actual Time__changed_at",
        "Actual Time",
        "Date Added",
        "Dependencies",
        "Project Status__mirror",
        "Project Closed Date__mirror",
    ],
    inplace=True,
)

tasks_df = tasks_df.explode(["MDC Project ID"], ignore_index=True)
tasks_df.head()

Unnamed: 0,MDC Task ID,Task Name,MDC Project ID,Owner,Status__text,Status__changed_at,Integration Message,Actual Time__duration,Actual Time__additional_value,Actual Hours,Task Notes,Pull Request URL,Issue URL,Date Completed,Extended Hours,Reported Month-end Date
0,1906313744,"Lab Website Template v0.5 accessibility, perfo...",1882712838,"[{'id': 25815860, 'kind': 'person'}]",Done,2021-11-15T17:07:34.444Z,Posted - 2022-04-27 16:39:08.404092,64800.0,"[{'id': 150436869, 'account_id': 10368903, 'pr...",,,,,2021-11-19,,
1,1906337176,Preprint Similarity Search app maintenance and...,1977980999,"[{'id': 25815860, 'kind': 'person'}]",Done,2021-11-15T17:11:18.983Z,Posted - 2022-04-27 16:39:12.590458,18000.0,"[{'id': 150439621, 'account_id': 10368903, 'pr...",,,,,2021-11-17,,
2,1906376074,Implement modal component,1882442059,"[{'id': 25815860, 'kind': 'person'}]",Done,2021-11-17T22:13:28.278Z,Posted - 2022-04-27 16:39:14.913360,15505.0,"[{'id': 150443741, 'account_id': 10368903, 'pr...",,https://github.com/monarch-initiative/monarch-...,,,2021-11-23,,
3,1912542060,Patch AWS (or remove entirely),2303324267,"[{'id': 25810257, 'kind': 'person'}]",Done,2022-01-25T21:59:56.747Z,Posted - 2022-04-27 16:44:52.850435,,,2.0,Time spent reviewing options; finally decided ...,,,2022-01-25,,
4,1916012594,graphdb-deployer development,1882913862,"[{'id': 25815853, 'kind': 'person'}]",Done,2022-01-26T17:31:13.547Z,Posted - 2022-04-27 16:45:11.015272,69548.0,"[{'id': 159439810, 'account_id': 10368903, 'pr...",,"includes initial setup, testing w/kevin, gener...",,,2022-01-07,,


<IPython.core.display.Javascript object>

Validate takes an individual record and checks it against rules, and creates multiple task records where required.

If actual hours is used, then the number of owners dictates the number of journal records. E.g., actual hours = 15, with 3 owners, yields three journal entries at 5 each (actual hours / owner count).

In [18]:
records = tasks_df.to_dict(orient="records")

<IPython.core.display.Javascript object>

In [19]:
# check for actual hours records
owner_records = []

for record in records:

    actual_hours = record["Actual Hours"]
    owners_list = record["Owner"] if isinstance(record["Owner"], list) else []
    len_owners_list = len(owners_list)
    date_completed = record["Date Completed"]

    if not np.isnan(actual_hours):
        # split the hours up between the owners
        for owner in owners_list:
            new_rec = record.copy()

            # overwrite the owner
            new_rec["MDC Resource ID"] = owner["id"]
            new_rec["Resource Name"] = users_df.loc[owner["id"]]["name"]

            # divide the task time
            new_rec["hours"] = actual_hours / len_owners_list

            # get the task time from date completed... or fallback on the status
            if record.get("Date Completed") and record["Date Completed"]:
                new_rec["Session Completed"] = parser.parse(
                    f"{date_completed}"
                ).strftime("%Y-%m-%d")
            else:
                new_rec["Session Completed"] = parser.parse(
                    record["Status__changed_at"]
                ).strftime("%Y-%m-%d")

            new_rec["integration_state_rule"] = "hours_split_between_owners"
            owner_records.append(new_rec)

<IPython.core.display.Javascript object>

In [20]:
owner_records_df = pd.DataFrame(owner_records)
owner_records_df.drop(
    columns=[
        "Actual Time__additional_value",
        "Actual Time__duration",
        "Status__changed_at",
        "Actual Hours",
    ],
    inplace=True,
)
owner_records_df.rename(columns={"Status__text": "Status"}, inplace=True)
owner_records_df

Unnamed: 0,MDC Task ID,Task Name,MDC Project ID,Owner,Status,Integration Message,Task Notes,Pull Request URL,Issue URL,Date Completed,Extended Hours,Reported Month-end Date,MDC Resource ID,Resource Name,hours,Session Completed,integration_state_rule
0,1912542060,Patch AWS (or remove entirely),2303324267,"[{'id': 25810257, 'kind': 'person'}]",Done,Posted - 2022-04-27 16:44:52.850435,Time spent reviewing options; finally decided ...,,,2022-01-25,,,25810257,Steve Taylor,2.0,2022-01-25,hours_split_between_owners
1,1957499412,Way Lab Grant Proposal Creation and Review,1957293587,"[{'id': 25810257, 'kind': 'person'}, {'id': 25...",Done,Posted - 2022-04-27 16:42:44.753809,,,,2021-12-04,,,25810257,Steve Taylor,2.0,2021-12-04,hours_split_between_owners
2,1957499412,Way Lab Grant Proposal Creation and Review,1957293587,"[{'id': 25810257, 'kind': 'person'}, {'id': 25...",Done,Posted - 2022-04-27 16:42:44.753809,,,,2021-12-04,,,25815853,Faisal Alquaddoomi,2.0,2021-12-04,hours_split_between_owners
3,2003198974,"""Word Lapse"" Proposal Creation and Review",1969468997,"[{'id': 25810257, 'kind': 'person'}]",Done,Posted - 2022-04-27 16:42:47.060917,"200 hour proposal sent to Casey and David, 12/...",,,2021-12-04,,,25810257,Steve Taylor,2.0,2021-12-04,hours_split_between_owners
4,2064112158,"monarch blue/green data migration, research",1892630899,"[{'id': 25815853, 'kind': 'person'}]",Done,Posted - 2022-04-27 16:47:42.940380,,,,2022-02-23,,,25815853,Faisal Alquaddoomi,28.0,2022-02-23,hours_split_between_owners
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,2512879308,"chai website updates, meetings, pr reviews",1888314634,"[{'id': 25815860, 'kind': 'person'}]",Done,Posted - No Bill - 2022-05-05 11:51:17.461770,* imported from SmartSheets records [sjt],,,2021-10-20,,,25815860,Vincent Rubinetti,8.0,2021-10-20,hours_split_between_owners
60,2516729878,incorporate 3rd party reactome viewer and geno...,1882442059,"[{'id': 25815860, 'kind': 'person'}]",Done,Posted - 2022-06-02 08:19:42.042905,Captured in extended and new phases of work. D...,,,2022-05-25,,,25815860,Vincent Rubinetti,0.0,2022-05-25,hours_split_between_owners
61,2618506907,3rd-party monday.com API has rate-limiting blo...,2208602434,"[{'id': 25810257, 'kind': 'person'}]",Done,Posted - No Bill - 2022-06-02 08:19:42.830261,The ProdPerfect monday.com API library is pass...,,https://github.com/ProdPerfect/monday/issues/57,2022-05-04,,,25810257,Steve Taylor,4.0,2022-05-04,hours_split_between_owners
62,2627157922,Coursera: Researcher Management and Leadership...,2831164270,"[{'id': 29667633, 'kind': 'person'}]",Done,Posted - No Bill - 2022-06-02 08:19:43.721551,Learning CU Anschutz and broader research proc...,,,2022-05-12,,,29667633,Dave Bunten,24.0,2022-05-12,hours_split_between_owners


<IPython.core.display.Javascript object>

A Task in MDC can track work done in 'sessions' anyone can create a session on a Task, even one "owned" by someone else. This doesn't happen often here, but the following breaks out the sessions as separate task records.

In [21]:
# run through the records, looking for sessions, and if found,
# create new journal tasks for each one
session_records = []

for record in records:

    if isinstance(record["Actual Time__additional_value"], list):
        sessions_list = record["Actual Time__additional_value"]
    else:
        sessions_list = []

    actual_hours = record["Actual Time__duration"] / 60 / 60

    i = 1
    # multiply the number of tasks by sessions
    for session in sessions_list:
        new_rec = record.copy()

        new_rec["MDC Resource ID"] = session["ended_user_id"]
        new_rec["Resource Name"] = users_df.loc[session["ended_user_id"]]["name"]

        start_date = parser.parse(session["started_at"])
        end_date = parser.parse(session["ended_at"])
        new_rec["Session Completed"] = end_date.strftime("%Y-%m-%d")

        # take the difference between the two dates as hours
        difference = end_date - start_date
        new_rec["hours"] = difference.total_seconds() / 60 / 60

        # show each task ID with an index
        new_rec["MDC Task ID"] = f"{record['MDC Task ID']}-{i}"
        i = i + 1

    new_rec["integration_state_rule"] = "hours_from_session_records"
    session_records.append(new_rec)

session_records_df = pd.DataFrame(session_records)
session_records_df.drop(
    columns=[
        "Actual Time__additional_value",
        "Actual Time__duration",
        "Status__changed_at",
        "Actual Hours",
    ],
    inplace=True,
)
session_records_df.rename(columns={"Status__text": "Status"}, inplace=True)
session_records_df.sort_values(by="Task Name")

Unnamed: 0,MDC Task ID,Task Name,MDC Project ID,Owner,Status,Integration Message,Task Notes,Pull Request URL,Issue URL,Date Completed,Extended Hours,Reported Month-end Date,MDC Resource ID,Resource Name,Session Completed,hours,integration_state_rule
102,2512878891-2,Accessibility research and reading,1888314634,"[{'id': 25815860, 'kind': 'person'}]",Done,Posted - No Bill - 2022-05-05 11:53:27.153605,,,,2021-11-19,,,25815860,Vincent Rubinetti,2021-11-10,8.0,hours_from_session_records
100,2512878852-1,Add Center to zendesk website form,1888314634,"[{'id': 25810257, 'kind': 'person'}]",Done,Posted - No Bill - 2022-05-05 11:53:26.484196,Ticket submitted: https://medschool.zendesk.co...,,,2021-11-09,,,25810257,Steve Taylor,2021-11-09,0.5,hours_from_session_records
5,1916502641-4,"Add api mocks to cypress, move from mock-adapt...",1882442059,"[{'id': 25815860, 'kind': 'person'}]",Done,Posted - 2022-04-27 16:42:35.471516,https://github.com/monarch-initiative/monarch-...,,,2021-12-03,,,25815860,Vincent Rubinetti,2021-11-22,8.0,hours_from_session_records
41,2251375853-3,Add build and PyPI publish to pycytominer,2334955423,"[{'id': 25810257, 'kind': 'person'}]",Done,Posted - 2022-04-27 16:49:38.746731,,https://github.com/cytomining/pycytominer/pull...,,2022-02-08,,,25810257,Steve Taylor,2022-02-08,1.0,hours_from_session_records
144,2571669338-3,CCPM GTC debugging,1888314634,"[{'id': 25815853, 'kind': 'person'}]",Done,Posted - No Bill - 2022-05-05 11:59:17.788244,,,,2022-04-21,,,25815853,Faisal Alquaddoomi,2022-04-20,1.0,hours_from_session_records
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,2366527256-5,updating backend to deal with new model changes,1969468997,"[{'id': 25815853, 'kind': 'person'}]",Done,Posted - 2022-04-27 17:02:06.484784,,,,2022-03-14,,,25815853,Faisal Alquaddoomi,2022-03-12,8.0,hours_from_session_records
30,2249553502-4,word-lapse API: optimizing neighbors endpoint,1969468997,"[{'id': 25815853, 'kind': 'person'}]",Done,Posted - 2022-04-27 16:49:17.192304,,,,2022-02-23,,,25815853,Faisal Alquaddoomi,2022-02-09,3.0,hours_from_session_records
93,2487370238-2,word-lapse autocomplete support,1969468997,"[{'id': 25815853, 'kind': 'person'}]",Done,Posted - 2022-04-27 17:03:00.817252,,,https://github.com/greenelab/word-lapse/issues/27,2022-03-30,,,25815853,Faisal Alquaddoomi,2022-03-03,3.0,hours_from_session_records
94,2487376919-1,word-lapse caching server-side support,1969468997,"[{'id': 25815853, 'kind': 'person'}]",Done,Posted - 2022-04-27 17:03:02.759507,,,,2022-03-30,,,25815853,Faisal Alquaddoomi,2022-03-04,3.0,hours_from_session_records


<IPython.core.display.Javascript object>

In [22]:
# merge the owner and session records
journal_task_df = pd.concat([owner_records_df, session_records_df])
journal_task_df

Unnamed: 0,MDC Task ID,Task Name,MDC Project ID,Owner,Status,Integration Message,Task Notes,Pull Request URL,Issue URL,Date Completed,Extended Hours,Reported Month-end Date,MDC Resource ID,Resource Name,hours,Session Completed,integration_state_rule
0,1912542060,Patch AWS (or remove entirely),2303324267,"[{'id': 25810257, 'kind': 'person'}]",Done,Posted - 2022-04-27 16:44:52.850435,Time spent reviewing options; finally decided ...,,,2022-01-25,,,25810257,Steve Taylor,2.0,2022-01-25,hours_split_between_owners
1,1957499412,Way Lab Grant Proposal Creation and Review,1957293587,"[{'id': 25810257, 'kind': 'person'}, {'id': 25...",Done,Posted - 2022-04-27 16:42:44.753809,,,,2021-12-04,,,25810257,Steve Taylor,2.0,2021-12-04,hours_split_between_owners
2,1957499412,Way Lab Grant Proposal Creation and Review,1957293587,"[{'id': 25810257, 'kind': 'person'}, {'id': 25...",Done,Posted - 2022-04-27 16:42:44.753809,,,,2021-12-04,,,25815853,Faisal Alquaddoomi,2.0,2021-12-04,hours_split_between_owners
3,2003198974,"""Word Lapse"" Proposal Creation and Review",1969468997,"[{'id': 25810257, 'kind': 'person'}]",Done,Posted - 2022-04-27 16:42:47.060917,"200 hour proposal sent to Casey and David, 12/...",,,2021-12-04,,,25810257,Steve Taylor,2.0,2021-12-04,hours_split_between_owners
4,2064112158,"monarch blue/green data migration, research",1892630899,"[{'id': 25815853, 'kind': 'person'}]",Done,Posted - 2022-04-27 16:47:42.940380,,,,2022-02-23,,,25815853,Faisal Alquaddoomi,28.0,2022-02-23,hours_split_between_owners
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,2625612513-2,"review, merge preprint model PR",1969468997,"[{'id': 25815853, 'kind': 'person'}]",Done,Posted - 2022-06-02 08:19:57.195418,,,,2022-05-11,,,25815853,Faisal Alquaddoomi,1.0,2022-05-04,hours_from_session_records
161,2660065499-1,remove umap from backend,1969468997,"[{'id': 25815853, 'kind': 'person'}]",Done,Posted - 2022-06-02 08:19:58.135173,,,,2022-05-11,,,25815853,Faisal Alquaddoomi,0.5,2022-05-09,hours_from_session_records
162,2660074814-1,investigate neighbor sorting + swagger docs,1969468997,"[{'id': 25815853, 'kind': 'person'}]",Done,Posted - 2022-06-02 08:19:59.075372,,,,2022-05-23,,,25815853,Faisal Alquaddoomi,0.5,2022-05-09,hours_from_session_records
163,2660074814-1,investigate neighbor sorting + swagger docs,1969468997,"[{'id': 25815853, 'kind': 'person'}]",Done,Posted - 2022-06-02 08:19:59.075372,,,,2022-05-23,,,25815853,Faisal Alquaddoomi,0.5,2022-05-09,hours_from_session_records


<IPython.core.display.Javascript object>

In [23]:
# convert to a datetime... a bit crude for filtering by year and month
date_completed = pd.to_datetime(journal_task_df["Date Completed"])

journal_task_df["task_end_year"] = pd.DatetimeIndex(date_completed).year
journal_task_df["task_end_month"] = pd.DatetimeIndex(date_completed).month

journal_task_df.head()

Unnamed: 0,MDC Task ID,Task Name,MDC Project ID,Owner,Status,Integration Message,Task Notes,Pull Request URL,Issue URL,Date Completed,Extended Hours,Reported Month-end Date,MDC Resource ID,Resource Name,hours,Session Completed,integration_state_rule,task_end_year,task_end_month
0,1912542060,Patch AWS (or remove entirely),2303324267,"[{'id': 25810257, 'kind': 'person'}]",Done,Posted - 2022-04-27 16:44:52.850435,Time spent reviewing options; finally decided ...,,,2022-01-25,,,25810257,Steve Taylor,2.0,2022-01-25,hours_split_between_owners,2022,1
1,1957499412,Way Lab Grant Proposal Creation and Review,1957293587,"[{'id': 25810257, 'kind': 'person'}, {'id': 25...",Done,Posted - 2022-04-27 16:42:44.753809,,,,2021-12-04,,,25810257,Steve Taylor,2.0,2021-12-04,hours_split_between_owners,2021,12
2,1957499412,Way Lab Grant Proposal Creation and Review,1957293587,"[{'id': 25810257, 'kind': 'person'}, {'id': 25...",Done,Posted - 2022-04-27 16:42:44.753809,,,,2021-12-04,,,25815853,Faisal Alquaddoomi,2.0,2021-12-04,hours_split_between_owners,2021,12
3,2003198974,"""Word Lapse"" Proposal Creation and Review",1969468997,"[{'id': 25810257, 'kind': 'person'}]",Done,Posted - 2022-04-27 16:42:47.060917,"200 hour proposal sent to Casey and David, 12/...",,,2021-12-04,,,25810257,Steve Taylor,2.0,2021-12-04,hours_split_between_owners,2021,12
4,2064112158,"monarch blue/green data migration, research",1892630899,"[{'id': 25815853, 'kind': 'person'}]",Done,Posted - 2022-04-27 16:47:42.940380,,,,2022-02-23,,,25815853,Faisal Alquaddoomi,28.0,2022-02-23,hours_split_between_owners,2022,2


<IPython.core.display.Javascript object>

In [24]:
# only interested in this month's tasks
mask = (journal_task_df["task_end_year"] == year_for_report) & (
    journal_task_df["task_end_month"] == month_for_report
)
journal_task_df = journal_task_df.loc[mask].copy()
journal_task_df

Unnamed: 0,MDC Task ID,Task Name,MDC Project ID,Owner,Status,Integration Message,Task Notes,Pull Request URL,Issue URL,Date Completed,Extended Hours,Reported Month-end Date,MDC Resource ID,Resource Name,hours,Session Completed,integration_state_rule,task_end_year,task_end_month
9,2259600839,meetings and PR,1882442059,"[{'id': 25815860, 'kind': 'person'}]",Done,Posted - 2022-04-27 16:36:56.979918,* imported from SmartSheets records [sjt],https://github.com/monarch-initiative/monarch-...,,2021-09-27,,,25815860,Vincent Rubinetti,48.0,2021-09-27,hours_split_between_owners,2021,9
10,2259614494,"monarch ui check in meeting, figma sketch changes",1882442059,"[{'id': 25815860, 'kind': 'person'}]",Done,Posted - 2022-04-27 16:36:59.369126,* imported from SmartSheets records [sjt],,,2021-09-28,,,25815860,Vincent Rubinetti,8.0,2021-09-28,hours_split_between_owners,2021,9


<IPython.core.display.Javascript object>

Finally merge the tasks and projects together for a final task list.

In [25]:
# quick func for the next dataframe apply
def month_end_me(row):
    return month_end_date(row["task_end_year"], row["task_end_month"])

<IPython.core.display.Javascript object>

In [26]:
df = pd.merge(
    journal_task_df,
    account_projects_df,
    how="left",
    on="MDC Project ID",
)

# create a month-ending date column
month_end_dates = df.apply(month_end_me, axis=1)
if len(month_end_dates) > 0:
    df["Month Ending Date"] = month_end_dates
else:
    # going through the motions now so the rest of the notebook will run
    df["Month Ending Date"] = None

df.rename(
    columns={
        "monday_id_x": "MDC Task ID",
        "monday_id_y": "MDC Project ID",
        "Title_x": "Title",
        "Title_y": "Project Title",
        "Notes_x": "Notes",
        "Notes_y": "Project Notes",
        "hours": "Hours",
    },
    inplace=True,
)

df.drop(
    columns=["task_end_year", "task_end_month", "Owner"],
    inplace=True,
)

df

Unnamed: 0,MDC Task ID,Task Name,MDC Project ID,Status,Integration Message,Task Notes,Pull Request URL,Issue URL,Date Completed,Extended Hours,...,Project Name,Project Lifecycle,MDC Client ID,Grant Number,Project Notes,Project Closed Date,Client Name,Client Notes,No Bill,Month Ending Date
0,2259600839,meetings and PR,1882442059,Done,Posted - 2022-04-27 16:36:56.979918,* imported from SmartSheets records [sjt],https://github.com/monarch-initiative/monarch-...,,2021-09-27,,...,TISLab: Monarch UI (3.0) Redesign,Closed,1882681714,213359,Was managed and tracked via github and zenhub;...,2022-05-25,HealthAI: TISLab,,False,2021-09-30
1,2259614494,"monarch ui check in meeting, figma sketch changes",1882442059,Done,Posted - 2022-04-27 16:36:59.369126,* imported from SmartSheets records [sjt],,,2021-09-28,,...,TISLab: Monarch UI (3.0) Redesign,Closed,1882681714,213359,Was managed and tracked via github and zenhub;...,2022-05-25,HealthAI: TISLab,,False,2021-09-30


<IPython.core.display.Javascript object>

In [27]:
# only need billable
df = df.loc[df["No Bill"] == False]
df

Unnamed: 0,MDC Task ID,Task Name,MDC Project ID,Status,Integration Message,Task Notes,Pull Request URL,Issue URL,Date Completed,Extended Hours,...,Project Name,Project Lifecycle,MDC Client ID,Grant Number,Project Notes,Project Closed Date,Client Name,Client Notes,No Bill,Month Ending Date
0,2259600839,meetings and PR,1882442059,Done,Posted - 2022-04-27 16:36:56.979918,* imported from SmartSheets records [sjt],https://github.com/monarch-initiative/monarch-...,,2021-09-27,,...,TISLab: Monarch UI (3.0) Redesign,Closed,1882681714,213359,Was managed and tracked via github and zenhub;...,2022-05-25,HealthAI: TISLab,,False,2021-09-30
1,2259614494,"monarch ui check in meeting, figma sketch changes",1882442059,Done,Posted - 2022-04-27 16:36:59.369126,* imported from SmartSheets records [sjt],,,2021-09-28,,...,TISLab: Monarch UI (3.0) Redesign,Closed,1882681714,213359,Was managed and tracked via github and zenhub;...,2022-05-25,HealthAI: TISLab,,False,2021-09-30


<IPython.core.display.Javascript object>

In [28]:
# create a group by report and post to SE Project/Grant Time smartsheet
report_df = (
    df.groupby(["MDC Client ID", "MDC Project ID", "MDC Resource ID"])
    .agg(
        {
            "Client Name": "first",
            "Project Name": "first",
            "Resource Name": "first",
            "Hours": "sum",
            "Month Ending Date": "first",
            "Grant Number": "first",
            "No Bill": "first",
        }
    )
    .reset_index()
)
report_df["Prepared Date"] = prepared_date
report_df

Unnamed: 0,MDC Client ID,MDC Project ID,MDC Resource ID,Client Name,Project Name,Resource Name,Hours,Month Ending Date,Grant Number,No Bill,Prepared Date
0,1882681714,1882442059,25815860,HealthAI: TISLab,TISLab: Monarch UI (3.0) Redesign,Vincent Rubinetti,56.0,2021-09-30,213359,False,2022-07-05


<IPython.core.display.Javascript object>

## Build up the JSON object

In [29]:
# a list of all reports
reports = []

<IPython.core.display.Javascript object>

In [30]:
def get_client(clients_df, client_id):
    """
    Given the accounts/client DF, lookup an individual client.

    Return a dict
    """
    client_dict = clients_df.loc[clients_df["MDC Client ID"] == client_id].to_dict(
        "records"
    )[0]

    client = {
        "MDC Client ID": str(client_dict["MDC Client ID"]),
        "Client Name": client_dict["Client Name"],
        "No Bill": client_dict["No Bill"],
    }

    return client


# mini test
# get_client(accounts_df, 1882681714)

<IPython.core.display.Javascript object>

In [31]:
def get_project(projects_df, project_id):
    """
    Given a project DF, lookup project info by project ID

    Return a list of dict
    """

    projects = []

    project_dict = projects_df.loc[projects_df["MDC Project ID"] == project_id].to_dict(
        "records"
    )
    if len(project_dict) > 0:

        project = {
            "MDC Project ID": str(project_dict[0]["MDC Project ID"]),
            "Project Name": project_dict[0]["Project Name"],
            "Grant Proposal #": project_dict[0]["Grant Number"],
        }
        if project_dict[0].get("Project Notes"):
            project["Notes"] = project_dict[0]["Project Notes"]

    return project


# mini test
# get_project(projects_df, 1882442059)

<IPython.core.display.Javascript object>

In [32]:
def get_resource(users_df, resource_id):
    """
    Given the users DF, lookup resource by resource ID

    Return a list of dict
    """
    users_df["MDC Resource ID"] = users_df.index
    users_dict = users_df.loc[resource_id].to_dict()

    resource = None
    if len(users_dict) > 0:
        resource = {
            "MDC Resource ID": str(users_dict["MDC Resource ID"]),
            "Resource Name": users_dict["name"],
        }

    return resource


# mini test
# get_resource(users_df, 25815860)

<IPython.core.display.Javascript object>

In [33]:
def get_tasks(users_df, tasks_df, project_id, resource_id):
    """
    Given the final tasks DF, lookup resources and tasks by project ID and resource ID

    Return a list of dict
    """

    # keep a roster of resources and tasks

    task_dict = tasks_df.loc[
        (
            (tasks_df["MDC Project ID"] == project_id)
            & (tasks_df["MDC Resource ID"] == resource_id)
        )
    ].to_dict("records")

    resource = get_resource(users_df, resource_id)
    resource["Tasks"] = []

    for task in task_dict:

        # build up a task block
        task_dict = {
            "MDC Task ID": str(task["MDC Task ID"]),
            "Task Name": task["Task Name"],
            "Notes": task["Task Notes"],
            "Pull Request URL": task["Pull Request URL"],
            "Issue URL": task["Issue URL"],
            "Completed Hours": task["Hours"],
            "Task Complete Date": task["Date Completed"],
            "Integration Message": task["integration_state_rule"],
            "Session Complete Date": task["Session Completed"],
        }

        # append it to the list
        resource["Tasks"].append(task_dict)

    return resource


# mini test
pprint(get_tasks(users_df, df, 1882442059, 25815860))

{'MDC Resource ID': '25815860',
 'Resource Name': 'Vincent Rubinetti',
 'Tasks': [{'Completed Hours': 48.0,
            'Integration Message': 'hours_split_between_owners',
            'Issue URL': None,
            'MDC Task ID': '2259600839',
            'Notes': '* imported from SmartSheets records [sjt]',
            'Pull Request URL': 'https://github.com/monarch-initiative/monarch-ui-new/pull/44',
            'Session Complete Date': '2021-09-27',
            'Task Complete Date': '2021-09-27',
            'Task Name': 'meetings and PR'},
           {'Completed Hours': 8.0,
            'Integration Message': 'hours_split_between_owners',
            'Issue URL': None,
            'MDC Task ID': '2259614494',
            'Notes': '* imported from SmartSheets records [sjt]',
            'Pull Request URL': None,
            'Session Complete Date': '2021-09-28',
            'Task Complete Date': '2021-09-28',
            'Task Name': 'monarch ui check in meeting, figma sketch chang

<IPython.core.display.Javascript object>

In [34]:
df.groupby(["MDC Project ID", "MDC Resource ID"]).agg(
    {"MDC Project ID": "first", "MDC Resource ID": "first"}
).to_dict("records")

[{'MDC Project ID': 1882442059, 'MDC Resource ID': 25815860}]

<IPython.core.display.Javascript object>

In [35]:
unique_clients_list = report_df["MDC Client ID"].unique().tolist()
unique_clients_list

[1882681714]

<IPython.core.display.Javascript object>

In [36]:
# we've come all this way (mostly because of MDC's subpar API), so we'll use report_df as our master
for client_id in unique_clients_list:

    report_dict = {
        "Month-ending Date": month_ending_date,
        "Report Prepared Date": prepared_date,
    }

    report_dict["Client"] = get_client(accounts_df, client_id)
    report_dict["Client"]["Projects"] = []

    unique_projects = (
        df.loc[df["MDC Client ID"] == client_id]["MDC Project ID"].unique().tolist()
    )

    for project_id in unique_projects:
        a_project = get_project(projects_df, project_id)

        unique_resources = (
            report_df.loc[
                (
                    (report_df["MDC Client ID"] == client_id)
                    & (report_df["MDC Project ID"] == project_id)
                )
            ]["MDC Resource ID"]
            .unique()
            .tolist()
        )

        for resource_id in unique_resources:

            # only add the Resources block if at least one shows up
            if not a_project.get("Resources"):
                a_project["Resources"] = []

            # get the project tasks block and append it to the project
            project_tasks = get_tasks(users_df, df, project_id, resource_id)
            a_project["Resources"].append(project_tasks)

        report_dict["Client"]["Projects"].append(a_project)

    reports.append(report_dict)

pprint(report_dict)

{'Client': {'Client Name': 'HealthAI: TISLab',
            'MDC Client ID': '1882681714',
            'No Bill': False,
            'Projects': [{'Grant Proposal #': '213359',
                          'MDC Project ID': '1882442059',
                          'Notes': 'Was managed and tracked via github and '
                                   'zenhub; backfilled from SmartSheet and '
                                   'zenhub with best effort.',
                          'Project Name': 'TISLab: Monarch UI (3.0) Redesign',
                          'Resources': [{'MDC Resource ID': '25815860',
                                         'Resource Name': 'Vincent Rubinetti',
                                         'Tasks': [{'Completed Hours': 48.0,
                                                    'Integration Message': 'hours_split_between_owners',
                                                    'Issue URL': None,
                                                    'MDC Task ID':

<IPython.core.display.Javascript object>

## Schema validation

In [37]:
def validate_json(schema, json_data):

    # mostly from https://stackoverflow.com/questions/25145160/json-schema-ref-does-not-work-for-relative-path
    schemas = (json.load(open(source)) for source in Path("schema").iterdir())
    schema_store = {schema["$id"]: schema for schema in schemas}

    resolver = RefResolver.from_schema(schema, store=schema_store)

    try:
        result = validate(instance=json_data, schema=schema, resolver=resolver)
        return True, None
    except ValidationError as err:
        return False, err.message

<IPython.core.display.Javascript object>

In [38]:
# open schema
with open(os.path.join("schema", "effort_hours-report.json"), "r") as file:
    schema = json.load(file)

<IPython.core.display.Javascript object>

In [41]:
assert (True, None) == validate_json(schema, report_dict)

<IPython.core.display.Javascript object>

In [40]:
fail on purpose

SyntaxError: invalid syntax (3640127853.py, line 1)

ERROR:root:Cannot parse: 1:5: fail on purpose
Traceback (most recent call last):
  File "C:\Users\tastephe\AppData\Local\pypoetry\Cache\virtualenvs\mondaydotcom-notebooks-7FfDI7tN-py3.9\lib\site-packages\lab_black.py", line 218, in format_cell
    formatted_code = _format_code(cell)
  File "C:\Users\tastephe\AppData\Local\pypoetry\Cache\virtualenvs\mondaydotcom-notebooks-7FfDI7tN-py3.9\lib\site-packages\lab_black.py", line 29, in _format_code
    return format_str(src_contents=code, mode=FileMode())
  File "src\black\__init__.py", line 1163, in format_str
  File "src\black\__init__.py", line 1173, in _format_str_once
  File "src\black\parsing.py", line 128, in lib2to3_parse
black.parsing.InvalidInput: Cannot parse: 1:5: fail on purpose


Now, Smartsheet's turn?

In [None]:
# smartsheet can have duplicate sheet names,
# so best not to rely on them
time_sheet_id = 3567675495475076
time_sheet = ss_client.Sheets.get_sheet(time_sheet_id)

In [None]:
# break down the cell IDs into a quick lookup box
cell_ids = {}
for column in time_sheet.columns:
    my_column = column.to_dict()
    cell_ids[my_column["title"]] = my_column["id"]
cell_ids

Add the records to Smartsheet

In [None]:
rows = []
for k, v in report_df.to_dict("index").items():

    row = ss_client.models.row.Row()

    row.cells.append(
        {"column_id": cell_ids["Client/Client"], "value": v["Client Name"]}
    )
    if v.get("Grant Number"):
        row.cells.append(
            {"column_id": cell_ids["Grant Proposal #"], "value": v["Grant Number"]}
        )
    row.cells.append(
        {"column_id": cell_ids["Project Title"], "value": v["Project Title"]}
    )
    row.cells.append(
        {"column_id": cell_ids["Month-end Date"], "value": v["Month Ending Date"]}
    )
    row.cells.append(
        {"column_id": cell_ids["Completed Hours"], "value": v["Hours"]}
    )
    row.cells.append({"column_id": cell_ids["Resource"], "value": v["Resource"]})
    row.cells.append({"column_id": cell_ids["MDC Client ID"], "value": v["MDC Client ID"]})
    row.cells.append({"column_id": cell_ids["MDC Project ID"], "value": v["MDC Project ID"]})

    row.to_bottom = True
    rows.append(row)

In [None]:
result = None
if rows and post_records:
    logger.info("Adding %s posted rows to SmartSheet", len(rows))
    result = ss_client.Sheets.add_rows(time_sheet_id, rows)

result

In [None]:
# get the row ids and create a Series
if result:
    my_list = []
    for row in result.to_dict()["data"]:
        my_list.append(row["id"])
        
    row_series = pd.Series(my_list, name="row_id", dtype=np.int64)
    row_series

    report_df = pd.concat([report_df, row_series], axis=1)

report_df

In [None]:
def filter_tasks(df, client, project, resource, month_end_date):
    """Get the journal tasks based on the details we'll send to smartsheet."""
    return df[
        (
            (df["Client Name"] == client)
            & (df["Project Title"] == project)
            & (df["Resource"] == resource)
            & (df["Month Ending Date"] == month_end_date)
        )
    ]

In [None]:
# create a dictionary to make attaching the files easier
new_dict = {}

# ensure all of the row_ids are filled in
if report_df['row_id'].all():

    for k, v in report_df.to_dict("index").items():
        row_id = v["row_id"]

        # add the filtered tasks to a list
        new_dict[row_id] = filter_tasks(
            df,
            v["Client Name"],
            v["Project Title"],
            v["Resource"],
            v["Month Ending Date"],
        )

In [None]:
if not os.path.exists("_cache"):
    os.mkdir("_cache")

In [None]:
# attach file to each record
for k, v in new_dict.items():
    filename = os.path.join("_cache", f"{k}.csv")

    # save file
    v.to_csv(filename, index=False)

    if post_records:
        with open(filename, "r") as f:
            ss_client.Attachments.attach_file_to_row(time_sheet_id, k, f)

In [None]:
if post_records:
    billable_posted = False
    for k, v in df.to_dict("index").items():
        if v["No Bill"]:
            result = conn.items.change_item_value(
                TASKS_BOARD_ID,
                v["MDC Task ID"],
                "text01",
                f"Posted - No Bill - {datetime.now()}",
            )
        else:
            result = conn.items.change_item_value(
                TASKS_BOARD_ID, v["MDC Task ID"], "text01", f"Posted - {datetime.now()}"
            )
            billable_posted = True

Delete any estimates

In [None]:
# delete the estimates only
result = None
# only do this if we had some monthly tasks to report
if billable_posted and len(report_df) > 0 and post_records:
    result = ss_client.Sheets.get_sheet(time_sheet_id, filter_id=5850658663360388)
    rows_to_delete = [x["id"] for x in result.to_dict()["rows"] if not x["filteredOut"]]
    if rows_to_delete:
        result = ss_client.Sheets.delete_rows(posted_tasks_id, rows_to_delete)
result