# Sum the time logged on Done tasks and add it to a dedicated board.

This notebook is a work in progress; a better way to do this is being put together.

In [None]:
#%load_ext nb_black
import json
import logging
from datetime import datetime, timedelta
from typing import Dict, List

import pandas as pd
import prefect
from box import Box

# uses the pretty okay SDK here: https://github.com/ProdPerfect/monday
from monday import MondayClient
from mondaydotcom_utils.formatted_value import FormattedValue, get_col_defs
from mondaydotcom_utils.time_block import TimeBlock
from mondaydotcom_utils.utilities import (
    breakout_record,
    get_items_by_board,
    validate_task_record,
)
from prefect import Flow, Parameter, task, unmapped
from prefect.executors import LocalDaskExecutor, LocalExecutor

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

In [None]:
# Monday.com board IDs, kept as strings.
# Only TASKS_BOARD_ID, PROJECTS_BOARD_ID and PROJECT_TASK_TIME_BOARD_ID are
# referenced by the code shown in this notebook; the others may be used
# elsewhere -- TODO confirm before removing.
TASKS_BOARD_ID = "1883170887"
AGREEMENTS_BOARD_ID = "1882423671"
GRANT_FTE_ALLOC_BOARD_ID = "1916646665"
GRANTS_BOARD_ID = "1941740920"
PROJECTS_BOARD_ID = "1882404316"

# Board that receives the summed hours rows.
PROJECT_TASK_TIME_BOARD_ID = "2398200403"

# Don't set the key here for development work; leave it empty and it is read
# from the secrets-<environment>.yaml file instead.
MONDAY_KEY = ""
# Selects which secrets-<environment>.yaml file is read when MONDAY_KEY is empty.
environment = "dev"

# Change these, or set them as papermill parameters, to pick the year and
# month to report on.
year_for_report = 2022
month_for_report = 3

In [None]:
@task
def get_monday_client(key):
    """
    Build the Monday.com client that the other tasks talk through.

    ``key`` is handed straight to ``MondayClient``. An info message is written
    to the Prefect run logger and the new client is returned.
    """
    client = MondayClient(key)
    prefect.context.get("logger").info("Monday.com client created.")
    return client

In [None]:
@task(max_retries=3, retry_delay=timedelta(seconds=15))
def get_users(monday_conn):
    """
    Fetch the Monday.com users and return them as a DataFrame.

    The user list is read from the ``["data"]["users"]`` part of the
    ``fetch_users()`` response and indexed by each user's "id".
    The task is retried up to 3 times, 15 seconds apart.
    """
    users = monday_conn.users.fetch_users()["data"]["users"]
    return pd.DataFrame(users).set_index("id")

In [None]:
@task(max_retries=3, retry_delay=timedelta(seconds=15))
def get_projects(monday_conn):
    """
    Load the projects board and drop the columns this report does not use.

    Returns the DataFrame from ``get_items_by_board`` for PROJECTS_BOARD_ID
    minus the columns listed below. A missing column raises ``KeyError``,
    exactly as before. The task is retried up to 3 times, 15 seconds apart.
    """
    projects_df = get_items_by_board(monday_conn, PROJECTS_BOARD_ID)

    # Names are matched literally against the DataFrame's columns. The
    # "Etimated" spelling is kept verbatim -- presumably it mirrors the board's
    # own column title; confirm before correcting it.
    unused_columns = [
        "Repo Description (mirror)",
        "Project Tasks",
        "Subitems",
        "Etimated Time (Hours) (mirror)",
        "Total Task Time (Hours) (mirror)",
        "Project Contacts",
        "SET Resource",
        "Timeline",
        "Customer Source",
        "Tasks Status (mirror)",
        "Dependency",
        "Date Added",
    ]

    # Return a trimmed copy rather than mutating the fetched frame in place.
    return projects_df.drop(columns=unused_columns)

In [None]:
def breakout_time_sessions(row):
    """
    Reduce a row's Monday.com "Actual Time" value to two simple values.

    Meant to be the callable for ``DataFrame.apply(..., axis=1)``: the row's
    "Actual Time" cell is parsed with a ``TimeBlock``, and the block's
    ``total_duration_hours`` and ``time_records`` are returned as a pair.
    """
    time_block = TimeBlock()
    time_block.parse(row["Actual Time"])

    total_hours = time_block.total_duration_hours
    sessions = time_block.time_records
    return total_hours, sessions

In [None]:
@task(max_retries=3, retry_delay=timedelta(seconds=15))
def get_tasks(monday_conn):
    """
    Load the Done tasks that are flagged Ready, one row per customer project.

    Steps:
      1. fetch the items of TASKS_BOARD_ID whose "status" is "Done";
      2. add "Total Duration Hours" and "Time Sessions" columns by applying
         ``breakout_time_sessions`` to every row;
      3. keep only rows whose "Integration Message" starts with "Ready"
         (missing messages count as not ready);
      4. explode the "Customer Project" list column and renumber the index.

    The task is retried up to 3 times, 15 seconds apart.
    """
    # only getting done tasks
    tasks_df = get_items_by_board(monday_conn, TASKS_BOARD_ID, "status", "Done")

    # break the time sessions out
    tasks_df[["Total Duration Hours", "Time Sessions"]] = tasks_df.apply(
        breakout_time_sessions, axis=1, result_type="expand"
    )

    # Only include Ready tasks
    is_ready = tasks_df["Integration Message"].str.startswith("Ready", na=False)

    # projects should be limited to just one, so this will bring it out of the list
    return tasks_df.loc[is_ready].explode(["Customer Project"], ignore_index=True)

In [None]:
@task
def convert_tasks_to_journal(tasks_df, users_df):
    """
    Based on how a record is broken out, create 1 to N journal records
    from each original Monday.com Task.

    Every row of ``tasks_df`` is passed to ``breakout_record`` together with
    ``users_df``; all the records it returns are collected into one DataFrame.
    The result gains "task_status", "task_end_year" and "task_end_month"
    columns, has "task_end_date" converted to datetimes, and loses the
    helper/mirror columns listed at the bottom.
    """
    journal_items = []
    for record in tasks_df.to_dict(orient="records"):
        # each task contributes its 1 to N broken-out records
        journal_items.extend(breakout_record(record, users_df))

    df = pd.DataFrame(journal_items)

    # break out the actual task status also; we've already used the changed_at
    # field to help break the records out... so this can be simplified for info
    df["task_status"] = df["Status"].apply(
        lambda x: json.loads(x)["text"] if x else None
    )

    # convert to a dataframe date... a bit crude for filtering
    df["task_end_date"] = pd.to_datetime(df["task_end_date"])
    # build the index once and read both year and month from it
    end_dates = pd.DatetimeIndex(df["task_end_date"])
    df["task_end_year"] = end_dates.year
    df["task_end_month"] = end_dates.month

    # Some of this helped build the record, some is just mirror or lookup gak.
    return df.drop(
        columns=[
            "Actual Hours",
            "Actual Time",
            "Customer Repos",
            "Date Added",
            "Date Completed",
            "Dependencies",
            "Integration Message",
            "Subtasks",
            "Timeline",
            "Total Actual Hours (formula)",
            "Total Duration Hours",
            "Time Sessions",
            "Owner",
            "Status",
        ]
    )

In [None]:
@task
def merge_tasks_and_projects(tasks_df, projects_df):
    """
    Left-join the journal tasks to the projects and tidy the column names.

    Tasks are matched on their "Customer Project" value against the projects'
    "monday_id". The suffixed columns produced by the merge are then renamed:
    the ``_x`` (task side) columns become "task_id", "Title" and "Notes", and
    the ``_y`` (project side) columns become "project_id", "Project Title" and
    "Project Notes".
    """
    renamed_columns = {
        "monday_id_x": "task_id",
        "monday_id_y": "project_id",
        "Title_x": "Title",
        "Title_y": "Project Title",
        "Notes_x": "Notes",
        "Notes_y": "Project Notes",
    }

    merged = tasks_df.merge(
        projects_df,
        how="left",
        left_on="Customer Project",
        right_on="monday_id",
    )
    return merged.rename(columns=renamed_columns)

In [None]:
@task
def display_df(df, title, count=5):
    """
    Log ``title`` to the Prefect run logger, then display the first ``count``
    rows of ``df`` in the notebook.
    """
    prefect.context.get("logger").info(title)
    preview = df.head(count)
    display(preview)

In [None]:
@task(max_retries=3, retry_delay=timedelta(seconds=15))
def create_project_task_time_item(conn, record):
    """
    Create one item on the project task time board from a grouped-hours record.

    ``record`` must provide "Customer Project", "Month End Date", "Total Hours"
    and "owner". The item is named "<month end>,<owner>,<project id>" and is
    created in the "topics" group with the hours, owner, date and linked
    project as column values. The column values are also logged at info level.
    The task is retried up to 3 times, 15 seconds apart.
    """
    run_logger = prefect.context.get("logger")

    linked_project = int(record["Customer Project"])
    period_end = str(record["Month End Date"])
    total_hours = str(record["Total Hours"])
    owner_name = str(record["owner"])

    column_values = {
        "numbers": total_hours,
        "text0": owner_name,
        "date4": period_end,
        "connect_boards": {"item_ids": [linked_project]},
    }
    run_logger.info(column_values)

    conn.items.create_item(
        board_id=PROJECT_TASK_TIME_BOARD_ID,
        group_id="topics",
        item_name=f"{period_end},{owner_name},{linked_project}",
        column_values=column_values,
    )

In [None]:
@task
def df_to_dict(df: pd.DataFrame, showme=False) -> List[Dict]:
    """
    Convert a DataFrame into a list of per-row dictionaries.

    This is most often used so Prefect can map over the records. When
    ``showme`` is true the records are also displayed in the notebook.
    """
    records = df.to_dict("records")
    if showme:
        display(records)
    return records

In [None]:
@task(max_retries=3, retry_delay=timedelta(seconds=15))
def set_task_status_to_archive(conn, record):
    """
    Mark a task on the tasks board as posted.

    Writes "Posted - <datetime.now()>" into the "text01" column of the item
    whose id is ``record["task_id"]``. Nothing is returned.
    The task is retried up to 3 times, 15 seconds apart.
    """
    conn.items.change_item_value(
        TASKS_BOARD_ID, record["task_id"], "text01", f"Posted - {datetime.now()}"
    )

In [None]:
@task
def filter_project_hours(tasks_df, year, month) -> pd.DataFrame:
    """
    Keep only the tasks that ended in the requested year and month.

    Rows are selected where "task_end_year" equals ``year`` and
    "task_end_month" equals ``month``. The selection is returned as a
    DataFrame.
    """
    in_year = tasks_df["task_end_year"] == year
    in_month = tasks_df["task_end_month"] == month
    return tasks_df.loc[in_year & in_month]

In [None]:
@task
def group_project_hours(tasks_df, year=None, month=None) -> List[Dict]:
    """
    Total the "hours" per owner and customer project for one reporting month.

    ``tasks_df`` is grouped by "owner" and "Customer Project" and its "hours"
    are summed into "Total Hours". Every resulting row is stamped with a
    "Month End Date": the last calendar day of the reporting month.

    ``year`` and ``month`` select that reporting month. When either is left
    as ``None`` the module-level ``year_for_report`` / ``month_for_report``
    value is used instead, which is what this task always did before the
    arguments existed.

    Returns a list of record dictionaries (one per owner/project pair).
    """
    # Fall back to the notebook-level settings so existing callers that pass
    # only the DataFrame keep working unchanged.
    report_year = year_for_report if year is None else year
    report_month = month_for_report if month is None else month

    out_df = (
        tasks_df.groupby(["owner", "Customer Project"])["hours"]
        .sum()
        .to_frame(name="Total Hours")
        .reset_index()
    )

    out_df["Month End Date"] = pd.Period(
        f"{report_year}-{report_month}", freq="M"
    ).end_time.date()

    return out_df.to_dict("records")

In [None]:
# Build the Prefect flow: fetch users, tasks and projects from Monday.com,
# turn the tasks into journal records, write the month's summed hours to the
# project task time board, then mark the source tasks as posted.
with Flow("monday.com task integration") as flow:

    # parameters (supplied through flow.run(parameters=...))
    key = Parameter("key")
    year = Parameter("year")
    month = Parameter("month")

    conn = get_monday_client(key)

    # tasks DAG

    # get users
    users_df = get_users(conn)

    # get tasks
    task_items_df = get_tasks(conn)

    # breakdown by rules into individual "journal-ready" tasks
    journalled_tasks_df = convert_tasks_to_journal(task_items_df, users_df)

    projects_df = get_projects(conn)

    project_tasks_df = merge_tasks_and_projects(journalled_tasks_df, projects_df)

    # Update the project-tasks hours board
    filtered_df = filter_project_hours(project_tasks_df, year, month)
    # per-task records, used further down to mark each task as posted
    filtered_dict = df_to_dict(filtered_df)

    # group the project hours together
    # NOTE(review): only filtered_df is passed here, so the "Month End Date"
    # on the grouped rows comes from the module-level year_for_report /
    # month_for_report, not from the `year` / `month` parameters above.
    grouped_dict = group_project_hours(filtered_df)
    # one board item per grouped record; the connection is shared, not mapped
    inserted_rows = create_project_task_time_item.map(unmapped(conn), grouped_dict)

    # Set the tasks to posted
    # upstream_tasks makes this wait for the board inserts above
    posted_tasks = set_task_status_to_archive.map(unmapped(conn), filtered_dict, upstream_tasks=[inserted_rows])

In [None]:
# Resolve the API key, then run the flow for the configured year and month.
if not MONDAY_KEY:
    # The key hasn't been passed as a papermill parameter, so read it from the
    # secrets-<environment>.yaml file (at apps.monday.API_KEY).
    secrets = Box.from_yaml(filename=f"secrets-{environment}.yaml")
    MONDAY_KEY = secrets.apps.monday.API_KEY

# Keys must match the Parameter names declared in the flow.
params = {"key": MONDAY_KEY, "year": year_for_report, "month": month_for_report}
# `state` is kept so task results can be inspected after the run.
state = flow.run(parameters=params, executor=LocalDaskExecutor())

In [None]:
# df = state.result[filtered_df]._result.value
# df.head()