In [None]:
import os
import pandas as pd
from datetime import datetime

In [None]:
# Date stamp (day_month_year) used to name this run's output folder.
current_date = datetime.now().strftime("%d_%m_%Y")
print(current_date)

# Create the dated output folder unless it is already present on disk.
folder_name = f"dashboard_files_{current_date}"
folder_path = os.path.join(r"C:\Users\O304312\Documents\Dashboard Files", folder_name)
if os.path.exists(folder_path):
    print(f"Folder already exists: {folder_path}")
else:
    os.makedirs(folder_path)
    print(f"Folder created at: {folder_path}")

In [None]:
# Reference dates for the snapshot. Both month boundaries are pinned to
# midnight so that the "<" cut-off filters in later cells do not depend on
# the time of day at which the notebook is run.
today = datetime.today()
first_of_this_month = today.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
# Subtracting a pandas DateOffset yields a pandas Timestamp; the day is
# already 1, so no further adjustment is needed.
first_of_last_month = first_of_this_month - pd.DateOffset(months=1)

# Full name of the previous month (as produced by strftime("%B")), in
# lower-case and capitalised form, used when naming the output files.
month = first_of_last_month.strftime("%B").lower()
print(month)
cap_month = month.capitalize()
print(cap_month)

In [None]:
# Root folder that holds the snapshot data sources.
snapshot_ds = r"J:\ONELINK\Snapshot Datasources"

In [None]:
# Load the raw visit export and list its columns so the selection made in
# the next cell can be checked against what the file actually contains.
visit_data_path = r"C:\Users\O304312\Downloads\Data Table Visit Data.csv"
visit_data = pd.read_csv(visit_data_path, low_memory=False)

print(visit_data.columns)

In [None]:
# Columns carried over from the raw visit export, in output order.
visit_columns = [
    "Site Name",
    "Site Study Code",
    "Subject ID",
    "Participant Status",
    "Participant Protocol Arm",
    "Visit Period",
    "Visit Name",
    "Visit Status",
    "Visit Completed Date",
]
visit_data = visit_data[visit_columns]
print(visit_data.dtypes)

# Map the export's headers onto the names written to the output file.
visit_renames = {
    "Site Name": "Site",
    "Site Study Code": "Site Study ID",
    "Participant Protocol Arm": "Arm",
}
visit_data = visit_data.rename(columns=visit_renames)

# Parse the completion date; values that cannot be parsed become NaT.
visit_completed = pd.to_datetime(visit_data["Visit Completed Date"], errors="coerce")
visit_data["Visit Completed Date"] = visit_completed

# Keep visits completed strictly before the first day of last month
# (NaT rows never satisfy the comparison, so they are dropped as well).
# NOTE(review): this excludes all of last month's visits - confirm that is
# the intended snapshot cut-off.
visit_data = visit_data[visit_completed < first_of_last_month]

visit_data.to_csv(f"{folder_path}/Visit Data_data.csv", index=False)

In [None]:
# Load the raw study accrual export and list its columns for reference.
study_accrual_path = r"C:\Users\O304312\Downloads\Data Table Study Accrual.csv"
study_accruals = pd.read_csv(study_accrual_path, low_memory=False)

print(study_accruals.columns)

In [None]:
# Columns carried over from the raw accrual export, in output order.
accrual_columns = [
    "Site Name",
    "Sponsor Site Number",
    "Network Study Code",
    "Network Study Status",
    "Site Study Code",
    "Site Study Status",
    "Study Title",
    "Study Phase",
    "Study Type",
    "Funding Source",
    "Site IRB Name",
    "Site IRB Status",
    "Site IRB Expiration Date",
    "IRB Submission Number",
    "Principal Investigators",
    "Lead Coordinators",
    "Site Study Start Date",
    "Site Study End Date",
    "Site Enrollment Start Date",
    "Site Enrollment End Date",
    "Study Therapeutic Areas",
    "Study Therapeutic Area Details",
    "Study Sponsors",
    "Site Enrollment Target",
    "CRO Name",
    "Total Patients Prescreened Sum",
    "Failed Prescreening Sum",
    "Total Participants Sum",
    "In Screening Sum",
    "Failed Screening Sum",
    "In Treatment Sum",
    "Completed Sum",
    "Follow Up Sum",
    "Off Study Sum",
    "Long Term Follow Up Sum",
]

# Map the export's headers onto the names written to the output file.
accrual_renames = {
    "Site Name": "Site",
    "Site Study Code": "Site Study ID",
    "Site Enrollment Target": "Site Enrollment Targets",
}

study_accruals = study_accruals[accrual_columns].rename(columns=accrual_renames)

print(study_accruals.dtypes)

study_accruals.to_csv(f"{folder_path}/Study Accrual Data.csv", index=False)

In [None]:
# Load the raw participant information export and list its columns.
partis_info_path = r"C:\Users\O304312\Downloads\Data Table Participant Information.csv"
partis_info = pd.read_csv(partis_info_path, low_memory=False)

print(partis_info.columns)

In [None]:
# Capture the optional fallback column BEFORE narrowing the frame: the
# selection below does not include it, so checking for it afterwards could
# never succeed and the fallback would silently never run.
if "Latest Screen Failure Date" in partis_info.columns:
    screen_failure_dates = partis_info["Latest Screen Failure Date"]
else:
    screen_failure_dates = None

# Columns carried over from the raw export. The explicit copy makes the frame
# independent of the raw one, so the column assignments below cannot trigger
# SettingWithCopyWarning.
partis_info = partis_info[
    [
        "Subject ID",
        "Site Study Code",
        "Participant Status",
        "Participant Status Date",
        "Participant Latest Consent Date",
        "Participant Consent Status",
    ]
].copy()

# Treat blank / zero placeholders as missing consent dates.
partis_info["Participant Latest Consent Date"] = partis_info[
    "Participant Latest Consent Date"
].replace(["", "0", 0], pd.NA)

# Where the consent date is missing, fall back to the screen failure date
# when the export provides one (row alignment is by the shared index).
if screen_failure_dates is not None:
    partis_info["Participant Latest Consent Date"] = partis_info[
        "Participant Latest Consent Date"
    ].fillna(screen_failure_dates)


# Map the export's headers onto the names written to the output file.
partis_info = partis_info.rename(
    columns={
        "Site Study Code": "Site Study ID",
        "Participant Status Date": "Current Status Date",
        "Participant Latest Consent Date": "Current Consent Date",
        "Participant Consent Status": "Consent Result",
    }
)

# Parse the consent date; values that cannot be parsed become NaT.
partis_info["Current Consent Date"] = pd.to_datetime(
    partis_info["Current Consent Date"], errors="coerce"
)

# Keep participants consented strictly before the first day of last month
# (rows whose date is still NaT never satisfy the comparison and are dropped).
partis_info = partis_info[partis_info["Current Consent Date"] < first_of_last_month]

# NOTE(review): there is no separator between the month name and
# "Participant" in the file name - confirm the dashboard expects exactly this.
partis_info.to_csv(
    f"{folder_path}/{cap_month}Participant Information Data Sheet.csv",
    index=False,
)

In [None]:
# Load the raw transaction export.
transactions = pd.read_csv(
    r"C:\Users\O304312\Downloads\Transaction.csv", low_memory=False
)

# Remove the "Unnamed: 0" column when the export contains it;
# errors="ignore" keeps the cell working for exports that do not.
transactions = transactions.drop(columns=["Unnamed: 0"], errors="ignore")
print(transactions.columns)

In [None]:
# Treat blank / zero placeholders as missing BEFORE parsing. Once the column
# has been converted to datetimes these placeholder values can no longer be
# matched, so replacing them afterwards has no effect.
transactions["Accountable Completed Date"] = transactions[
    "Accountable Completed Date"
].replace(["", "0", 0], pd.NA)

# Parse the completion date; values that cannot be parsed become NaT.
transactions["Accountable Completed Date"] = pd.to_datetime(
    transactions["Accountable Completed Date"], errors="coerce"
)

# Keep rows completed strictly before the first day of last month, plus rows
# with no completion date at all. The copy makes the filtered frame
# independent so the .loc assignments below cannot trigger
# SettingWithCopyWarning.
# NOTE(review): rows without a completion date are kept regardless of their
# created date - confirm that is intended.
transactions = transactions[
    (transactions["Accountable Completed Date"] < first_of_last_month)
    | (transactions["Accountable Completed Date"].isnull())
].copy()

# Fill the remaining missing completion dates from the created date.
mask = transactions["Accountable Completed Date"].isna()
transactions.loc[mask, "Accountable Completed Date"] = pd.to_datetime(
    transactions.loc[mask, "Transaction Created Date"], errors="coerce"
)

num_blank_rows = transactions["Accountable Completed Date"].isna().sum()
print(f"Number of blank rows in 'Accountable Completed Date': {num_blank_rows}")

transactions = transactions.drop(columns=["Transaction Created Date"])

# Missing account codes default to 47205 ...
transactions["Account Code"] = transactions["Account Code"].replace(["", "0", 0], pd.NA)

transactions["Account Code"] = transactions["Account Code"].fillna(47205)

# ... and every "StudyActivity" line item is assigned 47206, overriding
# whatever code the row carried.
# NOTE(review): later comparisons against 47205 assume this column holds
# numbers rather than strings - confirm the dtype read from the CSV.
transactions.loc[
    transactions["Transaction Line Item"] == "StudyActivity", "Account Code"
] = 47206


# Next step: merge with the DOR data (Chen file) to get each project's
# overhead rate, then apply that rate to the AdHoc transaction amounts.

In [None]:
import glob
import re

# Every reconciled CTP project list workbook in the DOR finance folder.
dor_workbook_pattern = r"J:\ADMIN-eFILES\CHEN_W154867_VXC\zzz_CTP Projects List\DOR Finance CTP List\CTP Project List Reconciled *.xlsx"
excel_files = glob.glob(dor_workbook_pattern)


def extract_date(filename):
    """Return the six digits that immediately precede ``.xlsx`` at the end of a name.

    Args:
        filename: File name or full path to inspect.

    Returns:
        The six-digit string, or ``""`` when the name does not end in six
        digits followed by ``.xlsx``.
    """
    found = re.search(r"(\d{6})\.xlsx$", filename)
    if found is None:
        return ""
    return found.group(1)


# Stop here when nothing matched: merely printing a message would leave
# `dor_ctp_projects` undefined (or stale from an earlier kernel run) and the
# failure would only surface, less clearly, in a later cell.
if not excel_files:
    raise FileNotFoundError("No matching Excel files found.")

# Order the workbooks by the date stamp in their file names, largest first;
# names without a stamp sort last.
# NOTE(review): the stamps are compared as strings, which is chronological
# only if the six digits are YYMMDD - confirm the naming convention.
excel_files_sorted = sorted(excel_files, key=extract_date, reverse=True)

latest_excel = excel_files_sorted[0]
dor_ctp_projects = pd.read_excel(latest_excel)
print(f"Loaded file: {latest_excel}")

In [None]:
print(transactions.columns)
print(dor_ctp_projects.columns)

# Keep only the merge key and the overhead rate. Rows without a ProjectID are
# dropped: they cannot identify a project, and a missing key would otherwise
# match transactions whose service line code is also missing.
dor_ctp_projects = (
    dor_ctp_projects[["ProjectID", "IDC Rate"]]
    .dropna(subset=["ProjectID"])
    .drop_duplicates()
)

# After removing exact duplicates, a ProjectID that still appears more than
# once carries conflicting rates. Left-merging on a non-unique key would
# duplicate the matching transaction rows, so keep one row per ProjectID and
# report the conflicts for follow-up.
conflicting_rates = dor_ctp_projects["ProjectID"].duplicated(keep=False)
if conflicting_rates.any():
    conflicting_ids = dor_ctp_projects.loc[conflicting_rates, "ProjectID"].unique().tolist()
    print(
        f"Warning: {len(conflicting_ids)} ProjectID(s) have more than one IDC Rate; "
        f"keeping the first listed rate for each: {conflicting_ids}"
    )

dor_ctp_projects = dor_ctp_projects.drop_duplicates(subset="ProjectID", keep="first")

In [None]:
# Attach each project's IDC rate to its transactions; transactions without a
# matching ProjectID are kept and get a missing rate.
transactions = transactions.merge(
    dor_ctp_projects,
    how="left",
    left_on="Service Line Code",
    right_on="ProjectID",
)

# Force both operands to numbers; values that cannot be parsed become NaN.
for numeric_col in ("Transaction Amount", "IDC Rate"):
    transactions[numeric_col] = pd.to_numeric(transactions[numeric_col], errors="coerce")


# AdHoc line items on account 47205 are increased by the project's IDC rate;
# a missing rate counts as 0, which leaves the amount unchanged.
# NOTE(review): this treats the rate as a fraction (e.g. 0.30), not a
# percentage (30) - confirm against the DOR workbook.
is_adhoc = transactions["Transaction Line Item"] == "AdHoc"
is_account_47205 = transactions["Account Code"] == 47205
mask = is_adhoc & is_account_47205

overhead_multiplier = 1 + transactions.loc[mask, "IDC Rate"].fillna(0)
transactions.loc[mask, "Transaction Amount"] = (
    transactions.loc[mask, "Transaction Amount"] * overhead_multiplier
)

In [None]:
# Project ID lookup, read from the shared snapshot folder.
project_id_list = pd.read_csv(
    os.path.join(snapshot_ds, "SignalPath ProjectID Lookup.csv"),
    low_memory=False,
    encoding="cp1252",
)
print(project_id_list.columns)

# Attach the lookup's columns to every transaction; transactions without a
# matching Project ID are kept.
# NOTE(review): a Project ID listed more than once in the lookup would
# duplicate the matching transaction rows - confirm the lookup key is unique.
transactions = pd.merge(
    transactions,
    project_id_list,
    left_on="Service Line Code",
    right_on="Project ID",
    how="left",
)


print(transactions.columns)

# The April 2025 export serves as the template for the output's column names
# and order. Only its header is needed, so nrows=0 avoids loading the data.
list_csv = pd.read_csv(
    os.path.join(snapshot_ds, "04 2025 April", "_Transaction Data april 2025.csv"),
    nrows=0,
)


columns_list = list_csv.columns.tolist()

print(columns_list)

# "Transaction Created Date" is rebuilt from the completion date.
transactions["Transaction Created Date"] = transactions["Accountable Completed Date"]
transactions = transactions.rename(
    columns={
        "Transaction Amount": "Amount",
        "Site Study ID": "Site Protocol Version Desc",
    }
)

# Fail with an explicit list when the template expects columns that this
# run did not produce, instead of pandas' generic "not in index" error.
missing_columns = [col for col in columns_list if col not in transactions.columns]
if missing_columns:
    raise KeyError(
        f"Transactions are missing columns required by the template: {missing_columns}"
    )

# Keep exactly the template's columns, in the template's order.
transactions = transactions.loc[:, columns_list]

In [None]:
# Name the file after the snapshot month and that month's own year. Taking
# the year from `first_of_last_month` keeps the name right after a year
# change (e.g. a January run produces "december <previous year>").
transactions.to_csv(
    f"{folder_path}/_Transaction Data {month} {first_of_last_month.year}.csv",
    index=False,
)