In [1]:
import argparse
import pathlib

import duckdb
import pandas as pd

# Locate the repository root: the nearest directory, starting at the current working
# directory and moving upwards, that contains a ".git" directory.
cwd = pathlib.Path.cwd()

root_dir = next(
    (
        candidate
        for candidate in (cwd, *cwd.parents)  # cwd first, then each ancestor in turn
        if (candidate / ".git").is_dir()
    ),
    None,  # no candidate matched
)

# Fail early when the notebook is run from outside a Git checkout
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")

# `get_ipython` only exists as a name when the code is executed by IPython/Jupyter;
# under a plain Python interpreter looking it up raises NameError.
try:
    _ipython_shell = get_ipython()
except NameError:
    in_notebook = False
else:
    cfg = _ipython_shell.config
    in_notebook = True

In [2]:
# Load the per-treatment drug annotation table from the repository.
# NOTE(review): the saved output of this cell shows an "Unnamed: 0" column, which
# suggests the CSV was written together with its index; that column is carried into
# any merge that uses this frame - confirm whether `index_col=0` is wanted here.
drug_information_csv = pathlib.Path(
    f"{root_dir}/4.processing_image_based_profiles/data/drugs/drug_information.csv"
)
drug_information = pd.read_csv(drug_information_csv)
drug_information.head()

Unnamed: 0,Treatment,Pathway,Function,Class,Therapeutic Categories
0,ARV-825,C-MYC,Degrades BRD4,PROTAC,PROTAC
1,Binimetinib,MEK1/2,Inhibits MEK1/2,Small Molecule,Kinase Inhibitor
2,Cabozantinib,MEK1/2,Inhibits FLT3; c-KIT; c-RET; AXL; c-MET; VEGFR...,Small Molecule,Kinase Inhibitor
3,Copanlisib,mTOR;NF-kB; MEK1/2,Inhibits PI3K,Small Molecule,Kinase Inhibitor
4,DMSO,Control,Control,Control,Control


In [3]:
# Annotate every profile in data/all_patient_profiles with the drug metadata table
# and write each result next to its input as "<name>drug_annotated.parquet".
profiles_dir = pathlib.Path(f"{root_dir}/data/all_patient_profiles/").resolve(
    strict=True
)
# File-name ending that marks an output of this cell.
# NOTE(review): there is no separator between the original stem and this ending
# (e.g. "sc_profilesdrug_annotated.parquet"); kept as-is so the announced output
# names do not change - confirm whether "_drug_annotated" was intended.
annotated_suffix = "drug_annotated.parquet"
# get a list of all input profiles; annotated outputs live in the same directory,
# so they are excluded to keep a re-run from annotating them a second time
profiles = [
    f
    for f in profiles_dir.glob("*.parquet")
    if f.is_file() and not f.name.endswith(annotated_suffix)
]
for profile in profiles:
    df = pd.read_parquet(profile)
    # Normalise the alternative Staurosporine spellings (abbreviation and trailing
    # space) - presumably so they match the "Treatment" key of drug_information.
    df["treatment"] = df["treatment"].replace("STAURO", "Staurosporine")
    df["treatment"] = df["treatment"].replace("Staurosporine ", "Staurosporine")
    # Left merge: every profile row is kept, rows without a matching treatment get
    # missing values in the drug columns.
    df = df.merge(
        drug_information, how="left", left_on="treatment", right_on="Treatment"
    )
    print(df.shape)
    profile_output = pathlib.Path(
        profile.parent, profile.name.replace(".parquet", annotated_suffix)
    )
    print(f"Saving annotated profile to {profile_output}")
    # Write to the announced output path. Writing back onto `profile` would
    # overwrite the input, and the next run would merge the drug columns into
    # already-annotated data.
    df.to_parquet(profile_output, index=False)

(1481, 653)
Saving annotated profile to /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/data/all_patient_profiles/organoid_profilesdrug_annotated.parquet
(11232, 1935)
Saving annotated profile to /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/data/all_patient_profiles/sc_profilesdrug_annotated.parquet
(395, 324)
Saving annotated profile to /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/data/all_patient_profiles/sc_agg_profilesdrug_annotated.parquet
(392, 177)
Saving annotated profile to /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/data/all_patient_profiles/organoid_agg_profilesdrug_annotated.parquet
(1481, 180)
Saving annotated profile to /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/data/all_patient_profiles/organoid_fs_profilesdrug_annotated.parquet
(11232, 327)
Saving annotated profile to /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/data/all_patient_profiles/sc_fs_profilesdrug_annotated.parquet
