In [1]:
import argparse
import pathlib

import duckdb
import pandas as pd

# Get the current working directory
cwd = pathlib.Path.cwd()

if (cwd / ".git").is_dir():
    root_dir = cwd

else:
    root_dir = None
    for parent in cwd.parents:
        if (parent / ".git").is_dir():
            root_dir = parent
            break

# Check if a Git root directory was found
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")

try:
    cfg = get_ipython().config
    in_notebook = True
except NameError:
    in_notebook = False

In [2]:
drug_information = pd.read_csv(
    pathlib.Path(
        f"{root_dir}/4.processing_image_based_profiles/data/drugs/drug_information.csv"
    )
)
drug_information.head()

Unnamed: 0,Treatment,Target,Class,Therapeutic Categories
0,ARV-825,BRD4 inhibitor,PROTAC,PROTAC
1,Binimetinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor
2,Cabozantinib,receptor tyrosine kinase inhibitor,Small Molecule,Kinase Inhibitor
3,Copanlisib,PI3K inhibitor,Small Molecule,Kinase Inhibitor
4,DMSO,Control,Control,Control


In [3]:
profiles_dir = pathlib.Path(f"{root_dir}/data/all_patient_profiles/").resolve(
    strict=True
)
# get a list of all profiles
profiles = [f for f in profiles_dir.glob("*.parquet") if f.is_file()]
for profile in profiles:
    df = pd.read_parquet(profile)
    df["treatment"] = df["treatment"].replace("STAURO", "Staurosporine")
    df["treatment"] = df["treatment"].replace("Staurosporine ", "Staurosporine")
    df = df.merge(
        drug_information, how="left", left_on="treatment", right_on="Treatment"
    )
    df.drop(columns=["Treatment"], inplace=True)
    print(df.shape)
    profile_output = pathlib.Path(
        profile.parent, profile.name.replace(".parquet", "_annotated_drugs.parquet")
    )
    print(f"Saving annotated profile to {profile_output}")
    df.to_parquet(profile_output, index=False)

(1481, 181)
Saving annotated profile to /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/data/all_patient_profiles/organoid_fs_profiles_annotated_drugs_annotated_drugs.parquet
(1481, 651)
Saving annotated profile to /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/data/all_patient_profiles/organoid_profiles_annotated_drugs.parquet
(392, 178)
Saving annotated profile to /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/data/all_patient_profiles/organoid_agg_profiles_annotated_drugs_annotated_drugs.parquet
(11232, 1933)
Saving annotated profile to /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/data/all_patient_profiles/sc_profiles_annotated_drugs.parquet
(395, 322)
Saving annotated profile to /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/data/all_patient_profiles/sc_agg_profiles_annotated_drugs.parquet
(395, 325)
Saving annotated profile to /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/data/all_patient_profiles/sc_a