In [1]:
import argparse
import pathlib

import duckdb
import pandas as pd

# Locate the repository root: the nearest directory, starting at the current
# working directory and walking upward, that contains a ".git" directory.
cwd = pathlib.Path.cwd()

root_dir = next(
    (
        candidate
        for candidate in (cwd, *cwd.parents)
        if (candidate / ".git").is_dir()
    ),
    None,
)

# Fail early when no enclosing Git repository exists; every path below is
# built relative to root_dir.
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")

# Detect an IPython/Jupyter session: `get_ipython` is only defined there, so a
# NameError means the code is running as a plain Python script.
try:
    cfg = get_ipython().config
except NameError:
    in_notebook = False
else:
    in_notebook = True

In [2]:
# Load the drug annotation table (one row per treatment) and preview it.
drug_information_path = pathlib.Path(
    f"{root_dir}/4.processing_image_based_profiles/data/drugs/drug_information.csv"
)
drug_information = pd.read_csv(drug_information_path)
drug_information.head()

Unnamed: 0,Treatment,Pathway,Function,Class,Therapeutic Categories
0,ARV-825,C-MYC,Degrades BRD4,PROTAC,PROTAC
1,Binimetinib,MEK1/2,Inhibits MEK1/2,Small Molecule,Kinase Inhibitor
2,Cabozantinib,MEK1/2,Inhibits FLT3; c-KIT; c-RET; AXL; c-MET; VEGFR...,Small Molecule,Kinase Inhibitor
3,Copanlisib,mTOR;NF-kB; MEK1/2,Inhibits PI3K,Small Molecule,Kinase Inhibitor
4,DMSO,Control,Control,Control,Control


In [3]:
# Annotate every patient profile with the drug metadata and write the result
# back to the same parquet file.
# resolve(strict=True) raises FileNotFoundError if the directory is missing.
profiles_dir = pathlib.Path(f"{root_dir}/data/all_patient_profiles/").resolve(
    strict=True
)
# get a list of all profiles
profiles = [f for f in profiles_dir.glob("*.parquet") if f.is_file()]

# Columns contributed by drug_information. Because each profile is overwritten
# in place, a file that was already processed contains these columns; merging
# again without removing them would yield "_x"/"_y" suffixed duplicates.
# NOTE(review): assumes raw profiles have no columns of their own with these
# names -- confirm against the profile schema.
annotation_columns = list(drug_information.columns)

# Known spelling variants of the same compound, mapped to one canonical label
# so they match the "Treatment" key in drug_information.
treatment_aliases = {
    "STAURO": "Staurosporine",
    "Staurosporine ": "Staurosporine",
}

for profile in profiles:
    df = pd.read_parquet(profile)
    df["treatment"] = df["treatment"].replace(treatment_aliases)
    # Drop annotations left by a previous run so re-running this cell is
    # idempotent; errors="ignore" makes this a no-op for untouched profiles.
    df = df.drop(columns=annotation_columns, errors="ignore")
    # Left join keeps every profile row; treatments missing from
    # drug_information get NaN in the annotation columns.
    df = df.merge(
        drug_information, how="left", left_on="treatment", right_on="Treatment"
    )
    df.to_parquet(profile, index=False)

In [4]:
# Show the frame left in `df` by the preceding loop: the last profile that was
# read, merged with drug_information, and written back. `df` is undefined if
# no profiles were found.
df

Unnamed: 0,patient,object_id,unit,dose,treatment,image_set,Well,parent_organoid,Colocalization_Nuclei_AGP.BF_MEDIAN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.BF_MEAN.OVERLAP.COEFF,...,Texture_Cytoplasm_Mito_Sum.Average_256.3,Texture_Cytoplasm_Mito_Sum.Variance_256.3,Area.Size.Shape_Cytoplasm_VOLUME,Area.Size.Shape_Cytoplasm_EXTENT,Area.Size.Shape_Cytoplasm_EULER.NUMBER,Treatment,Pathway,Function,Class,Therapeutic Categories
0,NF0014,25,uM,10,Mirdametinib,G8-1,G8,40,-0.962054,-0.265852,...,0.009685,0.132669,-0.063515,-0.965185,-0.394215,Mirdametinib,MEK1/2,Inhibits MEK1/2,Small Molecule,Kinase Inhibitor
1,NF0014,51,uM,10,Mirdametinib,G8-1,G8,40,0.424670,-0.271004,...,0.078748,0.197211,-0.039729,-0.529325,-0.810092,Mirdametinib,MEK1/2,Inhibits MEK1/2,Small Molecule,Kinase Inhibitor
2,NF0014,76,uM,10,Mirdametinib,G8-1,G8,40,-0.713252,-0.271616,...,-0.067183,-0.025754,-0.088707,-0.101508,-0.061513,Mirdametinib,MEK1/2,Inhibits MEK1/2,Small Molecule,Kinase Inhibitor
3,NF0014,102,uM,10,Mirdametinib,G8-1,G8,40,-0.409968,-0.153681,...,-0.320329,-0.502716,-0.156492,-0.131695,-0.103101,Mirdametinib,MEK1/2,Inhibits MEK1/2,Small Molecule,Kinase Inhibitor
4,NF0014,127,uM,10,Mirdametinib,G8-1,G8,40,1.121776,-0.271732,...,-0.181596,-0.224879,-0.120579,0.584169,-0.851680,Mirdametinib,MEK1/2,Inhibits MEK1/2,Small Molecule,Kinase Inhibitor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11227,SARCO361,102,uM,1,Trametinib,C10-1,C10,-1,0.839536,-0.153940,...,-0.250571,0.125689,-0.655757,-0.227980,0.588797,Trametinib,MEK1/2,Inhibits MEK1/2,Small Molecule,Kinase Inhibitor
11228,SARCO361,153,uM,1,Trametinib,C10-1,C10,-1,0.416243,-0.151759,...,1.348868,0.729634,1.294135,2.139031,1.241738,Trametinib,MEK1/2,Inhibits MEK1/2,Small Molecule,Kinase Inhibitor
11229,SARCO361,204,uM,1,Trametinib,C10-1,C10,-1,-0.311504,-0.182692,...,-0.877616,-0.876408,-0.308250,1.433118,1.241738,Trametinib,MEK1/2,Inhibits MEK1/2,Small Molecule,Kinase Inhibitor
11230,SARCO361,85,uM,1,Trametinib,D10-3,D10,-1,-1.749370,0.151984,...,-1.290875,-1.002629,-1.154379,0.088987,1.335016,Trametinib,MEK1/2,Inhibits MEK1/2,Small Molecule,Kinase Inhibitor
