# Python Sandbox


## Setup

In [1]:
%load_ext autoreload
%autoreload 2

### Imports

In [2]:
import json
import sys
from pathlib import Path
from typing import Iterable

import pandas as pd

In [3]:
# Make the project's ``src`` directory importable from this notebook.
# The path is relative, so it resolves against the notebook's working directory.
src_path: str = "../src"
# Guard against duplicates: re-running this cell would otherwise append the
# same entry to ``sys.path`` once per execution.
if src_path not in sys.path:
    sys.path.append(src_path)

### Global Variables

In [4]:
# Base directory for the combined TCGA_PRAD_SU2C_PCF_GSE221601 storage area.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
TCGA_PRAD_SU2C_PCF_GSE221601_ROOT: Path = Path(
    "/media/ssd/Perez/storage/TCGA_PRAD_SU2C_PCF_GSE221601"
)

# Symbols of the genes of interest (GOIs).
GOIS: Iterable[str] = (
    "TPX2",
    "EZH2",
    "TROAP",
    "COX1",
    "UHRF1",
)

## Sandbox

For all integrative analysis results, find appearances of genes of interest.

In [None]:
def _is_flagged(rows: pd.DataFrame, column: str) -> bool:
    """Return whether any row of ``rows`` holds a true value in ``column``.

    Comparing with ``eq(True)`` instead of casting to ``bool`` keeps missing
    (NaN) cells from counting as flagged.

    NOTE(review): assumes the comparison columns hold boolean membership
    flags (the original cell combined them with ``&`` / ``~``) — confirm.
    """
    return bool(rows[column].eq(True).any())


# For every gene of interest, collect the result files in which it is flagged
# for both comparisons, or for exactly one of them.
relevant_files = {
    goi: {"BOTH": [], "HSPC_vs_PRIM": [], "MCRPC_vs_HSPC": []} for goi in GOIS
}

int_analysis_root = TCGA_PRAD_SU2C_PCF_GSE221601_ROOT / "integrative_analysis"

# Sorted so the order of the reported files does not depend on the filesystem.
for int_analysis_file in sorted(int_analysis_root.rglob("*.csv")):
    # Files with "pathway" in their name are not inspected.
    if "pathway" in int_analysis_file.name:
        continue

    results_df = pd.read_csv(int_analysis_file)
    if "SYMBOL" not in results_df.columns:
        continue

    # First column whose name starts with each comparison prefix. A default of
    # None avoids a StopIteration crash on files lacking such a column.
    hspc_vs_prim_col = next(
        (col for col in results_df.columns if col.startswith("HSPC_vs_PRIM")),
        None,
    )
    mcrpc_vs_hspc_col = next(
        (col for col in results_df.columns if col.startswith("MCRPC_vs_HSPC")),
        None,
    )
    if hspc_vs_prim_col is None or mcrpc_vs_hspc_col is None:
        # Without both comparison columns the file cannot be categorised.
        continue

    for goi in GOIS:
        # Keep the selection as a DataFrame: a symbol may occur in several
        # rows, in which case a flag in any of them counts.
        goi_rows = results_df[results_df["SYMBOL"] == goi]
        if goi_rows.empty:
            continue

        in_hspc_vs_prim = _is_flagged(goi_rows, hspc_vs_prim_col)
        in_mcrpc_vs_hspc = _is_flagged(goi_rows, mcrpc_vs_hspc_col)

        # The three categories are mutually exclusive; a gene flagged in
        # neither comparison is not recorded for this file.
        if in_hspc_vs_prim and in_mcrpc_vs_hspc:
            category = "BOTH"
        elif in_hspc_vs_prim:
            category = "HSPC_vs_PRIM"
        elif in_mcrpc_vs_hspc:
            category = "MCRPC_vs_HSPC"
        else:
            continue
        relevant_files[goi][category].append(str(int_analysis_file))

print(relevant_files)
# Persist the mapping next to the notebook (path is relative to the working
# directory).
with open("relevant_files.json", "w", encoding="utf-8") as f:
    json.dump(relevant_files, f, indent=4)