In [None]:
# --- Run parameters -------------------------------------------------------
# Tag embedded in the demographics CSV filename
# ("demographics-TDMaps_streamTH-<stream_th>.csv") loaded further down.
stream_th = 0

# Presumably the figure export format and resolution; not used in the cells
# visible here -- confirm against the plotting cells.
fmt = "svg"
dpi = 300

# --- Local paths (machine specific) ---------------------------------------
# Project root; cohort folders and the figures folder are joined onto it.
# Alternative used on another machine: "D:\JoanFR_Sano"
root = "/home/brainmappinglab/Desktop/PROJECTS/Glioblastoma-TractDensity_Surival-Prognosis"
# Presumably the MNI ICBM 2009b template directory (not used in the visible cells).
# Alternative used on another machine: "C:/Users/user/Documents/Data/MNI"
MNI_DIR = "/home/brainmappinglab/Desktop/PROJECTS/MNI_ICBM_2009b_NLIN_ASYM"

In [None]:
import pandas as pd
import numpy as np
import os
import glob
import matplotlib.pylab as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.cm as cm
import matplotlib.colors as mcolors
from tqdm import tqdm
import nibabel as nib
import nilearn.plotting as plotting

from scipy.stats import pearsonr, ttest_ind, mannwhitneyu, zscore, sem, f_oneway

import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multitest import fdrcorrection

from sksurv.nonparametric import kaplan_meier_estimator
from sksurv.compare import compare_survival
from sksurv.linear_model import CoxPHSurvivalAnalysis

from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans

import statsmodels.formula.api as smf

In [None]:
# Conversion factors.
# Mean number of days per month (365 days spread over 12 months).
daysXmonth = 365/12
# Volume of one voxel in cm³: 0.5³ (mm³/voxel) X 0.001 (cm³/mm³).
# Multiplied by the tumor size in voxels when the pooled table is built.
voxel_size = (0.5**3) * (1/1000)

# Output folder under the project root; created if it does not exist yet.
figs_folder = os.path.join(root, "Relationships-features_anatomy-morphology")
os.makedirs(figs_folder, exist_ok=True)

# Data

In [None]:
# UCSF-PDGM cohort: read the TDMap statistics and the tissue morphology tables.
root_ucsf = os.path.join(root, "Glioblastoma_UCSF-PDGM_v3-20230111")
TDstats_ucsf = pd.read_csv(
    os.path.join(root_ucsf, f"TDMaps_Grade-IV/demographics-TDMaps_streamTH-{stream_th}.csv")
)
morphology_ucsf = pd.read_csv(
    os.path.join(root_ucsf, "TDMaps_Grade-IV/morphology-tissues.csv")
)

# Keep only IDH-wildtype glioblastomas whose overall survival is known.
# The two filters run in sequence, and rows with a missing OS (or the literal
# string 'unknown') are dropped.
TDstats_ucsf = (
    TDstats_ucsf
    .loc[lambda d: d["Final pathologic diagnosis (WHO 2021)"] == "Glioblastoma  IDH-wildtype"]
    .loc[lambda d: d["OS"].fillna('unknown') != 'unknown']
)
morphology_ucsf = (
    morphology_ucsf
    .loc[lambda d: d["Final pathologic diagnosis (WHO 2021)"] == "Glioblastoma  IDH-wildtype"]
    .loc[lambda d: d["OS"].fillna('unknown') != 'unknown']
)

# Columns present in both tables; used as the join key so they are not duplicated.
common_cols = [
    'ID', 'Sex', 'Age at MRI', 'WHO CNS Grade',
    'Final pathologic diagnosis (WHO 2021)',
    'MGMT status', 'MGMT index', '1p/19q', 'IDH',
    '1-dead 0-alive', 'OS', 'EOR',
    'Biopsy prior to imaging',
    'BraTS21 ID', 'BraTS21 Segmentation Cohort', 'BraTS21 MGMT Cohort',
    '# Labels',
]
# Inner join (pandas default): only subjects found in both tables are kept.
ucsf = TDstats_ucsf.merge(morphology_ucsf, on=common_cols)

# Cohort size (subjects with a non-missing status) and share of censored
# subjects (status 0 = alive).
all_ucsf = ucsf["1-dead 0-alive"].count()
censored = (ucsf["1-dead 0-alive"] == 0).sum()
print(all_ucsf)
print(f"Precentage of censoring: {round(100*censored/all_ucsf,2)}%")

In [None]:
# UPENN-GBM cohort: read the TDMap statistics and the tissue morphology tables.
root_upenn = os.path.join(root, "Glioblastoma_UPENN-GBM_v2-20221024")
TDstats_upenn = pd.read_csv(
    os.path.join(root_upenn, f"TDMaps_IDH1-WT/demographics-TDMaps_streamTH-{stream_th}.csv")
)
morphology_upenn = pd.read_csv(
    os.path.join(root_upenn, "TDMaps_IDH1-WT/morphology-tissues.csv")
)

# Columns present in both tables; used as the join key so they are not duplicated.
common_cols = [
    "ID",
    "Gender",
    "Age_at_scan_years",
    "Survival_from_surgery_days_UPDATED",
    "Survival_Status",
    "MGMT",
    "KPS",
    "GTR_over90percent",
]
# Inner join (pandas default): only subjects found in both tables are kept.
upenn = TDstats_upenn.merge(morphology_upenn, on=common_cols)

# Cohort size (subjects with a non-missing status) and share of subjects whose
# status equals 0, which this notebook counts as censored.
all_upenn = upenn["Survival_Status"].count()
censored = (upenn["Survival_Status"] == 0).sum()
print(all_upenn)
print(f"Precentage of censoring: {round(100*censored/all_upenn,2)}%")

In [None]:
# Set to 1 to z-score the continuous covariates of the pooled table.
z_transform = 0


def _harmonize_cohort(cohort, site, columns, voxel_volume):
    """Map one cohort table onto the schema shared by both sites.

    Every column is extracted as a plain array, so the result always carries a
    fresh 0..n-1 index and never depends on the index of ``cohort``.

    Parameters
    ----------
    cohort : pandas.DataFrame
        Merged TDMap/morphology table of a single site.
    site : int
        Numeric site code written to the ``site`` column.
    columns : dict
        Cohort-specific source column names for the keys ``"ID"``, ``"sex"``,
        ``"age"``, ``"OS"`` and ``"status"``.
    voxel_volume : float
        Factor that converts the tumor size in voxels into a volume.

    Returns
    -------
    pandas.DataFrame
        Columns ``ID, sex, age, site, volume, ltdi, tdi, OS, status``.
    """
    return pd.DataFrame({
        "ID": cohort[columns["ID"]].to_numpy(),
        "sex": cohort[columns["sex"]].to_numpy(),
        "age": cohort[columns["age"]].to_numpy(),
        "site": np.full(len(cohort), site, dtype=int),
        "volume": cohort["Whole tumor size (voxels)"].to_numpy() * voxel_volume,
        "ltdi": cohort["Whole lesion TDMap"].to_numpy(),
        "tdi": cohort["Whole TDMap"].to_numpy(),
        "OS": cohort[columns["OS"]].to_numpy(),
        "status": cohort[columns["status"]].to_numpy(),
    })


# UCSF = site 1
df_ucsf = _harmonize_cohort(
    ucsf,
    site=1,
    columns={
        "ID": "ID",
        "sex": "Sex",
        "age": "Age at MRI",
        "OS": "OS",
        "status": "1-dead 0-alive",
    },
    voxel_volume=voxel_size,
)

# UPENN = site 2
df_upenn = _harmonize_cohort(
    upenn,
    site=2,
    columns={
        "ID": "ID",
        "sex": "Gender",
        "age": "Age_at_scan_years",
        "OS": "Survival_from_surgery_days_UPDATED",
        "status": "Survival_Status",
    },
    voxel_volume=voxel_size,
)

# Pooled table: UCSF rows first, then UPENN, on a single continuous index.
DATA = pd.concat([df_ucsf, df_upenn], axis=0, ignore_index=True)

if z_transform == 1:
    for col in ["age", "volume", "ltdi", "tdi"]:
        # nan_policy="omit" keeps one missing value from turning the whole
        # column into NaN; results are unchanged when nothing is missing.
        DATA[col] = zscore(DATA[col], nan_policy="omit")

# Cox proportional hazards models

# Feature importance

# Feature selection