In [1]:
root =  "/home/brainmappinglab/Desktop/PROJECTS/Glioblastoma-TractDensity_Surival-Prognosis" # "D:\JoanFR_Sano"
MNI_DIR = "/home/brainmappinglab/Desktop/PROJECTS/MNI_ICBM_2009b_NLIN_ASYM" # "C:/Users/user/Documents/Data/MNI"
fmt = "svg"
dpi = 300
stream_th = 0

In [45]:
import pandas as pd
import numpy as np
import os
import glob
import matplotlib.pylab as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.cm as cm
import matplotlib.colors as mcolors
from tqdm import tqdm
import nibabel as nib
import nilearn.plotting as plotting

from scipy.stats import pearsonr, ttest_ind, mannwhitneyu, zscore, sem, f_oneway

import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multitest import fdrcorrection
from statsmodels.duration.hazard_regression import PHReg

from sksurv.nonparametric import kaplan_meier_estimator
from sksurv.compare import compare_survival
from sksurv.preprocessing import OneHotEncoder
from sksurv.column import categorical_to_numeric
from sksurv.linear_model import CoxPHSurvivalAnalysis

from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans

import statsmodels.formula.api as smf

In [3]:
figs_folder = os.path.join(root,"Relationships-features_anatomy-morphology","UCSF")
daysXmonth = 365/12
voxel_size = (0.5**3) * (1/1000) # 0.5 (mm³/voxel) X 0.001 (cm³/mm³)   

os.makedirs(figs_folder, exist_ok=True) 

# Data

In [4]:
root_ucsf = os.path.join(root,"Glioblastoma_UCSF-PDGM_v3-20230111")
TDstats_ucsf = pd.read_csv(os.path.join(root_ucsf, f"TDMaps_Grade-IV/demographics-TDMaps_streamTH-{stream_th}.csv"))
morphology_ucsf = pd.read_csv(os.path.join(root_ucsf, f"TDMaps_Grade-IV/morphology-tissues.csv"))

TDstats_ucsf = TDstats_ucsf.loc[TDstats_ucsf["Final pathologic diagnosis (WHO 2021)"]=="Glioblastoma  IDH-wildtype"] 
TDstats_ucsf = TDstats_ucsf.loc[TDstats_ucsf["OS"].fillna('unknown')!='unknown']
morphology_ucsf = morphology_ucsf.loc[morphology_ucsf["Final pathologic diagnosis (WHO 2021)"]=="Glioblastoma  IDH-wildtype"] 
morphology_ucsf = morphology_ucsf.loc[morphology_ucsf["OS"].fillna('unknown')!='unknown']

common_cols = ['ID', 'Sex', 'Age at MRI', 'WHO CNS Grade', 
       'Final pathologic diagnosis (WHO 2021)', 'MGMT status', 'MGMT index',
       '1p/19q', 'IDH', '1-dead 0-alive', 'OS', 'EOR',
       'Biopsy prior to imaging', 'BraTS21 ID', 'BraTS21 Segmentation Cohort',
       'BraTS21 MGMT Cohort', '# Labels'
]
ucsf = pd.merge(TDstats_ucsf, morphology_ucsf, on=common_cols)

print(ucsf["1-dead 0-alive"].value_counts().sum())
censored = (ucsf["1-dead 0-alive"]==0).sum()
all_ucsf = ucsf["1-dead 0-alive"].value_counts().sum()
print(f"Precentage of censoring: {round(100*censored/all_ucsf,2)}%")

367
Precentage of censoring: 39.24%


In [110]:
mgmt_encoder = {"negative": 900, "positive": 3, "indeterminate": 8, np.nan: np.nan}

In [111]:
data_COX = pd.DataFrame({
    "os": ucsf["OS"].values,
    "status": ucsf["1-dead 0-alive"].values,
    "sex": ucsf["Sex"].values,
    "age": ucsf["Age at MRI"].values,
    "mgmt": [mgmt_encoder[v] for v in ucsf["MGMT status"].values],
    "mgmt_index": ucsf["MGMT index"].values,
    "eor": ucsf["EOR"].values,
    "volume": ucsf["Whole tumor size (voxels)"].values * voxel_size,
    "ltdi": ucsf["Whole lesion TDMap"].values,
    "tdi": ucsf["Whole TDMap"].values
})

# Cox proportional Hazard models

### Dropping missing values

In [112]:
data_COX_dropna = data_COX.dropna()
os_dropna = data_COX_dropna["os"].values
status_dropna = data_COX_dropna["status"].values

In [113]:
covariates = ["mgmt"]#["sex","age","mgmt","mgmt_index","eor","ltdi","volume","tdi"]
data = categorical_to_numeric(data_COX_dropna[covariates]).to_numpy()

cox_dropna = PHReg(
    os_dropna,
    data,
    status=status_dropna
)
results_dropna = cox_dropna.fit()
results_dropna.summary(yname="Overall survival", xname=covariates)

0,1,2,3
Model:,PH Reg,Sample size:,344.0
Dependent variable:,Overall survival,Num. events:,209.0
Ties:,Breslow,,

0,1,2,3,4,5,6,7
,log HR,log HR SE,HR,t,P>|t|,[0.025,0.975]
mgmt,0.0003,0.0002,1.0003,1.8248,0.0680,1.0000,1.0006


0,1,2,3
Model:,PH Reg,Sample size:,344.0
Dependent variable:,Overall survival,Num. events:,209.0
Ties:,Breslow,,

0,1,2,3,4,5,6,7
,log HR,log HR SE,HR,t,P>|t|,[0.025,0.975]
sex,0.2197,0.1471,1.2458,1.4940,0.1352,0.9338,1.6620
age,0.0350,0.0067,1.0356,5.2643,0.0000,1.0222,1.0492
mgmt,0.0621,0.1912,1.0640,0.3245,0.7455,0.7315,1.5478
mgmt_index,-0.0334,0.0138,0.9671,-2.4156,0.0157,0.9413,0.9937
eor,0.7644,0.1143,2.1477,6.6871,0.0000,1.7166,2.6870
ltdi,0.0643,0.0384,1.0664,1.6760,0.0937,0.9892,1.1497


# Feature importance

# Feature selection