In [None]:
from sklearn.feature_selection import SelectKBest
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, KFold
from sksurv.linear_model import CoxPHSurvivalAnalysis
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.colors import to_rgba
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from scipy.stats import mannwhitneyu
import seaborn as sns
from scipy.stats import linregress
from scipy.stats import pearsonr, PermutationMethod, BootstrapMethod
from sksurv.nonparametric import kaplan_meier_estimator
from sksurv.compare import compare_survival
from sksurv.linear_model import CoxPHSurvivalAnalysis
import os
from statsmodels.stats.multitest import multipletests, fdrcorrection
import argparse

# Command-line options for this analysis run.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--threshold",
    type=int,
    default=0,
    help="Streamline density threshold",
)
parser.add_argument(
    "--correction",
    type=str,
    choices=["fwer", "fdr"],
    default="fdr",
    help="Multiple hypotheses correction method",
)
args = parser.parse_args()

# Values derived from the command line.
stream_th = args.threshold
fwer = args.correction == "fwer"  # True -> FWER correction, False -> FDR

# Fixed analysis constants.
daysXmonth = 30  # presumably a days-per-month conversion factor -- confirm at use site
percentiles2check = (
    (20, 80),
    (25, 75),
    (30, 70),
    (35, 65),
    (40, 60),
    (45, 55),
    (50, 50),
)
n_resamples = 2500  # Bootstrapping and permutation of correlation values
n_perms = 5000  # Permutation of Cox proportional hazards models
months = np.array([6, 12, 18, 24, 30, 36, 42, 48])

# Figure layout settings and the threshold-specific output folder.
nrows, ncols = 2, 5
figsize = (25, 12)
figs_folder = f"StreamlineTDThreshold-{stream_th}"
# Create the output folder (and any missing parents); no error if it exists.
os.makedirs("../Figures/TDMaps_Grade-IV/" + figs_folder, exist_ok=True)

# Load the demographics/TDMap table written for the selected streamline threshold.
demographics_TD = pd.read_csv(
    f"../Figures/TDMaps_Grade-IV/demographics-TDMaps_streamTH-{stream_th}.csv"
)

# "OS" followed by every TDMap column, including the two "Core" ones.
all_tdmap_columns = [
    "OS",
    "Whole TDMap",
    "Whole lesion TDMap",
    "Core TDMap",
    "Core lesion TDMap",
    "Non-enhancing TDMap",
    "Non-enhancing lesion TDMap",
    "Enhancing TDMap",
    "Enhancing lesion TDMap",
    "Core+Enhancing TDMap",
    "Core+Enhancing lesion TDMap",
]
TDMaps_all = demographics_TD[all_tdmap_columns]

# Status column as a NumPy array (column name: "1-dead 0-alive").
life = demographics_TD["1-dead 0-alive"].values

# No restriction on status is applied here; TDMaps is the full selection
# (a `.loc[life == 1]` filter used to be attached to this line, commented out).
TDMaps = TDMaps_all

# Clean data initially: select "OS" plus the TDMap columns kept for modelling
# (the "Core TDMap" / "Core lesion TDMap" columns are not part of this selection).
final_tdmap_columns = [
    "OS",
    "Whole TDMap",
    "Whole lesion TDMap",
    "Non-enhancing TDMap",
    "Non-enhancing lesion TDMap",
    "Enhancing TDMap",
    "Enhancing lesion TDMap",
    "Core+Enhancing TDMap",
    "Core+Enhancing lesion TDMap",
]
TDMaps_final = demographics_TD[final_tdmap_columns]

# Keep a row only if every selected column is present and its status is not NaN.
mask = (
    TDMaps_final.notna().all(axis=1)
    & ~np.isnan(life)
)
TDMaps_filtered = TDMaps_final.loc[mask]
life_filtered = life[mask]

# Feature matrix: all selected columns except the first one ("OS").
features = TDMaps_filtered.iloc[:, 1:].values

# Structured array pairing each row's status ('event', bool) with its
# "OS" value ('time', float32).
OS_STATS = np.array(
    list(zip(life_filtered, TDMaps_filtered["OS"].values)),
    dtype=[('event', 'bool'), ('time', '<f4')],
)


def Harrell_C_index(X, y):
    """Score each column of ``X`` with its own single-feature Cox model.

    One ``CoxPHSurvivalAnalysis`` estimator is refitted on every column in
    turn and evaluated on the same data it was fitted on, so the values are
    in-sample scores. Per the scikit-survival documentation, ``score``
    returns Harrell's concordance index.

    Parameters
    ----------
    X : array
        Two-dimensional array indexed as ``X[:, j:j + 1]``; one column per
        feature.
    y : object
        Survival outcome forwarded unchanged to ``fit`` and ``score``.

    Returns
    -------
    numpy.ndarray
        One float score per column of ``X``, in column order.
    """
    n_features = X.shape[1]
    model = CoxPHSurvivalAnalysis()

    def _score_column(j):
        # Slice (rather than index) so the estimator still receives a 2-D array.
        column = X[:, j:j + 1]
        model.fit(column, y)
        return model.score(column, y)

    return np.fromiter(
        (_score_column(j) for j in range(n_features)),
        dtype=float,
        count=n_features,
    )

: 