In [1]:
import json

import numpy as np
import pandas as pd
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from survhive.cv_models import CoxPHElasticNetCV, CoxPHPrecondCV
from survhive.utils import transform_survival, transform_preconditioning
from sklearn.decomposition import PCA
from sksurv.linear_model import CoxPHSurvivalAnalysis
from sksurv.util import Surv

In [2]:
# Load the experiment configuration from the repository root (one level above
# this notebook). A later cell iterates over config["datasets"].
# The path is a plain string (no placeholders, so no f-prefix), and the file is
# read explicitly as UTF-8 so parsing does not depend on the machine's locale.
with open("../config.json", encoding="utf-8") as f:
    config = json.load(f)

In [3]:
# Settings shared by both teacher pipelines.
N_PCA_COMPONENTS = 16  # number of principal components fed to the Cox model
PCA_SEED = 0  # fixed seed so any randomized SVD inside PCA is repeatable


def make_teacher_pipeline(ties):
    """Build a scale -> PCA -> Cox proportional hazards pipeline.

    Parameters
    ----------
    ties : str
        Tie-handling method passed to ``CoxPHSurvivalAnalysis``
        (this notebook uses ``"efron"`` and ``"breslow"``).

    Returns
    -------
    sklearn.pipeline.Pipeline
        An unfitted pipeline of ``StandardScaler``, ``PCA`` and
        ``CoxPHSurvivalAnalysis``.
    """
    return make_pipeline(
        StandardScaler(),
        # scikit-learn's PCA may pick a randomized solver depending on the
        # input shape; seeding it keeps results reproducible across runs and
        # gives both pipelines the same projection for the same data.
        PCA(n_components=N_PCA_COMPONENTS, random_state=PCA_SEED),
        CoxPHSurvivalAnalysis(ties=ties),
    )


# The two pipelines differ only in how tied event times are handled.
pipe_efron = make_teacher_pipeline("efron")
pipe_breslow = make_teacher_pipeline("breslow")

In [4]:
# Prediction stores, one per tie-handling method; the fitting loop fills them
# as store[cancer][split] -> predictions on that split's training rows.
precond_efron, precond_breslow = {}, {}


In [5]:
# Number of train splits evaluated per dataset (one row of the splits file each).
N_SPLITS = 25

# For every dataset: fit both teacher pipelines on each training split, keep
# their predictions on that same training data, and export them to CSV.
for cancer in config["datasets"]:
    print(f"Starting: {cancer}")
    # Each row of the splits file holds the row positions of one training split.
    train_splits = pd.read_csv(f"../data/splits/TCGA/{cancer}_train_splits.csv")
    # Drop the first column of the preprocessed file, then treat everything
    # from the fourth remaining column onwards as features.
    # NOTE(review): presumably the skipped columns are an index plus the
    # outcome/metadata columns -- confirm against the preprocessing step.
    data = pd.read_csv(f"../data/processed/TCGA/{cancer}_data_preprocessed.csv").iloc[:, 1:]
    X_ = data.iloc[:, 3:]
    # from_arrays is a static constructor, so it is called on the class itself.
    y_ = Surv.from_arrays(time=data["OS_days"].values, event=data["OS"].values.astype(bool))

    # Start each dataset with empty per-split stores.
    precond_efron[cancer] = {}
    precond_breslow[cancer] = {}

    for split in range(N_SPLITS):
        print(f"Starting split: {split+1} / {N_SPLITS}")
        # dropna() removes the NaN cells of this row before casting to int.
        # NOTE(review): presumably shorter splits are NaN-padded -- confirm.
        train_ix = train_splits.iloc[split, :].dropna().to_numpy().astype(int)
        # Integer-array indexing already yields new objects, so no .copy().
        X_train = X_.iloc[train_ix, :].reset_index(drop=True)
        y_train = y_[train_ix]

        pipe_efron.fit(X_train, y_train)
        precond_efron[cancer][split] = pipe_efron.predict(X_train)

        pipe_breslow.fit(X_train, y_train)
        precond_breslow[cancer][split] = pipe_breslow.predict(X_train)

    # One CSV per tie method and dataset, with one column per split.
    for ties, store in (("efron", precond_efron), ("breslow", precond_breslow)):
        pd.concat(
            [pd.DataFrame(store[cancer][i]) for i in range(N_SPLITS)], axis=1
        ).to_csv(f"../results/teacher/{ties}_{cancer}.csv", index=False)

Starting: BLCA
Starting split: 1 / 25
Starting split: 2 / 25
Starting split: 3 / 25
Starting split: 4 / 25
Starting split: 5 / 25
Starting split: 6 / 25
Starting split: 7 / 25
Starting split: 8 / 25
Starting split: 9 / 25
Starting split: 10 / 25
Starting split: 11 / 25
Starting split: 12 / 25
Starting split: 13 / 25
Starting split: 14 / 25
Starting split: 15 / 25
Starting split: 16 / 25
Starting split: 17 / 25
Starting split: 18 / 25
Starting split: 19 / 25
Starting split: 20 / 25
Starting split: 21 / 25
Starting split: 22 / 25
Starting split: 23 / 25
Starting split: 24 / 25
Starting split: 25 / 25
Starting: BRCA



KeyboardInterrupt

