In [57]:
import json

import numpy as np
import pandas as pd
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from survhive.cv import regularisation_path, regularisation_path_precond
from survhive.cox import CoxPHElasticNet, CoxPHPrecond
from survhive.utils import transform_survival, transform_preconditioning
import timeit
from survhive.utils import transform_survival, transform_preconditioning
from sklearn.decomposition import PCA
from sksurv.linear_model import CoxPHSurvivalAnalysis
from sksurv.util import Surv

In [58]:
with open(f"../config.json") as f:
    config = json.load(f)

In [59]:
elastic_net_timing_one = {cancer: [] for cancer in config["datasets"]}
elastic_net_timing_zero_nine = {cancer: [] for cancer in config["datasets"]}
elastic_net_timing_zero_one = {cancer: [] for cancer in config["datasets"]}

precond_timing_one = {cancer: [] for cancer in config["datasets"]}
precond_timing_zero_five = {cancer: [] for cancer in config["datasets"]}
precond_timing_zero = {cancer: [] for cancer in config["datasets"]}

pipe_efron = make_pipeline(
    StandardScaler(),
    PCA(n_components=16),
    CoxPHSurvivalAnalysis(ties="efron")
)

In [60]:
#for cancer in config["datasets"]:
cancer = "BLCA"
data = pd.read_csv(f"../data/processed/TCGA/{cancer}_data_preprocessed.csv").iloc[:, 1:]
X = StandardScaler().fit_transform(data.iloc[:, 3:])
y = data.iloc[:, :2]
pipe_efron.fit(data.iloc[:, 3:], Surv().from_arrays(time=y["OS_days"].values, event=y["OS"].values))
eta_hat = pipe_efron.predict(data.iloc[:, 3:])

In [61]:
regularisation_path_precond(
    X=pd.DataFrame(X)
    .iloc[
        np.argsort(y["OS_days"], kind="stable"),
    ]
    .to_numpy(),
    y=transform_preconditioning(
        time=y["OS_days"].to_numpy()[np.argsort(y["OS_days"], kind="stable")],
        event=y["OS"].to_numpy()[np.argsort(y["OS_days"], kind="stable")],
        y_teacher=eta_hat
    ),
    X_test=pd.DataFrame(X)
    .iloc[
        np.argsort(y["OS_days"], kind="stable"),
    ]
    .to_numpy(),
    model=CoxPHPrecond(
       tie_correction="efron", 
        alpha=0.0, 
        tau=0.0, 
        maxiter=1000,
        rtol=1e-6,
        verbose=0,
        default_step_size=1.0,
        check_global_kkt=True
    ),
    tau=1.0,
    eps=0.05,
    n_alphas=100,
    max_first=True,
)


(array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 array([[ 0.00000000e+00, -9.34135159e-03, -1.83956531e-02, ...,
          1.76811223e+00,  1.76625663e+00,  1.75743284e+00],
        [ 0.00000000e+00,  2.17629658e-02,  4.28571781e-02, ...,
         -1.46537790e+00, -1.45231918e+00, -1.44789442e+00],
        [ 0.00000000e+00, -2.71101944e-03, -5.33873207e-03, ...,
         -4.89112627e+00, -4.93853074e+00, -4.98877323e+00],
        ...,
        [ 0.00000000e+00,  1.15148525e-02,  2.26758655e-02, ...,
         -8.12199745e+00, -8.29038828e+00, -8.

In [62]:
#for cancer in config["datasets"]:
for cancer in ["BLCA"]:
    data = pd.read_csv(f"../data/processed/TCGA/{cancer}_data_preprocessed.csv").iloc[:, 1:]
    X = StandardScaler().fit_transform(data.iloc[:, 3:])
    y = data.iloc[:, :2]
    pipe_efron.fit(data.iloc[:, 3:], Surv().from_arrays(time=y["OS_days"].values, event=y["OS"].values))
    eta_hat = pipe_efron.predict(data.iloc[:, 3:])
    
    # Elastic net timings
    for i in range(10):
        start = timeit.default_timer()
        
        regularisation_path(
            X=pd.DataFrame(X)
            .iloc[
                np.argsort(y["OS_days"], kind="stable"),
            ]
            .to_numpy(),
            y=transform_survival(
                time=y["OS_days"].to_numpy()[np.argsort(y["OS_days"], kind="stable")],
                event=y["OS"].to_numpy()[np.argsort(y["OS_days"], kind="stable")],
            ),
            X_test=pd.DataFrame(X)
            .iloc[
                np.argsort(y["OS_days"], kind="stable"),
            ]
            .to_numpy(),
            model=CoxPHElasticNet(
                alpha=0.0,
                l1_ratio=1.0,

                warm_start=True,
                n_irls_iter=5,
                tol=0.0001,
                verbose=0,
                tie_correction="efron",
                inner_solver_max_iter = 100,
                inner_solver_max_epochs = 50000,
                inner_solver_p0 = 10,
                inner_solver_prune = True,
                check_global_kkt=True,
            ),
            l1_ratio=1.0,
            eps=0.05,
            n_alphas=100,
            alphas=None,
            max_first=True
        )
        end = timeit.default_timer()
        elastic_net_timing_one[cancer].append(end - start)
    for i in range(10):
        start = timeit.default_timer()
        
        regularisation_path(
            X=pd.DataFrame(X)
            .iloc[
                np.argsort(y["OS_days"], kind="stable"),
            ]
            .to_numpy(),
            y=transform_survival(
                time=y["OS_days"].to_numpy()[np.argsort(y["OS_days"], kind="stable")],
                event=y["OS"].to_numpy()[np.argsort(y["OS_days"], kind="stable")],
            ),
            X_test=pd.DataFrame(X)
            .iloc[
                np.argsort(y["OS_days"], kind="stable"),
            ]
            .to_numpy(),
            model=CoxPHElasticNet(
                alpha=0.0,
                l1_ratio=0.9,

                warm_start=True,
                n_irls_iter=5,
                tol=0.0001,
                verbose=0,
                tie_correction="efron",
                inner_solver_max_iter = 100,
                inner_solver_max_epochs = 50000,
                inner_solver_p0 = 10,
                inner_solver_prune = True,
                check_global_kkt=True,
            ),
            l1_ratio=0.9,
            eps=0.05,
            n_alphas=100,
            alphas=None,
            max_first=True
        )
        end = timeit.default_timer()
        elastic_net_timing_zero_nine[cancer].append(end - start)
    for i in range(10):
        start = timeit.default_timer()
        
        regularisation_path(
            X=pd.DataFrame(X)
            .iloc[
                np.argsort(y["OS_days"], kind="stable"),
            ]
            .to_numpy(),
            y=transform_survival(
                time=y["OS_days"].to_numpy()[np.argsort(y["OS_days"], kind="stable")],
                event=y["OS"].to_numpy()[np.argsort(y["OS_days"], kind="stable")],
            ),
            X_test=pd.DataFrame(X)
            .iloc[
                np.argsort(y["OS_days"], kind="stable"),
            ]
            .to_numpy(),
            model=CoxPHElasticNet(
                alpha=0.0,
                l1_ratio=0.1,

                warm_start=True,
                n_irls_iter=5,
                tol=0.0001,
                verbose=0,
                tie_correction="efron",
                inner_solver_max_iter = 100,
                inner_solver_max_epochs = 50000,
                inner_solver_p0 = 10,
                inner_solver_prune = True,
                check_global_kkt=True,
            ),
            l1_ratio=0.1,
            eps=0.05,
            n_alphas=100,
            alphas=None,
            max_first=True
        )
        end = timeit.default_timer()
        elastic_net_timing_zero_one[cancer].append(end - start)
    
    # Preconditioning timings
    for i in range(10):
        start = timeit.default_timer()
        
        regularisation_path_precond(
            X=pd.DataFrame(X)
            .iloc[
                np.argsort(y["OS_days"], kind="stable"),
            ]
            .to_numpy(),
            y=transform_preconditioning(
                time=y["OS_days"].to_numpy()[np.argsort(y["OS_days"], kind="stable")],
                event=y["OS"].to_numpy()[np.argsort(y["OS_days"], kind="stable")],
                y_teacher=eta_hat
            ),
            X_test=pd.DataFrame(X)
            .iloc[
                np.argsort(y["OS_days"], kind="stable"),
            ]
            .to_numpy(),
            model=CoxPHPrecond(
               tie_correction="efron", 
                alpha=0.0, 
                tau=1.0, 
                maxiter=1000,
                rtol=1e-6,
                verbose=0,
                default_step_size=1.0,
                check_global_kkt=True
            ),
            tau=1.0,
            eps=0.05,
            n_alphas=100,
            max_first=True,
        )

        end = timeit.default_timer()
        precond_timing_one[cancer].append(end - start)
    for i in range(10):
        start = timeit.default_timer()
        
        regularisation_path_precond(
            X=pd.DataFrame(X)
            .iloc[
                np.argsort(y["OS_days"], kind="stable"),
            ]
            .to_numpy(),
            y=transform_preconditioning(
                time=y["OS_days"].to_numpy()[np.argsort(y["OS_days"], kind="stable")],
                event=y["OS"].to_numpy()[np.argsort(y["OS_days"], kind="stable")],
                y_teacher=eta_hat
            ),
            X_test=pd.DataFrame(X)
            .iloc[
                np.argsort(y["OS_days"], kind="stable"),
            ]
            .to_numpy(),
            model=CoxPHPrecond(
               tie_correction="efron", 
                alpha=0.0, 
                tau=0.5, 
                maxiter=1000,
                rtol=1e-6,
                verbose=0,
                default_step_size=1.0,
                check_global_kkt=True
            ),
            tau=0.5,
            eps=0.05,
            n_alphas=100,
            max_first=True,
        )

        end = timeit.default_timer()
        precond_timing_zero_five[cancer].append(end - start)    
    for i in range(10):
        start = timeit.default_timer()
        
        regularisation_path_precond(
            X=pd.DataFrame(X)
            .iloc[
                np.argsort(y["OS_days"], kind="stable"),
            ]
            .to_numpy(),
            y=transform_preconditioning(
                time=y["OS_days"].to_numpy()[np.argsort(y["OS_days"], kind="stable")],
                event=y["OS"].to_numpy()[np.argsort(y["OS_days"], kind="stable")],
                y_teacher=eta_hat
            ),
            X_test=pd.DataFrame(X)
            .iloc[
                np.argsort(y["OS_days"], kind="stable"),
            ]
            .to_numpy(),
            model=CoxPHPrecond(
               tie_correction="efron", 
                alpha=0.0, 
                tau=0.0, 
                maxiter=1000,
                rtol=1e-6,
                verbose=0,
                default_step_size=1.0,
                check_global_kkt=True
            ),
            tau=0.0,
            eps=0.05,
            n_alphas=100,
            max_first=True,
        )

        end = timeit.default_timer()
        precond_timing_zero[cancer].append(end - start)    
    

pd.DataFrame(elastic_net_timing_one).to_csv(
    "../results/elastic_net_timing_one.csv", index=False
)
pd.DataFrame(elastic_net_timing_zero_nine).to_csv(
    "../results/elastic_net_timing_zero_nine.csv", index=False
)
pd.DataFrame(elastic_net_timing_zero_one).to_csv(
    "../results/elastic_net_timing_zero_one.csv", index=False
)

pd.DataFrame(precond_timing_zero).to_csv(
    "../results/precond_timing_zero.csv", index=False
)
pd.DataFrame(precond_timing_zero_five).to_csv(
    "../results/precond_timing_zero_five.csv", index=False
)
pd.DataFrame(precond_timing_one).to_csv(
    "../results/precond_timing_one", index=False
)



In [70]:
pd.DataFrame({key: val for key, val in precond_timing_zero_five.items() if key == "BLCA"})

Unnamed: 0,BLCA
0,13.925373
1,10.590973
2,8.565667
3,9.334565
4,9.514502
5,14.258766
6,10.661658
7,10.125301
8,12.021422
9,9.214156


63100.301986833