### Rpy2 package

In [None]:
%load_ext rpy2.ipython

### Importing R packages

In [None]:
%R library(latentcor)
%R library(sn)

### Importing Python libraries

In [None]:
import pandas as pd
import warnings
import numpy as np
import multiprocessing
import math
from timeit import default_timer as timer
from joblib import Parallel, delayed


# Number of logical cores reported by the operating system.
num_cores = multiprocessing.cpu_count()

# Worker count handed to joblib: keep one core free, but never go below 1.
# On a single-core machine `num_cores - 1` would be 0, which joblib rejects
# as an invalid `n_jobs` value.
jobs = max(1, num_cores - 1)


import matplotlib.pyplot as plt
%matplotlib inline

#knockpy
import knockpy
from knockpy.knockoff_filter import KnockoffFilter
from knockpy.knockoff_stats import data_dependent_threshhold

#GGlasso
from gglasso.problem import glasso_problem


In [None]:
#Import the package rpy2
import rpy2
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects import r, pandas2ri
from rpy2.robjects.conversion import localconverter

# import R's packages
base = importr('base')
glmnet = importr('glmnet')
dplyr = importr('dplyr')
survival = importr('survival')

### Auxiliary functions

In [None]:
#Function to make selections
def make_selections(W, fdr):
    """Flag the features whose statistic reaches the data-dependent threshold.

    The threshold is computed by ``data_dependent_threshhold`` for the given
    ``fdr``. The result holds 1.0 where ``W >= threshold`` and 0.0 elsewhere,
    cast to float32 (``W`` is expected to support ``.astype``, e.g. a NumPy
    array).
    """
    cutoff = data_dependent_threshhold(W=W, fdr=fdr)
    is_selected = W >= cutoff
    return is_selected.astype("float32")


def glasso_function(x):
    """Return the inverse of a graphical-lasso precision estimate built from ``x``.

    ``x`` is converted to a NumPy array and handed to ``glasso_problem``; the
    sample size ``N`` is read from the notebook-level global ``n``. Model
    selection is then run with eBIC (``gamma = 0.1``) over 30 log-spaced
    ``lambda1`` values between 1e1 and 1e-5, and the selected precision matrix
    is inverted with ``np.linalg.inv``.
    """
    problem = glasso_problem(
        np.array(x),
        N=n,
        reg_params={'lambda1': 0.05},
        latent=False,
        do_scaling=False,
    )

    # Grid of candidate penalties explored by the eBIC model selection.
    lambda_grid = np.logspace(1, -5, 30)
    problem.model_selection(
        modelselect_params={'lambda1_range': lambda_grid},
        method='eBIC',
        gamma=0.1,
    )

    # The solver reports a precision matrix; callers use its inverse.
    return np.linalg.inv(problem.solution.precision_)

def lasso_glmnet(x):
    """Fit a lasso-penalised Cox model with R's glmnet and return its coefficients.

    ``x`` is turned into a pandas DataFrame and published to R as ``r_sim``.
    The R script expects the columns "Status" and "Survival_time" and uses
    every other column as a covariate. The penalty is picked by 5-fold
    cross-validation (C-index, ``lambda.min``) and the model is refitted at
    that value.

    Returns the coefficient vector of the refitted model as a NumPy array.

    Side effect: objects in R's global environment (``r_sim``, ``X``,
    ``fit_coef_vec``, ...) are created or overwritten.
    """
    frame = pd.DataFrame(x)

    # pandas -> R data.frame, stored under the name the R script reads.
    with localconverter(robjects.default_converter + pandas2ri.converter):
        r_frame = robjects.conversion.py2rpy(frame)
    robjects.globalenv["r_sim"] = r_frame

    # Load the R packages used by the script in this process's R session.
    for r_package in ('base', 'glmnet', 'dplyr', 'survival'):
        importr(r_package)

    # Cox proportional hazards model with lasso penalty (alpha = 1) via glmnet.
    robjects.r('''
        X <- r_sim %>% select(-c("Status", "Survival_time"))
        X_matrix <- as.matrix(X)
        y <- r_sim %>% select(c("Status", "Survival_time"))
        y_surv <- Surv(y$Survival_time,y$Status)
        cvfit <- cv.glmnet(X_matrix, y_surv, alpha=1, family = "cox", type.measure = "C", nfolds = 5, standardize = TRUE)
        fit <- glmnet(X_matrix,y_surv, alpha = 1, lambda =cvfit$lambda.min, family = "cox",standardize = TRUE)
        fit_coef <- coef(fit)
        fit_coef_vec <- as.vector(fit_coef)
        ''')

    # Pull the coefficient vector back from R as a NumPy array.
    return np.array(robjects.globalenv['fit_coef_vec'])

def latentcor_estimation(x, types):
    """Estimate the latent correlation matrix of ``x`` with R's latentcor.

    ``x`` is turned into a pandas DataFrame and published to R as ``r_sim``;
    ``types`` is published unchanged as ``types`` and passed to
    ``latentcor(..., method="original")``. The ``$R`` component of the result
    is returned as a NumPy array.

    Side effect: ``r_sim``, ``types`` and ``latentcor_hat_r`` are created or
    overwritten in R's global environment.
    """
    frame = pd.DataFrame(x)

    # pandas -> R data.frame.
    with localconverter(robjects.default_converter + pandas2ri.converter):
        r_frame = robjects.conversion.py2rpy(frame)

    robjects.globalenv["r_sim"] = r_frame
    robjects.globalenv["types"] = types

    # Load the R packages used by the script in this process's R session.
    for r_package in ('base', 'latentcor', 'dplyr'):
        importr(r_package)

    # Latent correlation estimate.
    robjects.r('''
        latentcor_hat_r <- latentcor(r_sim , types = types, method="original")$R  
        ''')

    # R matrix -> NumPy array.
    return np.array(robjects.globalenv['latentcor_hat_r'])

### Simulation setup

In [None]:
t_initial = timer()

# --- Problem size ---
n = 300       # number of observations
p = 300       # number of covariates
p_bin = 20    # number of binary covariates (the other p - p_bin are continuous)

# --- Active (non-null) covariates ---
p_nonnull_cont = 35
p_nonnull_ordinal = 5

# --- Experiment settings ---
nsim = 200    # number of simulated data sets
n_cv = 5      # cross-validation setting
FDR = 0.2     # target false discovery rate

# --- Weibull survival times (scale -> sigma, shape -> nu) ---
lambda_T = 0.05   # lambda = 1 / (scale ** shape) = 1 / (sigma ** nu)
nu_T = 1.5        # nu_T = 1 (shape 1) gives the exponential distribution

# --- Regression coefficients of the active covariates ---
# Sign pattern, one entry per active covariate.
beta_coef_1 = np.array([
    1, 1, -1, -1, 1, 1, -1, 1, -1, -1,
    1, -1, 1, 1, -1, -1, 1, -1, -1, 1,
    -1, 1, -1, 1, -1, 1, -1, 1, -1, 1,
    -1, -1, 1, 1, 1, 1, -1, -1, 1, -1,
])

# Same signs, rescaled to the chosen magnitude.
beta_coef = np.where(beta_coef_1 == 1, 0.7, -0.7)

# --- Block AR(1) correlation structure ---
b_by_block = 10   # covariates per block (10 for size p=500)
rho = 0.6         # AR(1) correlation inside each block

# --- Skew-t margins for the continuous covariates ---
skew_dist = "Yes"   # one of "Yes", "No"
alpha_level = 2
nu_level = 5

# Censoring  10%

In [None]:
# Seed offset: simulation i seeds both NumPy and R with i + replication.
replication = 10000000

# Upper bound of the Uniform(0, u_max) censoring times drawn in the simulation loop.
# NOTE(review): presumably tuned to give about 10% censoring -- confirm against np_censoring.mean().
u_max= 538

### Correlation matrix

In [None]:
# From Python to R
%R -i p
%R -i b_by_block
%R -i rho

# Block-diagonal correlation matrix: n_blocks copies (Kronecker product with an
# identity matrix) of a b_by_block x b_by_block AR(1) block with entries rho^|row - col|.
# n_blocks uses integer division, so covMat is p x p only when p is a multiple of b_by_block.
%R n_blocks <- p%/%b_by_block
%R covMat <- diag(n_blocks) %x% matrix(rho^abs(matrix(1:b_by_block,b_by_block, b_by_block, byrow = TRUE) - matrix(1:b_by_block, b_by_block, b_by_block)), b_by_block, b_by_block)
# Force a unit diagonal (rho^0 already equals 1 there).
%R diag(covMat) <- 1

# From R to Python
%R -o covMat
 

In [None]:
# Truncation level for the empirical CDF (ECDF) estimator: in the knockoff-sampling
# loop, ECDF values below delta_n or above 1 - delta_n are clipped before qnorm().
delta_n = 1/( (4*n**(1/4))*math.sqrt(math.pi*math.log(n)) )

### Simulations: design matrix X and survival time t

In [None]:
# Drop any design matrix left over from an earlier cell before the
# simulation loop pulls a fresh X from R.
try:
    del X
except NameError:
    pass

In [None]:
#Arrays and lists to save information
ls_simulations = list(range(nsim))
ls_beta = list(range(nsim))
ls_X = list(range(nsim))
ls_types = list(range(nsim))
np_censoring = np.zeros(nsim)
np_mean_survival_time = np.zeros(nsim)

ti = timer() #Initial time


for i in range(nsim):
    
  #Set seed for replication
  np.random.seed(i+replication)  #Python
  %R -i i
  %R -i replication
  %R set.seed(i+replication)  #R

  #Sigma assignation
  Sigma = covMat

  #From Python to R 
  %R -i Sigma
  %R -i n
  %R -i p_bin
  %R -i p  
  %R -i p_nonnull_ordinal 
  %R -i p_nonnull_cont 
  %R -i alpha_level
  %R -i nu_level
    
  #Creation of the vector with correlation information
  %R lowerpart <- lower.tri(Sigma)
  %R rhos <- Sigma[lowerpart] 
  %R p_con <- p - p_bin


  #Binary and continiuos variables
  %R binary <- rep("bin", p_bin)
  %R con <- rep("con", p_con)
  %R types <- sample(c(binary, con))
  
  #Simulations of a mixed random vector
  %R X_norm_bin <- as.data.frame(gen_data(n = n, types = types, rhos = rhos, copulas="no", XP = NULL, showplot = FALSE)$X)
  %R X <- X_norm_bin

  if (skew_dist=="Yes"):  
    #There is seldom an error using the default solver "NB" for the function qsn().
    #Thus, in case of error, the solver is switched to "RFB"
    %R for(i in 1:p) {   if(types[i]=="con"){ tryCatch({X[,i] <-as.vector(qst(pnorm(X_norm_bin[,i], mean=0,sd=1),xi=0, omega=1,alpha=alpha_level,nu=nu_level))}, error=function(e){X[,i] <-as.vector(qst(pnorm(X_norm_bin[,i], mean=0,sd=1),xi=0, omega=1,alpha=alpha_level, nu=nu_level, solver="RFB"))}) }}
      
  #Creating a dataframe using the design matrix X
  #From R to Pyhton
  %R -o X 
  %R -o types
  %R -o p_con
  
  X.reset_index(drop=True, inplace=True)
  ls_X[i] = X
  ls_types[i] = types
  df_X = pd.DataFrame(X)

  #Names for the variables (X)
  numbers = np.arange(1,p+1)
  var_names = ['Var'+ str(number) for number in numbers]
  df_X.columns= var_names

  #Survival time simulation

  #Extracting the ordinal and continuous variables
  %R col_ind_ordinal <- sapply(X, function(col) length(unique(col)) < 4)
  %R col_ind_cont <- sapply(X, function(col) length(unique(col)) > 4)
  %R col_names <- names(X)
  %R col_names_ordinal <- col_names[col_ind_ordinal]
  %R col_names_cont <- col_names[col_ind_cont]  

  #Variables and coefficients nonnull
  if (p_nonnull_ordinal==0):
      %R col_nonnull <- c(sample(col_names_cont, size=p_nonnull_cont, replace=FALSE))
  else:
      #If there are ordinal nonnull variables the proportion of nonnull continuous and nonnull ordinal
      #remains the same between diferent simulated data sets  
      %R col_nonnull <- c(sample(col_names_cont, size=p_nonnull_cont, replace=FALSE), sample(col_names_ordinal, size=p_nonnull_ordinal, replace=FALSE))
   
  %R ind_betas <- as.numeric(substr(col_nonnull,start=2,stop=4))
  %R -o ind_betas 
  ind_betas_sort = np.sort(ind_betas.astype(int))
  
  #betas and magnitud
  beta = np.zeros(p,dtype=float)
  beta[ind_betas_sort-1] = beta_coef
  ls_beta[i] = beta

  #Survival time simulations (Bender et al. 2006)
  t = ( - (np.log(np.random.uniform(low=0.0, high=1.0, size=n)) )/( lambda_T*np.exp( np.dot(X, beta) ) ))**(1/nu_T)

  #Censored time and censored indicator
  t_cens = np.random.uniform(0, u_max, size=n) 
  I_cens = np.where(t<= t_cens, 1, 0)

  #Observed time 
  t_obs = np.minimum(t, t_cens)    

  #Creating the dataframe with survival information
  df_Y = pd.DataFrame({ 'Status':I_cens,'Survival_time':t_obs})
  
  #Data frame with simulated data (Y,X)
  ls_simulations[i] = pd.concat([df_Y,df_X], axis=1)

  #Saving information
  np_censoring[i] = np.around((1-ls_simulations[i]["Status"].sum()/n)*100,decimals=4)
  np_mean_survival_time[i] = ls_simulations[i]["Survival_time"].mean()
    
  #Eliminating X (If not, X causes problems in the loop)
  del X

time_simulations_1 = timer() - ti 

### Censoring level

In [None]:
np_censoring

In [None]:
np_censoring.mean()

### Cox’s proportional hazards model  with lasso penalization (glmnet)

In [None]:
# Per-simulation results of the plain lasso-penalised Cox model (no knockoffs).
np_Number_CoxLasso_Rejections = np.zeros(nsim)
np_CoxLasso_Power = np.zeros(nsim)
np_CoxLasso_FDP = np.zeros(nsim)
np_CoxLasso_FD = np.zeros(nsim)

ti = timer() #Initial time

#Parallel code with Joblib: one glmnet fit per simulated data set
ls_coef_CoxLasso = Parallel(n_jobs=jobs)(delayed(lasso_glmnet)(x) for x in ls_simulations)

for i in range(nsim):
    fit_coef_vec = ls_coef_CoxLasso[i]

    #A covariate is "rejected" (selected) when its fitted coefficient is non-zero
    np_Number_CoxLasso_Rejections[i] = np.sum(fit_coef_vec!= 0)
    print("Number of non-zero coefficients: {}".format(np_Number_CoxLasso_Rejections[i]))
    np_rejections_CoxLasso = np.where(fit_coef_vec!= 0,1,0)

    is_nonnull = ls_beta[i] != 0
    n_rejections = np_rejections_CoxLasso.sum()
    n_false_discoveries = np.dot(np_rejections_CoxLasso, ls_beta[i] == 0)

    #Power: share of the truly active covariates that were selected
    np_CoxLasso_Power[i] = np.around(100*(np.dot(np_rejections_CoxLasso, is_nonnull) / is_nonnull.sum()), decimals=2)
    #FDP = false discoveries / max(rejections, 1). With no rejections the FDP is 0;
    #dividing by 0 would give NaN, which DataFrame.mean() silently skips and so
    #biases the averaged FDP.
    np_CoxLasso_FDP[i] = np.around(100*n_false_discoveries / max(n_rejections, 1), decimals=2)
    np_CoxLasso_FD[i] = np.around(n_false_discoveries)
    print(f"The Penalized Cox proportional hazards model has discovered {np_CoxLasso_Power[i]}% of the non-nulls with a FDP of {np_CoxLasso_FDP[i]}%")

time_CoxLASSO_1 = timer() - ti 

### Estimation of the Latent correlation matrix 

In [None]:
# Relative Frobenius error of every latent-correlation estimate w.r.t. covMat.
np_Frobenius_norm_latentcor = np.zeros(nsim)

ti = timer() #Initial time

# One latentcor fit per simulated design matrix, run in parallel with joblib.
matrices_latentcor = Parallel(n_jobs=jobs)(
    delayed(latentcor_estimation)(x, types)
    for x, types in zip(ls_X, ls_types)
)

for i in range(nsim):
    estimation_error = matrices_latentcor[i] - covMat
    np_Frobenius_norm_latentcor[i] = np.linalg.norm(estimation_error, 'fro') / np.linalg.norm(covMat, 'fro')

time_latentcor_1 = timer() - ti

### Graphical Lasso estimation

In [None]:
# Relative Frobenius error of every graphical-lasso covariance estimate w.r.t. covMat.
np_Frobenius_norm_Sigma_hat = np.zeros(nsim)

ti = timer() #Initial time

# One graphical-lasso fit per latent-correlation estimate, run in parallel with joblib.
matrices_Sigma_hat = Parallel(n_jobs=jobs)(
    delayed(glasso_function)(x) for x in matrices_latentcor
)

for i in range(nsim):
    estimation_error = matrices_Sigma_hat[i] - covMat
    np_Frobenius_norm_Sigma_hat[i] = np.linalg.norm(estimation_error, 'fro') / np.linalg.norm(covMat, 'fro')

time_GraphicalLASSO_1 = timer() - ti

### Knockoffs sampling

In [None]:
# Placeholder list; entry i is overwritten below with the data set (Y, X, Xk_hat).
ls_simulations_Xk_hat = list(range(nsim))

ti = timer() # Initial time for the loop

# For each simulated design matrix: map the margins to (approximately) normal scores,
# sample Gaussian knockoffs with the estimated covariance, then map the knockoffs
# back to the original margins.
# The R loops below use R's own index `i` (separate from Python's `i`) and the
# R variable `p`, which must already exist in the R session (sent with `%R -i p`).
for i in range(nsim):

  # NOTE: this assigns the notebook-level name X; df_X is not used again in this cell.
  X = ls_X[i]
  df_X = ls_X[i] 
  types = ls_types[i]
  
  #From Python to R
  %R -i X 
  %R -i types
  %R -i delta_n
  
  #Transformation of the marginal distribution to normal distribution
  %R X_ecdf <- X  
  %R X_norm_hat <- X
  
  #Empirical cumulative distribution function of each continuous column, evaluated at its own values
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i] <- as.vector(ecdf(X[,i])(X[,i])) }}

  #For truncation:
  #Continuous variables: clip the ECDF values to [delta_n, 1 - delta_n], then apply qnorm()
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i][ X_ecdf[,i] < delta_n] <- delta_n }}
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i][ X_ecdf[,i] > (1-delta_n)] <- 1-delta_n }}                                    
  %R for(i in 1:p) {   if(types[i]=="con"){ X_norm_hat[,i] <- as.vector(qnorm( X_ecdf[,i] ) )}}
  #Ordinal variables: binary columns are recoded so that 0 becomes -1 (other values are kept)
  %R for(i in 1:p) {   if(types[i]=="bin"){ X_norm_hat[,i][X[,i]==0] = (-1)}}  

  #From R to Python
  %R -o X_norm_hat
    
  #Object for Gaussian knockoffs using the Sigma_hat and method mvr
  Gaussian_sampler_hat = knockpy.knockoffs.GaussianSampler(X_norm_hat.to_numpy(), mu=None,
                                                           Sigma=matrices_Sigma_hat[i],
                                                           method='mvr', verbose=False)
  Xk_norm_hat = Gaussian_sampler_hat.sample_knockoffs()  

  
  #Creating a dataframe from the knockoffs Xk_norm_hat
  df_Xk_norm_hat = pd.DataFrame(Xk_norm_hat)

  #From Python to R
  %R -i df_Xk_norm_hat
  
  #Transformation of Gaussian knockoffs to the original marginal distribution
  %R df_Xk_hat <- df_Xk_norm_hat
   
  #Continuous columns: empirical quantiles of X[,i] (R quantile type 8) at pnorm(knockoff)
  %R for(i in 1:p) {   if(types[i]=="con"){ df_Xk_hat[,i] <- as.vector(quantile(X[,i], probs=pnorm(df_Xk_norm_hat[,i]), type=8)) }}
  #Binary columns: 1 when the Gaussian knockoff is >= 0, otherwise 0
  %R for(i in 1:p) {   if(types[i]=="bin"){ df_Xk_hat[,i][df_Xk_norm_hat[,i]>=0]= 1}}
  %R for(i in 1:p) {   if(types[i]=="bin"){ df_Xk_hat[,i][df_Xk_norm_hat[,i]<0]= 0}}
  
  #From R to Python  
  %R -o df_Xk_hat
  df_Xk_hat.reset_index(drop=True, inplace=True)
    
  #Creating the names for the knockoff variables (KVar1 ... KVarp)
  numbers = np.arange(1,p+1)
  kvar_names = ['KVar'+ str(number) for number in numbers]
  df_Xk_hat.columns= kvar_names     

  #Final dataset: survival columns and original covariates followed by the knockoffs
  ls_simulations_Xk_hat[i] = pd.concat([ls_simulations[i], df_Xk_hat], axis=1)
  
time_knockoffs_1 = timer() - ti 

### Cox’s proportional hazards model  with lasso penalization for (X,Xk_hat) (glmnet) 

In [None]:
# Number of non-zero lasso coefficients in each augmented (X, Xk_hat) fit.
np_Number_CoxLasso_Rejections_knockoff_hat = np.zeros(nsim)

tii = timer()#Initial time

# One glmnet fit per augmented data set, run in parallel with joblib.
ls_coef_knockoff_hat = Parallel(n_jobs=jobs)(
    delayed(lasso_glmnet)(x) for x in ls_simulations_Xk_hat
)

for i in range(nsim):
    nonzero_mask = ls_coef_knockoff_hat[i] != 0
    np_Number_CoxLasso_Rejections_knockoff_hat[i] = np.sum(nonzero_mask)

time_CoxLASSO_X_Xk_1 = timer() - tii

### Wj using the LASSO coefficient difference statistic and threshold rejection

In [None]:
# Per-simulation results of the knockoff filter.
np_Number_Rejections_knockoff_hat = np.zeros(nsim)
np_knockoff_hat_Power = np.zeros(nsim)
np_knockoff_hat_FDP = np.zeros(nsim)
np_knockoff_hat_FD = np.zeros(nsim)

for i in range(nsim):

    #Feature importances: the first p coefficients belong to the original
    #covariates, the remaining ones to their knockoffs
    Z = ls_coef_knockoff_hat[i]

    #Wj statistic (lasso coefficient difference)
    pair_W = np.abs(Z[0:p]) - np.abs(Z[p:])

    #Threshold selection and variable selection
    threshold = data_dependent_threshhold(W=pair_W, fdr= FDR)
    print("Threshold for knockoffs ")
    print(threshold)
    rejections = make_selections(W=pair_W, fdr= FDR)

    is_nonnull = ls_beta[i] != 0
    n_rejections = rejections.sum()
    n_false_discoveries = np.dot(rejections, ls_beta[i] == 0)

    #Printing and saving the Power and FDP of the knockoff procedure
    np_Number_Rejections_knockoff_hat[i] = n_rejections
    print("Number of non-zero knockoff coefficients: {}".format(np_Number_Rejections_knockoff_hat[i]))
    np_knockoff_hat_Power[i] = np.around(100*(np.dot(rejections, is_nonnull) / is_nonnull.sum()), decimals=2)
    #FDP = false discoveries / max(rejections, 1). The knockoff filter can select
    #nothing; the FDP is then 0. Dividing by 0 would give NaN, which
    #DataFrame.mean() silently skips and so biases the averaged FDP.
    np_knockoff_hat_FDP[i] = np.around(100*n_false_discoveries / max(n_rejections, 1), decimals=2)
    np_knockoff_hat_FD[i] = np.around(n_false_discoveries)
    print(f"The knockoff filter has discovered {np_knockoff_hat_Power[i]}% of the non-nulls with a FDP of {np_knockoff_hat_FDP[i]}%")


In [None]:
# Nominal censoring level (in %) of this scenario, repeated once per simulation.
np_varying_feature = np.full(nsim, 10)

# One row per simulation, gathering every metric computed for this scenario.
df_simulations_results_1 = pd.DataFrame({
    'Censoring': np_varying_feature,
    'Censoring indicator': np_censoring,
    'Mean survival time': np_mean_survival_time,
    'Relative Frobenius norm latentcor': np_Frobenius_norm_latentcor,
    'Relative Frobenius norm Sigma hat': np_Frobenius_norm_Sigma_hat,
    'Number_CoxLasso_Rejections': np_Number_CoxLasso_Rejections,
    'CoxLasso_Power(%)': np_CoxLasso_Power,
    'CoxLasso_FDP(%)': np_CoxLasso_FDP,
    'CoxLasso_FD': np_CoxLasso_FD,
    'Number_CoxLasso_Rejections_knockoff_hat': np_Number_CoxLasso_Rejections_knockoff_hat,
    'Number_Rejections_knockoff_hat': np_Number_Rejections_knockoff_hat,
    'knockoff_hat_Power(%)': np_knockoff_hat_Power,
    'knockoff_hat_FDP(%)': np_knockoff_hat_FDP,
    'knockoff_hat_FD': np_knockoff_hat_FD,
})

In [None]:
df_simulations_results_1

In [None]:
df_simulations_results_1.mean()

# Censoring 20%

In [None]:
# Seed offset: simulation i seeds both NumPy and R with i + replication.
replication = 10001000

# Upper bound of the Uniform(0, u_max) censoring times drawn in the simulation loop.
# NOTE(review): presumably tuned to give about 20% censoring -- confirm against np_censoring.mean().
u_max=152

### Correlation matrix

In [None]:
# From Python to R
%R -i p
%R -i b_by_block
%R -i rho

# Block-diagonal correlation matrix: n_blocks copies (Kronecker product with an
# identity matrix) of a b_by_block x b_by_block AR(1) block with entries rho^|row - col|.
# n_blocks uses integer division, so covMat is p x p only when p is a multiple of b_by_block.
%R n_blocks <- p%/%b_by_block
%R covMat <- diag(n_blocks) %x% matrix(rho^abs(matrix(1:b_by_block,b_by_block, b_by_block, byrow = TRUE) - matrix(1:b_by_block, b_by_block, b_by_block)), b_by_block, b_by_block)
# Force a unit diagonal (rho^0 already equals 1 there).
%R diag(covMat) <- 1

# From R to Python
%R -o covMat
 

In [None]:
# Truncation level for the empirical CDF (ECDF) estimator: in the knockoff-sampling
# loop, ECDF values below delta_n or above 1 - delta_n are clipped before qnorm().
# TODO: add the literature reference for this truncation level.
delta_n = 1/( (4*n**(1/4))*math.sqrt(math.pi*math.log(n)) )

### Simulations: design matrix X and survival time t

In [None]:
# Drop any design matrix left over from an earlier cell before the
# simulation loop pulls a fresh X from R.
try:
    del X
except NameError:
    pass

In [None]:
#Arrays and lists to save information
ls_simulations = list(range(nsim))
ls_beta = list(range(nsim))
ls_X = list(range(nsim))
ls_types = list(range(nsim))
np_censoring = np.zeros(nsim)
np_mean_survival_time = np.zeros(nsim)

ti = timer() #Initial time

for i in range(nsim):
    
  #Set seed for replication of the results
  np.random.seed(i+replication)  #Python
  %R -i i
  %R -i replication
  %R set.seed(i+replication)  #R

  #Sigma assignation
  Sigma = covMat

  #From Python to R 
  %R -i Sigma
  %R -i n
  %R -i p_bin
  %R -i p  
  %R -i p_nonnull_ordinal 
  %R -i p_nonnull_cont 
  %R -i alpha_level
  %R -i nu_level

  #Creation of the vector with correlation information
  %R lowerpart <- lower.tri(Sigma)
  %R rhos <- Sigma[lowerpart] 
  %R p_con <- p - p_bin


  #Binary and continiuos variables
  %R binary <- rep("bin", p_bin)
  %R con <- rep("con", p_con)
  %R types <- sample(c(binary, con))
  
  #Simulations of a mixed random vector
  %R X_norm_bin <- as.data.frame(gen_data(n = n, types = types, rhos = rhos, copulas="no", XP = NULL, showplot = FALSE)$X)
  %R X <- X_norm_bin

  if (skew_dist=="Yes"):  
    #There is seldom an error using the default solver "NB" for the function qsn().
    #Thus, in case of error, the solver is switched to "RFB"
    %R for(i in 1:p) {   if(types[i]=="con"){ tryCatch({X[,i] <-as.vector(qst(pnorm(X_norm_bin[,i], mean=0,sd=1),xi=0, omega=1,alpha=alpha_level,nu=nu_level))}, error=function(e){X[,i] <-as.vector(qst(pnorm(X_norm_bin[,i], mean=0,sd=1),xi=0, omega=1,alpha=alpha_level, nu=nu_level, solver="RFB"))}) }}
      
  #Creating a dataframe using the design matrix X
  #From R to Pyhton
  %R -o X 
  %R -o types
  %R -o p_con
  
  X.reset_index(drop=True, inplace=True)
  ls_X[i] = X
  ls_types[i] = types
  df_X = pd.DataFrame(X)

  #Names for the variables (X)
  numbers = np.arange(1,p+1)
  var_names = ['Var'+ str(number) for number in numbers]
  df_X.columns= var_names

  #Survival time simulation

  #Extracting the ordinal and continuous variables
  %R col_ind_ordinal <- sapply(X, function(col) length(unique(col)) < 4)
  %R col_ind_cont <- sapply(X, function(col) length(unique(col)) > 4)
  %R col_names <- names(X)
  %R col_names_ordinal <- col_names[col_ind_ordinal]
  %R col_names_cont <- col_names[col_ind_cont]  

  #Variables and coefficients nonnull
  if (p_nonnull_ordinal==0):
      %R col_nonnull <- c(sample(col_names_cont, size=p_nonnull_cont, replace=FALSE))
  else:
      #If there are ordinal nonnull variables the proportion of nonnull continuous and nonnull ordinal
      #remains the same between diferent simulated data sets  
      %R col_nonnull <- c(sample(col_names_cont, size=p_nonnull_cont, replace=FALSE), sample(col_names_ordinal, size=p_nonnull_ordinal, replace=FALSE))
   
  %R ind_betas <- as.numeric(substr(col_nonnull,start=2,stop=4))
  %R -o ind_betas 
  ind_betas_sort = np.sort(ind_betas.astype(int))
  
  #betas and magnitud
  beta = np.zeros(p,dtype=float)
  beta[ind_betas_sort-1] = beta_coef
  ls_beta[i] = beta

  #Survival time simulations (Bender et al. 2006)
  t = ( - (np.log(np.random.uniform(low=0.0, high=1.0, size=n)) )/( lambda_T*np.exp( np.dot(X, beta) ) ))**(1/nu_T)

  #Censored time and Censored indicator
  t_cens = np.random.uniform(0, u_max, size=n) 
  I_cens = np.where(t<= t_cens, 1, 0)

  #Observed time 
  t_obs = np.minimum(t, t_cens)    

  #Creating the dataframe with survival information
  df_Y = pd.DataFrame({ 'Status':I_cens,'Survival_time':t_obs})
  
  #Data frame with simulated data (Y,X)
  ls_simulations[i] = pd.concat([df_Y,df_X], axis=1)

  #Saving information
  np_censoring[i] = np.around((1-ls_simulations[i]["Status"].sum()/n)*100,decimals=4)
  np_mean_survival_time[i] = ls_simulations[i]["Survival_time"].mean()
    
  #Eliminating X (If not, X causes problems in the loop)
  del X

time_simulations_2 = timer() - ti 

### Cox’s proportional hazards model with lasso penalization (glmnet)

In [None]:
# Per-simulation results of the plain lasso-penalised Cox model (no knockoffs).
np_Number_CoxLasso_Rejections = np.zeros(nsim)
np_CoxLasso_Power = np.zeros(nsim)
np_CoxLasso_FDP = np.zeros(nsim)
np_CoxLasso_FD = np.zeros(nsim)

ti = timer() #Initial time

#Parallel code with Joblib: one glmnet fit per simulated data set
ls_coef_CoxLasso = Parallel(n_jobs=jobs)(delayed(lasso_glmnet)(x) for x in ls_simulations)

for i in range(nsim):
    fit_coef_vec = ls_coef_CoxLasso[i]

    #A covariate is "rejected" (selected) when its fitted coefficient is non-zero
    np_Number_CoxLasso_Rejections[i] = np.sum(fit_coef_vec!= 0)
    print("Number of non-zero coefficients: {}".format(np_Number_CoxLasso_Rejections[i]))
    np_rejections_CoxLasso = np.where(fit_coef_vec!= 0,1,0)

    is_nonnull = ls_beta[i] != 0
    n_rejections = np_rejections_CoxLasso.sum()
    n_false_discoveries = np.dot(np_rejections_CoxLasso, ls_beta[i] == 0)

    #Power: share of the truly active covariates that were selected
    np_CoxLasso_Power[i] = np.around(100*(np.dot(np_rejections_CoxLasso, is_nonnull) / is_nonnull.sum()), decimals=2)
    #FDP = false discoveries / max(rejections, 1). With no rejections the FDP is 0;
    #dividing by 0 would give NaN, which DataFrame.mean() silently skips and so
    #biases the averaged FDP.
    np_CoxLasso_FDP[i] = np.around(100*n_false_discoveries / max(n_rejections, 1), decimals=2)
    np_CoxLasso_FD[i] = np.around(n_false_discoveries)
    print(f"The Penalized Cox proportional hazards model has discovered {np_CoxLasso_Power[i]}% of the non-nulls with a FDP of {np_CoxLasso_FDP[i]}%")

time_CoxLASSO_2 = timer() - ti 

### Estimation of the Latent correlation matrix 

In [None]:
# Relative Frobenius error of every latent-correlation estimate w.r.t. covMat.
np_Frobenius_norm_latentcor = np.zeros(nsim)

ti = timer() #Initial time

# One latentcor fit per simulated design matrix, run in parallel with joblib.
matrices_latentcor = Parallel(n_jobs=jobs)(
    delayed(latentcor_estimation)(x, types)
    for x, types in zip(ls_X, ls_types)
)

for i in range(nsim):
    estimation_error = matrices_latentcor[i] - covMat
    np_Frobenius_norm_latentcor[i] = np.linalg.norm(estimation_error, 'fro') / np.linalg.norm(covMat, 'fro')

time_latentcor_2 = timer() - ti

### Graphical lasso estimation

In [None]:
# Relative Frobenius error of every graphical-lasso covariance estimate w.r.t. covMat.
np_Frobenius_norm_Sigma_hat = np.zeros(nsim)

ti = timer() #Initial time

# One graphical-lasso fit per latent-correlation estimate, run in parallel with joblib.
matrices_Sigma_hat = Parallel(n_jobs=jobs)(
    delayed(glasso_function)(x) for x in matrices_latentcor
)

for i in range(nsim):
    estimation_error = matrices_Sigma_hat[i] - covMat
    np_Frobenius_norm_Sigma_hat[i] = np.linalg.norm(estimation_error, 'fro') / np.linalg.norm(covMat, 'fro')

time_GraphicalLASSO_2 = timer() - ti

### Knockoffs sampling

In [None]:
# Placeholder list; entry i is overwritten below with the data set (Y, X, Xk_hat).
ls_simulations_Xk_hat = list(range(nsim))

ti = timer()#Initial time for the loop

# For each simulated design matrix: map the margins to (approximately) normal scores,
# sample Gaussian knockoffs with the estimated covariance, then map the knockoffs
# back to the original margins.
# The R loops below use R's own index `i` (separate from Python's `i`) and the
# R variable `p`, which must already exist in the R session (sent with `%R -i p`).
for i in range(nsim):

  # NOTE: this assigns the notebook-level name X; df_X is not used again in this cell.
  X = ls_X[i]
  df_X = ls_X[i] 
  types = ls_types[i]
  
  #From Python to R
  %R -i X 
  %R -i types
  %R -i delta_n
  
  #Transformation of the marginal distribution to normal distribution
  %R X_ecdf <- X  
  %R X_norm_hat <- X
  
  #Empirical cumulative distribution function of each continuous column, evaluated at its own values
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i] <- as.vector(ecdf(X[,i])(X[,i])) }}

  #For truncation:
  #Continuous variables: clip the ECDF values to [delta_n, 1 - delta_n], then apply qnorm()
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i][ X_ecdf[,i] < delta_n] <- delta_n }}
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i][ X_ecdf[,i] > (1-delta_n)] <- 1-delta_n }}                                    
  %R for(i in 1:p) {   if(types[i]=="con"){ X_norm_hat[,i] <- as.vector(qnorm( X_ecdf[,i] ) )}}
  #Ordinal variables: binary columns are recoded so that 0 becomes -1 (other values are kept)
  %R for(i in 1:p) {   if(types[i]=="bin"){ X_norm_hat[,i][X[,i]==0] = (-1)}}  

  #From R to Python
  %R -o X_norm_hat
    
  #Object for Gaussian knockoffs using the Sigma_hat and method mvr
  Gaussian_sampler_hat = knockpy.knockoffs.GaussianSampler(X_norm_hat.to_numpy(), mu=None,
                                                           Sigma=matrices_Sigma_hat[i],
                                                           method='mvr', verbose=False)
  Xk_norm_hat = Gaussian_sampler_hat.sample_knockoffs()  

  
  #Creating a dataframe from the knockoffs Xk_norm_hat
  df_Xk_norm_hat = pd.DataFrame(Xk_norm_hat)

  #From Python to R
  %R -i df_Xk_norm_hat
   
  #Transformation of Gaussian knockoffs to the original marginal distribution
  %R df_Xk_hat <- df_Xk_norm_hat
   
  #Continuous columns: empirical quantiles of X[,i] (R quantile type 8) at pnorm(knockoff)
  %R for(i in 1:p) {   if(types[i]=="con"){ df_Xk_hat[,i] <- as.vector(quantile(X[,i], probs=pnorm(df_Xk_norm_hat[,i]), type=8)) }}
  #Binary columns: 1 when the Gaussian knockoff is >= 0, otherwise 0
  %R for(i in 1:p) {   if(types[i]=="bin"){ df_Xk_hat[,i][df_Xk_norm_hat[,i]>=0]= 1}}
  %R for(i in 1:p) {   if(types[i]=="bin"){ df_Xk_hat[,i][df_Xk_norm_hat[,i]<0]= 0}}
  
  #From R to Python  
  %R -o df_Xk_hat
  df_Xk_hat.reset_index(drop=True, inplace=True)
    
  #Creating the names for the knockoff variables (KVar1 ... KVarp)
  numbers = np.arange(1,p+1)
  kvar_names = ['KVar'+ str(number) for number in numbers]
  df_Xk_hat.columns= kvar_names     

  #Final dataset: survival columns and original covariates followed by the knockoffs
  ls_simulations_Xk_hat[i] = pd.concat([ls_simulations[i], df_Xk_hat], axis=1)
  
time_knockoffs_2 = timer() - ti 

### Cox’s proportional hazards model with lasso penalization for (X,Xk_hat) (glmnet)

In [None]:
# Number of non-zero lasso coefficients in each augmented (X, Xk_hat) fit.
np_Number_CoxLasso_Rejections_knockoff_hat = np.zeros(nsim)

tii = timer()#Initial time

# One glmnet fit per augmented data set, run in parallel with joblib.
ls_coef_knockoff_hat = Parallel(n_jobs=jobs)(
    delayed(lasso_glmnet)(x) for x in ls_simulations_Xk_hat
)

for i in range(nsim):
    nonzero_mask = ls_coef_knockoff_hat[i] != 0
    np_Number_CoxLasso_Rejections_knockoff_hat[i] = np.sum(nonzero_mask)

time_CoxLASSO_X_Xk_2 = timer() - tii

### Wj using the LASSO coefficient difference statistic and threshold rejection

In [None]:
# Per-simulation results of the knockoff filter.
np_Number_Rejections_knockoff_hat = np.zeros(nsim)
np_knockoff_hat_Power = np.zeros(nsim)
np_knockoff_hat_FDP = np.zeros(nsim)
np_knockoff_hat_FD = np.zeros(nsim)

for i in range(nsim):

    #Feature importances: the first p coefficients belong to the original
    #covariates, the remaining ones to their knockoffs
    Z = ls_coef_knockoff_hat[i]

    #Wj statistic (lasso coefficient difference)
    pair_W = np.abs(Z[0:p]) - np.abs(Z[p:])

    #Threshold selection and variable selection
    threshold = data_dependent_threshhold(W=pair_W, fdr= FDR)
    print("Threshold for knockoffs ")
    print(threshold)
    rejections = make_selections(W=pair_W, fdr= FDR)

    is_nonnull = ls_beta[i] != 0
    n_rejections = rejections.sum()
    n_false_discoveries = np.dot(rejections, ls_beta[i] == 0)

    #Printing and saving the Power and FDP of the knockoff procedure
    np_Number_Rejections_knockoff_hat[i] = n_rejections
    print("Number of non-zero knockoff coefficients: {}".format(np_Number_Rejections_knockoff_hat[i]))
    np_knockoff_hat_Power[i] = np.around(100*(np.dot(rejections, is_nonnull) / is_nonnull.sum()), decimals=2)
    #FDP = false discoveries / max(rejections, 1). The knockoff filter can select
    #nothing; the FDP is then 0. Dividing by 0 would give NaN, which
    #DataFrame.mean() silently skips and so biases the averaged FDP.
    np_knockoff_hat_FDP[i] = np.around(100*n_false_discoveries / max(n_rejections, 1), decimals=2)
    np_knockoff_hat_FD[i] = np.around(n_false_discoveries)
    print(f"The knockoff filter has discovered {np_knockoff_hat_Power[i]}% of the non-nulls with a FDP of {np_knockoff_hat_FDP[i]}%")


In [None]:
# Nominal censoring level (in %) of this scenario, repeated once per simulation.
np_varying_feature = np.full(nsim, 20)

# One row per simulation, gathering every metric computed for this scenario.
df_simulations_results_2 = pd.DataFrame({
    'Censoring': np_varying_feature,
    'Censoring indicator': np_censoring,
    'Mean survival time': np_mean_survival_time,
    'Relative Frobenius norm latentcor': np_Frobenius_norm_latentcor,
    'Relative Frobenius norm Sigma hat': np_Frobenius_norm_Sigma_hat,
    'Number_CoxLasso_Rejections': np_Number_CoxLasso_Rejections,
    'CoxLasso_Power(%)': np_CoxLasso_Power,
    'CoxLasso_FDP(%)': np_CoxLasso_FDP,
    'CoxLasso_FD': np_CoxLasso_FD,
    'Number_CoxLasso_Rejections_knockoff_hat': np_Number_CoxLasso_Rejections_knockoff_hat,
    'Number_Rejections_knockoff_hat': np_Number_Rejections_knockoff_hat,
    'knockoff_hat_Power(%)': np_knockoff_hat_Power,
    'knockoff_hat_FDP(%)': np_knockoff_hat_FDP,
    'knockoff_hat_FD': np_knockoff_hat_FD,
})

In [None]:
df_simulations_results_2

In [None]:
df_simulations_results_2.mean()

# Censoring 30%

In [None]:
# Seed offset: simulation i seeds both NumPy and R with i + replication.
replication = 10002000

# Upper bound of the Uniform(0, u_max) censoring times drawn in the simulation loop.
# NOTE(review): presumably tuned to give about 30% censoring -- confirm against np_censoring.mean().
u_max=61

### Correlation matrix

In [None]:
# From Python to R
%R -i p
%R -i b_by_block
%R -i rho

# Block-diagonal correlation matrix: n_blocks copies (Kronecker product with an
# identity matrix) of a b_by_block x b_by_block AR(1) block with entries rho^|row - col|.
# n_blocks uses integer division, so covMat is p x p only when p is a multiple of b_by_block.
%R n_blocks <- p%/%b_by_block
%R covMat <- diag(n_blocks) %x% matrix(rho^abs(matrix(1:b_by_block,b_by_block, b_by_block, byrow = TRUE) - matrix(1:b_by_block, b_by_block, b_by_block)), b_by_block, b_by_block)
# Force a unit diagonal (rho^0 already equals 1 there).
%R diag(covMat) <- 1

# From R to Python
%R -o covMat
 

In [None]:
# Truncation level for the empirical CDF (ECDF) estimator: in the knockoff-sampling
# loops, ECDF values below delta_n or above 1 - delta_n are clipped before qnorm().
# TODO: add the literature reference for this truncation level.
delta_n = 1/( (4*n**(1/4))*math.sqrt(math.pi*math.log(n)) )

### Simulations: design matrix X and survival time t

In [None]:
# Drop any design matrix left over from an earlier cell before the
# simulation loop pulls a fresh X from R.
try:
    del X
except NameError:
    pass

In [None]:
#Arrays and lists to save information
ls_simulations = list(range(nsim))
ls_beta = list(range(nsim))
ls_X = list(range(nsim))
ls_types = list(range(nsim))
np_censoring = np.zeros(nsim)
np_mean_survival_time = np.zeros(nsim)

ti = timer() #Initial time

for i in range(nsim):
    
  #Set seed for replication of the results
  np.random.seed(i+replication)  #Python
  %R -i i
  %R -i replication
  %R set.seed(i+replication)  #R

  #Sigma assignation
  Sigma = covMat

  #From Python to R 
  %R -i Sigma
  %R -i n
  %R -i p_bin
  %R -i p  
  %R -i p_nonnull_ordinal 
  %R -i p_nonnull_cont 
  %R -i alpha_level
  %R -i nu_level

  #Creation of the vector with correlation information
  %R lowerpart <- lower.tri(Sigma)
  %R rhos <- Sigma[lowerpart] 
  %R p_con <- p - p_bin


  #Binary and continiuos variables
  %R binary <- rep("bin", p_bin)
  %R con <- rep("con", p_con)
  %R types <- sample(c(binary, con))
  
  #Simulations of a mixed random vector
  %R X_norm_bin <- as.data.frame(gen_data(n = n, types = types, rhos = rhos, copulas="no", XP = NULL, showplot = FALSE)$X)
  %R X <- X_norm_bin

  if (skew_dist=="Yes"):  
    #There is seldom an error using the default solver "NB" for the function qsn().
    #Thus, in case of error, the solver is switched to "RFB"
    %R for(i in 1:p) {   if(types[i]=="con"){ tryCatch({X[,i] <-as.vector(qst(pnorm(X_norm_bin[,i], mean=0,sd=1),xi=0, omega=1,alpha=alpha_level,nu=nu_level))}, error=function(e){X[,i] <-as.vector(qst(pnorm(X_norm_bin[,i], mean=0,sd=1),xi=0, omega=1,alpha=alpha_level, nu=nu_level, solver="RFB"))}) }}
      
  #Creating a dataframe using the design matrix X
  #From R to Pyhton
  %R -o X 
  %R -o types
  %R -o p_con
  
  X.reset_index(drop=True, inplace=True)
  ls_X[i] = X
  ls_types[i] = types
  df_X = pd.DataFrame(X)

  #Names for the variables (X)
  numbers = np.arange(1,p+1)
  var_names = ['Var'+ str(number) for number in numbers]
  df_X.columns= var_names

  #Survival time simulation

  #Extracting the ordinal and continuous variables
  %R col_ind_ordinal <- sapply(X, function(col) length(unique(col)) < 4)
  %R col_ind_cont <- sapply(X, function(col) length(unique(col)) > 4)
  %R col_names <- names(X)
  %R col_names_ordinal <- col_names[col_ind_ordinal]
  %R col_names_cont <- col_names[col_ind_cont]  

  #Variables and coefficients nonnull
  if (p_nonnull_ordinal==0):
      %R col_nonnull <- c(sample(col_names_cont, size=p_nonnull_cont, replace=FALSE))
  else:
      #If there are ordinal nonnull variables the proportion of nonnull continuous and nonnull ordinal
      #remains the same between diferent simulated data sets  
      %R col_nonnull <- c(sample(col_names_cont, size=p_nonnull_cont, replace=FALSE), sample(col_names_ordinal, size=p_nonnull_ordinal, replace=FALSE))
   
  %R ind_betas <- as.numeric(substr(col_nonnull,start=2,stop=4))
  %R -o ind_betas 
  ind_betas_sort = np.sort(ind_betas.astype(int))
  
  #betas and magnitud
  beta = np.zeros(p,dtype=float)
  beta[ind_betas_sort-1] = beta_coef
  ls_beta[i] = beta

  #Survival time simulations (Bender et al. 2006)
  t = ( - (np.log(np.random.uniform(low=0.0, high=1.0, size=n)) )/( lambda_T*np.exp( np.dot(X, beta) ) ))**(1/nu_T)

  #Censored time and Censored indicator
  t_cens = np.random.uniform(0, u_max, size=n) #With Upper=70 --> 34% of censoring
  I_cens = np.where(t<= t_cens, 1, 0)

  #Observed time 
  t_obs = np.minimum(t, t_cens)    

  #Creating the dataframe with survival information
  df_Y = pd.DataFrame({ 'Status':I_cens,'Survival_time':t_obs})
  
  #Data frame with simulated data (Y,X)
  ls_simulations[i] = pd.concat([df_Y,df_X], axis=1)

  #Saving information
  np_censoring[i] = np.around((1-ls_simulations[i]["Status"].sum()/n)*100,decimals=4)
  np_mean_survival_time[i] = ls_simulations[i]["Survival_time"].mean()
    
  #Eliminating X (If not, X causes problems in the loop)
  del X

time_simulations_3 = timer() - ti     

### Cox’s proportional hazards model with lasso penalization (glmnet)

In [None]:
#Per-simulation results of the penalized Cox model fitted on the original variables
np_Number_CoxLasso_Rejections = np.zeros(nsim)
np_CoxLasso_Power = np.zeros(nsim)
np_CoxLasso_FDP = np.zeros(nsim)
np_CoxLasso_FD = np.zeros(nsim)

ti = timer() #Initial time

#Parallel code with Joblib
ls_coef_CoxLasso = Parallel(n_jobs=jobs)(delayed(lasso_glmnet)(x) for x in ls_simulations)

for i in range(nsim):
  fit_coef_vec = ls_coef_CoxLasso[i] 
  
  #Variable selection of the Penalized Cox proportional hazard model  
  np_Number_CoxLasso_Rejections[i] = np.sum(fit_coef_vec!= 0)
  print("Number of non-zero coefficients: {}".format(np_Number_CoxLasso_Rejections[i]))
  np_rejections_CoxLasso = np.where(fit_coef_vec!= 0,1,0)

  #Printing and saving the Power and FDP of the Penalized Cox proportional hazard model  
  np_CoxLasso_Power[i] = np.around(100*(np.dot(np_rejections_CoxLasso, ls_beta[i] != 0) / (ls_beta[i] != 0).sum()), decimals=2)
  #FDP = V / max(R, 1): with no rejections the FDP is 0 instead of 0/0 = NaN
  #(NaN entries would be skipped when the results are averaged)
  np_CoxLasso_FDP[i] = np.around(100*np.dot(np_rejections_CoxLasso, ls_beta[i] == 0) / max(np_rejections_CoxLasso.sum(), 1), decimals=2)
  np_CoxLasso_FD[i] = np.around(np.dot(np_rejections_CoxLasso, ls_beta[i] == 0))
  print(f"The Penalized Cox proportional hazards model has discovered {np_CoxLasso_Power[i]}% of the non-nulls with a FDP of {np_CoxLasso_FDP[i]}%")

time_CoxLASSO_3 = timer() - ti 

### Estimation of the latent correlation matrix 

In [None]:
np_Frobenius_norm_latentcor = np.zeros(nsim)

ti = timer() #Initial time

#Latent correlation estimate of every simulated design matrix (parallel code with Joblib)
matrices_latentcor = Parallel(n_jobs=jobs)(
    delayed(latentcor_estimation)(x, types) for x, types in zip(ls_X, ls_types)
)

#Relative Frobenius norm of the estimation error with respect to covMat
norm_covMat = np.linalg.norm(covMat, 'fro')
for i, estimate_i in enumerate(matrices_latentcor):
  np_Frobenius_norm_latentcor[i] = np.linalg.norm(estimate_i - covMat, 'fro') / norm_covMat

time_latentcor_3 = timer() - ti 

### Graphical lasso estimation

In [None]:
np_Frobenius_norm_Sigma_hat = np.zeros(nsim)

ti = timer() #Initial time

#glasso_function applied to every latent correlation estimate (parallel code with Joblib)
matrices_Sigma_hat = Parallel(n_jobs=jobs)(
    delayed(glasso_function)(x) for x in matrices_latentcor
)

#Relative Frobenius norm of the estimation error with respect to covMat
norm_covMat = np.linalg.norm(covMat, 'fro')
for i, estimate_i in enumerate(matrices_Sigma_hat):
    np_Frobenius_norm_Sigma_hat[i] = np.linalg.norm(estimate_i - covMat, 'fro') / norm_covMat

time_GraphicalLASSO_3 = timer() - ti 

### Knockoffs sampling

In [None]:
#Placeholder list; entry i is replaced in the loop by the simulated data set plus its knockoffs
ls_simulations_Xk_hat = list(range(nsim))

ti = timer() #Initial time for the loop

for i in range(nsim):

  #Design matrix and variable types of simulation i
  X = ls_X[i]
  df_X = ls_X[i] 
  types = ls_types[i]
  
  #From Python to R
  %R -i X 
  %R -i types
  %R -i delta_n
  
  #Transformation of the marginal distribution to normal distribution
  %R X_ecdf <- X  
  %R X_norm_hat <- X
  
  #Empirical cumulative distribution function (continuous columns only)
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i] <- as.vector(ecdf(X[,i])(X[,i])) }}

  #For truncation:
  #Continuous variables: ECDF values are clipped to [delta_n, 1-delta_n] and mapped with qnorm
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i][ X_ecdf[,i] < delta_n] <- delta_n }}
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i][ X_ecdf[,i] > (1-delta_n)] <- 1-delta_n }}                                    
  %R for(i in 1:p) {   if(types[i]=="con"){ X_norm_hat[,i] <- as.vector(qnorm( X_ecdf[,i] ) )}}
  #Ordinal variables: entries equal to 0 are recoded as -1, the other entries are left unchanged
  %R for(i in 1:p) {   if(types[i]=="bin"){ X_norm_hat[,i][X[,i]==0] = (-1)}}  

  #From R to Python
  %R -o X_norm_hat
    
  #Object for Gaussian knockoffs using the Sigma_hat and method mvr
  Gaussian_sampler_hat = knockpy.knockoffs.GaussianSampler(X_norm_hat.to_numpy(), mu=None,
                                                           Sigma=matrices_Sigma_hat[i],
                                                           method='mvr', verbose=False)
  Xk_norm_hat = Gaussian_sampler_hat.sample_knockoffs()  

  
  #Creating a dataframe from the knockoffs Xk_norm_hat
  df_Xk_norm_hat = pd.DataFrame(Xk_norm_hat)

  #From Python to R
  %R -i df_Xk_norm_hat
   
  #Transformation of Gaussian knockoffs to the original marginal distribution
  %R df_Xk_hat <- df_Xk_norm_hat
   
  #Continuous: empirical quantile (type 8) of the observed column at pnorm(knockoff)
  #Binary: 1 when the Gaussian knockoff is >= 0 and 0 otherwise
  %R for(i in 1:p) {   if(types[i]=="con"){ df_Xk_hat[,i] <- as.vector(quantile(X[,i], probs=pnorm(df_Xk_norm_hat[,i]), type=8)) }}
  %R for(i in 1:p) {   if(types[i]=="bin"){ df_Xk_hat[,i][df_Xk_norm_hat[,i]>=0]= 1}}
  %R for(i in 1:p) {   if(types[i]=="bin"){ df_Xk_hat[,i][df_Xk_norm_hat[,i]<0]= 0}}
  
  #From R to Python  
  %R -o df_Xk_hat
  df_Xk_hat.reset_index(drop=True, inplace=True)
    
  #Creating the names for the variables in Xk_hat: KVar1, ..., KVarp
  numbers = np.arange(1,p+1)
  kvar_names = ['KVar'+ str(number) for number in numbers]
  df_Xk_hat.columns= kvar_names     

  #Final dataset: simulated data set followed by the knockoff columns
  ls_simulations_Xk_hat[i] = pd.concat([ls_simulations[i], df_Xk_hat], axis=1)
  
time_knockoffs_3 = timer() - ti 

### Cox’s proportional hazards model with lasso penalization for (X,Xk_hat) (glmnet)

In [None]:
np_Number_CoxLasso_Rejections_knockoff_hat = np.zeros(nsim)

tii = timer()#Initial time

#lasso_glmnet applied to every augmented data set (parallel code with Joblib)
ls_coef_knockoff_hat = Parallel(n_jobs=jobs)(
    delayed(lasso_glmnet)(x) for x in ls_simulations_Xk_hat
)

#Number of non-zero coefficients of each fit
for i, coef_i in enumerate(ls_coef_knockoff_hat):
    nonzero_mask = coef_i != 0
    np_Number_CoxLasso_Rejections_knockoff_hat[i] = np.sum(nonzero_mask)

time_CoxLASSO_X_Xk_3 = timer() - tii 

### Wj using the LASSO coefficient difference statistics and threshold rejection

In [None]:
#Per-simulation results of the knockoff filter
np_Number_Rejections_knockoff_hat = np.zeros(nsim)
np_knockoff_hat_Power = np.zeros(nsim)
np_knockoff_hat_FDP = np.zeros(nsim)
np_knockoff_hat_FD = np.zeros(nsim)

for i in range(nsim): 
  
  #Feature importance assignment: the first p entries are paired with the remaining entries
  Z = ls_coef_knockoff_hat[i]

  #Wj statistic: |Z_j| - |Z_{j+p}|
  pair_W = np.abs(Z[0:p]) - np.abs(Z[p:])

  #Threshold selection and variable selection 
  threshold = data_dependent_threshhold(W=pair_W, fdr= FDR)
  print("Threshold for knockoffs ")
  print(threshold)
  rejections = make_selections(W=pair_W, fdr= FDR)

  #Printing and saving the Power and FDP of the knockoff procedure
  np_Number_Rejections_knockoff_hat[i] = rejections.sum()
  print("Number of non-zero knockoff coefficients: {}".format(np_Number_Rejections_knockoff_hat[i]))
  np_knockoff_hat_Power[i] = np.around(100*(np.dot(rejections, ls_beta[i] != 0) / (ls_beta[i] != 0).sum()), decimals=2)
  #FDP = V / max(R, 1): with no rejections the FDP is 0 instead of 0/0 = NaN
  #(NaN entries would be skipped when the results are averaged)
  np_knockoff_hat_FDP[i] = np.around(100*np.dot(rejections, ls_beta[i] == 0) / max(rejections.sum(), 1), decimals=2)
  np_knockoff_hat_FD[i] = np.around(np.dot(rejections, ls_beta[i] == 0) )
  print(f"The knockoff filter has discovered {np_knockoff_hat_Power[i]}% of the non-nulls with a FDP of {np_knockoff_hat_FDP[i]}%")


In [None]:
# Value of the varying feature (censoring level, 30), repeated once per simulated data set
np_varying_feature = np.repeat(30, nsim)

# Result arrays keyed by output column name (insertion order = column order)
results_by_column = {
    'Censoring': np_varying_feature,
    'Censoring indicator': np_censoring,
    'Mean survival time': np_mean_survival_time,
    'Relative Frobenius norm latentcor': np_Frobenius_norm_latentcor,
    'Relative Frobenius norm Sigma hat': np_Frobenius_norm_Sigma_hat,
    'Number_CoxLasso_Rejections': np_Number_CoxLasso_Rejections,
    'CoxLasso_Power(%)': np_CoxLasso_Power,
    'CoxLasso_FDP(%)': np_CoxLasso_FDP,
    'CoxLasso_FD': np_CoxLasso_FD,
    'Number_CoxLasso_Rejections_knockoff_hat': np_Number_CoxLasso_Rejections_knockoff_hat,
    'Number_Rejections_knockoff_hat': np_Number_Rejections_knockoff_hat,
    'knockoff_hat_Power(%)': np_knockoff_hat_Power,
    'knockoff_hat_FDP(%)': np_knockoff_hat_FDP,
    'knockoff_hat_FD': np_knockoff_hat_FD,
}

# Dataframe with the simulation results (one row per simulated data set)
df_simulations_results_3 = pd.DataFrame(results_by_column)

In [None]:
# Display the results table of this scenario (one row per simulated data set)
df_simulations_results_3

In [None]:
# Column-wise averages over the simulated data sets
df_simulations_results_3.mean()

# Censoring 40%

In [None]:
#Offset added to the simulation index to build the random seeds of this scenario
replication = 10_003_000

#Censored upper limit (upper bound of the uniform censoring times)
u_max = 27.6

### Correlation matrix

In [None]:
#From Python to R: p, b_by_block and rho are pushed into the R session
%R -i p
%R -i b_by_block
%R -i rho

#Block-diagonal matrix: Kronecker product of the n_blocks x n_blocks identity with a
#b_by_block x b_by_block block whose (j,k) entry is rho^|j-k|
%R n_blocks <- p%/%b_by_block
%R covMat <- diag(n_blocks) %x% matrix(rho^abs(matrix(1:b_by_block,b_by_block, b_by_block, byrow = TRUE) - matrix(1:b_by_block, b_by_block, b_by_block)), b_by_block, b_by_block)
#Diagonal entries are set to 1
%R diag(covMat) <- 1

#From R to Python
%R -o covMat
 

In [None]:
#Truncation level for the truncated ECDF estimator (TODO: add the reference)
delta_n = 1 / (4 * n ** 0.25 * math.sqrt(math.pi * math.log(n)))

### Simulations: design matrix X and survival time t

In [None]:
#Remove any Python variable named X left over from a previous cell
try:
    del X
except NameError:
    pass

In [None]:
#Arrays and lists to save information
ls_simulations = list(range(nsim))
ls_beta = list(range(nsim))
ls_X = list(range(nsim))
ls_types = list(range(nsim))
np_censoring = np.zeros(nsim)
np_mean_survival_time = np.zeros(nsim)

ti = timer() #Initial time

for i in range(nsim):
    
  #Set seed for replication
  np.random.seed(i+replication)  #Python
  %R -i i
  %R -i replication
  %R set.seed(i+replication)  #R

  #Sigma assignation
  Sigma = covMat

  #From Python to R 
  %R -i Sigma
  %R -i n
  %R -i p_bin
  %R -i p  
  %R -i p_nonnull_ordinal 
  %R -i p_nonnull_cont 
  %R -i alpha_level
  %R -i nu_level

  #Creation of the vector with correlation information
  %R lowerpart <- lower.tri(Sigma)
  %R rhos <- Sigma[lowerpart] 
  %R p_con <- p - p_bin


  #Binary and continiuos variables
  %R binary <- rep("bin", p_bin)
  %R con <- rep("con", p_con)
  %R types <- sample(c(binary, con))
  
  #Simulations of a mixed random vector
  %R X_norm_bin <- as.data.frame(gen_data(n = n, types = types, rhos = rhos, copulas="no", XP = NULL, showplot = FALSE)$X)
  %R X <- X_norm_bin

  if (skew_dist=="Yes"):  
    #There is seldom an error using the default solver "NB" for the function qsn().
    #Thus, in case of error, the solver is switched to "RFB"
    %R for(i in 1:p) {   if(types[i]=="con"){ tryCatch({X[,i] <-as.vector(qst(pnorm(X_norm_bin[,i], mean=0,sd=1),xi=0, omega=1,alpha=alpha_level,nu=nu_level))}, error=function(e){X[,i] <-as.vector(qst(pnorm(X_norm_bin[,i], mean=0,sd=1),xi=0, omega=1,alpha=alpha_level, nu=nu_level, solver="RFB"))}) }}
   
  #Creating a dataframe using the design matrix X
  #From R to Pyhton
  %R -o X 
  %R -o types
  %R -o p_con
  
  X.reset_index(drop=True, inplace=True)
  ls_X[i] = X
  ls_types[i] = types
  df_X = pd.DataFrame(X)

  #Names for the variables (X)
  numbers = np.arange(1,p+1)
  var_names = ['Var'+ str(number) for number in numbers]
  df_X.columns= var_names

  #Survival time simulation

  #Extracting the ordinal and continuous variables
  %R col_ind_ordinal <- sapply(X, function(col) length(unique(col)) < 4)
  %R col_ind_cont <- sapply(X, function(col) length(unique(col)) > 4)
  %R col_names <- names(X)
  %R col_names_ordinal <- col_names[col_ind_ordinal]
  %R col_names_cont <- col_names[col_ind_cont]  

  #Variables and coefficients nonnull
  if (p_nonnull_ordinal==0):
      %R col_nonnull <- c(sample(col_names_cont, size=p_nonnull_cont, replace=FALSE))
  else:
      #If there are ordinal nonnull variables the proportion of nonnull continuous and nonnull ordinal
      #remains the same between diferent simulated data sets  
      %R col_nonnull <- c(sample(col_names_cont, size=p_nonnull_cont, replace=FALSE), sample(col_names_ordinal, size=p_nonnull_ordinal, replace=FALSE))
   
  %R ind_betas <- as.numeric(substr(col_nonnull,start=2,stop=4))
  %R -o ind_betas 
  ind_betas_sort = np.sort(ind_betas.astype(int))
  
  #betas and magnitud
  beta = np.zeros(p,dtype=float)
  beta[ind_betas_sort-1] = beta_coef
  ls_beta[i] = beta

  #Survival time simulations (Bender et al. 2006)
  t = ( - (np.log(np.random.uniform(low=0.0, high=1.0, size=n)) )/( lambda_T*np.exp( np.dot(X, beta) ) ))**(1/nu_T)

  #Censored time and Censored indicator
  t_cens = np.random.uniform(0, u_max, size=n) #With Upper=70 --> 34% of censoring
  I_cens = np.where(t<= t_cens, 1, 0)

  #Observed time 
  t_obs = np.minimum(t, t_cens)    

  #Creating the dataframe with survival information
  df_Y = pd.DataFrame({ 'Status':I_cens,'Survival_time':t_obs})
  
  #Data frame with simulated data (Y,X)
  ls_simulations[i] = pd.concat([df_Y,df_X], axis=1)

  #Saving information
  np_censoring[i] = np.around((1-ls_simulations[i]["Status"].sum()/n)*100,decimals=4)
  np_mean_survival_time[i] = ls_simulations[i]["Survival_time"].mean()
    
  #Eliminating X (If not, X causes problems in the loop)
  del X

time_simulations_4 = timer() - ti   

### Cox’s proportional hazards model with lasso penalization (glmnet)

In [None]:
#Per-simulation results of the penalized Cox model fitted on the original variables
np_Number_CoxLasso_Rejections = np.zeros(nsim)
np_CoxLasso_Power = np.zeros(nsim)
np_CoxLasso_FDP = np.zeros(nsim)
np_CoxLasso_FD = np.zeros(nsim)

ti = timer() #Initial time

#Parallel code with Joblib
ls_coef_CoxLasso = Parallel(n_jobs=jobs)(delayed(lasso_glmnet)(x) for x in ls_simulations)

for i in range(nsim):
  fit_coef_vec = ls_coef_CoxLasso[i] 
  
  #Variable selection of the Penalized Cox proportional hazard model  
  np_Number_CoxLasso_Rejections[i] = np.sum(fit_coef_vec!= 0)
  print("Number of non-zero coefficients: {}".format(np_Number_CoxLasso_Rejections[i]))
  np_rejections_CoxLasso = np.where(fit_coef_vec!= 0,1,0)

  #Printing and saving the Power and FDP of the Penalized Cox proportional hazard model  
  np_CoxLasso_Power[i] = np.around(100*(np.dot(np_rejections_CoxLasso, ls_beta[i] != 0) / (ls_beta[i] != 0).sum()), decimals=2)
  #FDP = V / max(R, 1): with no rejections the FDP is 0 instead of 0/0 = NaN
  #(NaN entries would be skipped when the results are averaged)
  np_CoxLasso_FDP[i] = np.around(100*np.dot(np_rejections_CoxLasso, ls_beta[i] == 0) / max(np_rejections_CoxLasso.sum(), 1), decimals=2)
  np_CoxLasso_FD[i] = np.around(np.dot(np_rejections_CoxLasso, ls_beta[i] == 0))
  print(f"The Penalized Cox proportional hazards model has discovered {np_CoxLasso_Power[i]}% of the non-nulls with a FDP of {np_CoxLasso_FDP[i]}%")

time_CoxLASSO_4 = timer() - ti 

### Estimation of the latent correlation matrix 

In [None]:
np_Frobenius_norm_latentcor = np.zeros(nsim)

ti = timer() #Initial time

#Latent correlation estimate of every simulated design matrix (parallel code with Joblib)
matrices_latentcor = Parallel(n_jobs=jobs)(
    delayed(latentcor_estimation)(x, types) for x, types in zip(ls_X, ls_types)
)

#Relative Frobenius norm of the estimation error with respect to covMat
norm_covMat = np.linalg.norm(covMat, 'fro')
for i, estimate_i in enumerate(matrices_latentcor):
  np_Frobenius_norm_latentcor[i] = np.linalg.norm(estimate_i - covMat, 'fro') / norm_covMat

time_latentcor_4 = timer() - ti 

### Graphical lasso estimation

In [None]:
np_Frobenius_norm_Sigma_hat = np.zeros(nsim)

ti = timer() #Initial time

#glasso_function applied to every latent correlation estimate (parallel code with Joblib)
matrices_Sigma_hat = Parallel(n_jobs=jobs)(
    delayed(glasso_function)(x) for x in matrices_latentcor
)

#Relative Frobenius norm of the estimation error with respect to covMat
norm_covMat = np.linalg.norm(covMat, 'fro')
for i, estimate_i in enumerate(matrices_Sigma_hat):
    np_Frobenius_norm_Sigma_hat[i] = np.linalg.norm(estimate_i - covMat, 'fro') / norm_covMat

time_GraphicalLASSO_4 = timer() - ti 

### Knockoffs sampling

In [None]:
#Placeholder list; entry i is replaced in the loop by the simulated data set plus its knockoffs
ls_simulations_Xk_hat = list(range(nsim))

ti = timer() # Initial time for the loop

for i in range(nsim):

  #Design matrix and variable types of simulation i
  X = ls_X[i]
  df_X = ls_X[i] 
  types = ls_types[i]
  
  #From Python to R
  %R -i X 
  %R -i types
  %R -i delta_n
  
  #Transformation of the marginal distribution to normal distribution
  %R X_ecdf <- X  
  %R X_norm_hat <- X
  
  #Empirical cumulative distribution function (continuous columns only)
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i] <- as.vector(ecdf(X[,i])(X[,i])) }}

  #For truncation:
  #Continuous variables: ECDF values are clipped to [delta_n, 1-delta_n] and mapped with qnorm
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i][ X_ecdf[,i] < delta_n] <- delta_n }}
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i][ X_ecdf[,i] > (1-delta_n)] <- 1-delta_n }}                                    
  %R for(i in 1:p) {   if(types[i]=="con"){ X_norm_hat[,i] <- as.vector(qnorm( X_ecdf[,i] ) )}}
  #Ordinal variables: entries equal to 0 are recoded as -1, the other entries are left unchanged
  %R for(i in 1:p) {   if(types[i]=="bin"){ X_norm_hat[,i][X[,i]==0] = (-1)}}  

  #From R to Python
  %R -o X_norm_hat
    
  #Object for Gaussian knockoffs using the Sigma_hat and method mvr
  Gaussian_sampler_hat = knockpy.knockoffs.GaussianSampler(X_norm_hat.to_numpy(), mu=None,
                                                           Sigma=matrices_Sigma_hat[i],
                                                           method='mvr', verbose=False)
  Xk_norm_hat = Gaussian_sampler_hat.sample_knockoffs()  

  
  #Creating a dataframe from the knockoffs Xk_norm_hat
  df_Xk_norm_hat = pd.DataFrame(Xk_norm_hat)

  #From Python to R
  %R -i df_Xk_norm_hat

  #Transformation of Gaussian knockoffs to the original marginal distribution
  %R df_Xk_hat <- df_Xk_norm_hat
   
  #Continuous: empirical quantile (type 8) of the observed column at pnorm(knockoff)
  #Binary: 1 when the Gaussian knockoff is >= 0 and 0 otherwise
  %R for(i in 1:p) {   if(types[i]=="con"){ df_Xk_hat[,i] <- as.vector(quantile(X[,i], probs=pnorm(df_Xk_norm_hat[,i]), type=8)) }}
  %R for(i in 1:p) {   if(types[i]=="bin"){ df_Xk_hat[,i][df_Xk_norm_hat[,i]>=0]= 1}}
  %R for(i in 1:p) {   if(types[i]=="bin"){ df_Xk_hat[,i][df_Xk_norm_hat[,i]<0]= 0}}
  
  #From R to Python  
  %R -o df_Xk_hat
  df_Xk_hat.reset_index(drop=True, inplace=True)
    
  #Creating the names for the variables in Xk_hat: KVar1, ..., KVarp
  numbers = np.arange(1,p+1)
  kvar_names = ['KVar'+ str(number) for number in numbers]
  df_Xk_hat.columns= kvar_names     

  #Final dataset: simulated data set followed by the knockoff columns
  ls_simulations_Xk_hat[i] = pd.concat([ls_simulations[i], df_Xk_hat], axis=1)
  
time_knockoffs_4 = timer() - ti 

### Cox’s proportional hazards model with lasso penalization for (X,Xk_hat) (glmnet)

In [None]:
np_Number_CoxLasso_Rejections_knockoff_hat = np.zeros(nsim)

tii = timer()#Initial time

#lasso_glmnet applied to every augmented data set (parallel code with Joblib)
ls_coef_knockoff_hat = Parallel(n_jobs=jobs)(
    delayed(lasso_glmnet)(x) for x in ls_simulations_Xk_hat
)

#Number of non-zero coefficients of each fit
for i, coef_i in enumerate(ls_coef_knockoff_hat):
    nonzero_mask = coef_i != 0
    np_Number_CoxLasso_Rejections_knockoff_hat[i] = np.sum(nonzero_mask)

time_CoxLASSO_X_Xk_4 = timer() - tii

### Wj using the LASSO coefficient difference statistics and threshold rejection

In [None]:
#Per-simulation results of the knockoff filter
np_Number_Rejections_knockoff_hat = np.zeros(nsim)
np_knockoff_hat_Power = np.zeros(nsim)
np_knockoff_hat_FDP = np.zeros(nsim)
np_knockoff_hat_FD = np.zeros(nsim)

for i in range(nsim): 
  
  #Feature importance assignment: the first p entries are paired with the remaining entries
  Z = ls_coef_knockoff_hat[i]

  #Wj statistic: |Z_j| - |Z_{j+p}|
  pair_W = np.abs(Z[0:p]) - np.abs(Z[p:])
 
  #Threshold selection and variable selection  
  threshold = data_dependent_threshhold(W=pair_W, fdr= FDR)
  print("Threshold for knockoffs ")
  print(threshold)
  rejections = make_selections(W=pair_W, fdr= FDR)

  #Printing and saving the Power and FDP of the knockoff procedure
  np_Number_Rejections_knockoff_hat[i] = rejections.sum()
  print("Number of non-zero knockoff coefficients: {}".format(np_Number_Rejections_knockoff_hat[i]))
  np_knockoff_hat_Power[i] = np.around(100*(np.dot(rejections, ls_beta[i] != 0) / (ls_beta[i] != 0).sum()), decimals=2)
  #FDP = V / max(R, 1): with no rejections the FDP is 0 instead of 0/0 = NaN
  #(NaN entries would be skipped when the results are averaged)
  np_knockoff_hat_FDP[i] = np.around(100*np.dot(rejections, ls_beta[i] == 0) / max(rejections.sum(), 1), decimals=2)
  np_knockoff_hat_FD[i] = np.around(np.dot(rejections, ls_beta[i] == 0) )
  print(f"The knockoff filter has discovered {np_knockoff_hat_Power[i]}% of the non-nulls with a FDP of {np_knockoff_hat_FDP[i]}%")


In [None]:
# Value of the varying feature (censoring level, 40), repeated once per simulated data set
np_varying_feature = np.repeat(40, nsim)

# Result arrays keyed by output column name (insertion order = column order)
results_by_column = {
    'Censoring': np_varying_feature,
    'Censoring indicator': np_censoring,
    'Mean survival time': np_mean_survival_time,
    'Relative Frobenius norm latentcor': np_Frobenius_norm_latentcor,
    'Relative Frobenius norm Sigma hat': np_Frobenius_norm_Sigma_hat,
    'Number_CoxLasso_Rejections': np_Number_CoxLasso_Rejections,
    'CoxLasso_Power(%)': np_CoxLasso_Power,
    'CoxLasso_FDP(%)': np_CoxLasso_FDP,
    'CoxLasso_FD': np_CoxLasso_FD,
    'Number_CoxLasso_Rejections_knockoff_hat': np_Number_CoxLasso_Rejections_knockoff_hat,
    'Number_Rejections_knockoff_hat': np_Number_Rejections_knockoff_hat,
    'knockoff_hat_Power(%)': np_knockoff_hat_Power,
    'knockoff_hat_FDP(%)': np_knockoff_hat_FDP,
    'knockoff_hat_FD': np_knockoff_hat_FD,
}

# Dataframe with the simulation results (one row per simulated data set)
df_simulations_results_4 = pd.DataFrame(results_by_column)

In [None]:
# Display the results table of this scenario (one row per simulated data set)
df_simulations_results_4

In [None]:
# Column-wise averages over the simulated data sets
df_simulations_results_4.mean()

# Censoring 50%

In [None]:
#Offset added to the simulation index to build the random seeds of this scenario
replication = 10_004_000

#Censored upper limit (upper bound of the uniform censoring times)
u_max = 13.2

### Correlation matrix

In [None]:
#From Python to R: p, b_by_block and rho are pushed into the R session
%R -i p
%R -i b_by_block
%R -i rho

#Block-diagonal matrix: Kronecker product of the n_blocks x n_blocks identity with a
#b_by_block x b_by_block block whose (j,k) entry is rho^|j-k|
%R n_blocks <- p%/%b_by_block
%R covMat <- diag(n_blocks) %x% matrix(rho^abs(matrix(1:b_by_block,b_by_block, b_by_block, byrow = TRUE) - matrix(1:b_by_block, b_by_block, b_by_block)), b_by_block, b_by_block)
#Diagonal entries are set to 1
%R diag(covMat) <- 1

#From R to Python
%R -o covMat
 

In [None]:
#Truncation level for the truncated ECDF estimator (TODO: add the reference)
delta_n = 1 / (4 * n ** 0.25 * math.sqrt(math.pi * math.log(n)))

### Simulations: design matrix X and survival time t

In [None]:
#Remove any Python variable named X left over from a previous cell
try:
    del X
except NameError:
    pass

In [None]:
#Arrays and listt to save information
ls_simulations = list(range(nsim))
ls_beta = list(range(nsim))
ls_X = list(range(nsim))
ls_types = list(range(nsim))
np_censoring = np.zeros(nsim)
np_mean_survival_time = np.zeros(nsim)

ti = timer() #Initial time

for i in range(nsim):
    
  #Set seed for replication
  np.random.seed(i+replication)  #Python
  %R -i i
  %R -i replication
  %R set.seed(i+replication)  #R

  #Sigma assignation
  Sigma = covMat

  #From Python to R 
  %R -i Sigma
  %R -i n
  %R -i p_bin
  %R -i p  
  %R -i p_nonnull_ordinal 
  %R -i p_nonnull_cont 
  %R -i alpha_level
  %R -i nu_level

  #Creation of the vector with correlation information
  %R lowerpart <- lower.tri(Sigma)
  %R rhos <- Sigma[lowerpart] 
  %R p_con <- p - p_bin


  #Binary and continiuos variables
  %R binary <- rep("bin", p_bin)
  %R con <- rep("con", p_con)
  %R types <- sample(c(binary, con))
  
  #Simulations of a mixed random vector
  %R X_norm_bin <- as.data.frame(gen_data(n = n, types = types, rhos = rhos, copulas="no", XP = NULL, showplot = FALSE)$X)
  %R X <- X_norm_bin

  if (skew_dist=="Yes"):  
    #There is seldom an error using the default solver "NB" for the function qsn().
    #Thus, in case of error, the solver is switched to "RFB"
    %R for(i in 1:p) {   if(types[i]=="con"){ tryCatch({X[,i] <-as.vector(qst(pnorm(X_norm_bin[,i], mean=0,sd=1),xi=0, omega=1,alpha=alpha_level,nu=nu_level))}, error=function(e){X[,i] <-as.vector(qst(pnorm(X_norm_bin[,i], mean=0,sd=1),xi=0, omega=1,alpha=alpha_level, nu=nu_level, solver="RFB"))}) }}
   
  #Creating a dataframe using the design matrix X
  #From R to Pyhton
  %R -o X 
  %R -o types
  %R -o p_con
  
  X.reset_index(drop=True, inplace=True)
  ls_X[i] = X
  ls_types[i] = types
  df_X = pd.DataFrame(X)

  #Names for the variables (X)
  numbers = np.arange(1,p+1)
  var_names = ['Var'+ str(number) for number in numbers]
  df_X.columns= var_names

  #Survival time simulation

  #Extracting the ordinal and continuous variables
  %R col_ind_ordinal <- sapply(X, function(col) length(unique(col)) < 4)
  %R col_ind_cont <- sapply(X, function(col) length(unique(col)) > 4)
  %R col_names <- names(X)
  %R col_names_ordinal <- col_names[col_ind_ordinal]
  %R col_names_cont <- col_names[col_ind_cont]  

  #Variables and coefficients nonnull
  if (p_nonnull_ordinal==0):
      %R col_nonnull <- c(sample(col_names_cont, size=p_nonnull_cont, replace=FALSE))
  else:
      #If there are ordinal nonnull variables the proportion of nonnull continuous and nonnull ordinal
      #remains the same between diferent simulated data sets  
      %R col_nonnull <- c(sample(col_names_cont, size=p_nonnull_cont, replace=FALSE), sample(col_names_ordinal, size=p_nonnull_ordinal, replace=FALSE))
   
  %R ind_betas <- as.numeric(substr(col_nonnull,start=2,stop=4))
  %R -o ind_betas 
  ind_betas_sort = np.sort(ind_betas.astype(int))
  
  #betas and magnitud
  beta = np.zeros(p,dtype=float)
  beta[ind_betas_sort-1] = beta_coef
  ls_beta[i] = beta

  #Survival time simulations (Bender et al. 2006)
  t = ( - (np.log(np.random.uniform(low=0.0, high=1.0, size=n)) )/( lambda_T*np.exp( np.dot(X, beta) ) ))**(1/nu_T)

  #Censored time and Censored indicator
  t_cens = np.random.uniform(0, u_max, size=n) #With Upper=70 --> 34% of censoring
  I_cens = np.where(t<= t_cens, 1, 0)

  #Observed time 
  t_obs = np.minimum(t, t_cens)    

  #Creating the dataframe with survival information
  df_Y = pd.DataFrame({ 'Status':I_cens,'Survival_time':t_obs})
  
  #Data frame with simulated data (Y,X)
  ls_simulations[i] = pd.concat([df_Y,df_X], axis=1)

  #Saving information
  np_censoring[i] = np.around((1-ls_simulations[i]["Status"].sum()/n)*100,decimals=4)
  np_mean_survival_time[i] = ls_simulations[i]["Survival_time"].mean()
    
  #Eliminating X (If not, X causes problems in the loop)
  del X

time_simulations_5 = timer() - ti     

### Cox’s proportional hazards model with lasso penalization (glmnet)

In [None]:
#Per-simulation results of the penalized Cox model fitted on the original variables
np_Number_CoxLasso_Rejections = np.zeros(nsim)
np_CoxLasso_Power = np.zeros(nsim)
np_CoxLasso_FDP = np.zeros(nsim)
np_CoxLasso_FD = np.zeros(nsim)

ti = timer() #Initial time

#Parallel code with Joblib
ls_coef_CoxLasso = Parallel(n_jobs=jobs)(delayed(lasso_glmnet)(x) for x in ls_simulations)

for i in range(nsim):
  fit_coef_vec = ls_coef_CoxLasso[i] 
  
  #Variable selection of the Penalized Cox proportional hazard model  
  np_Number_CoxLasso_Rejections[i] = np.sum(fit_coef_vec!= 0)
  print("Number of non-zero coefficients: {}".format(np_Number_CoxLasso_Rejections[i]))
  np_rejections_CoxLasso = np.where(fit_coef_vec!= 0,1,0)

  #Printing and saving the Power and FDP of the Penalized Cox proportional hazard model  
  np_CoxLasso_Power[i] = np.around(100*(np.dot(np_rejections_CoxLasso, ls_beta[i] != 0) / (ls_beta[i] != 0).sum()), decimals=2)
  #FDP = V / max(R, 1): with no rejections the FDP is 0 instead of 0/0 = NaN
  #(NaN entries would be skipped when the results are averaged)
  np_CoxLasso_FDP[i] = np.around(100*np.dot(np_rejections_CoxLasso, ls_beta[i] == 0) / max(np_rejections_CoxLasso.sum(), 1), decimals=2)
  np_CoxLasso_FD[i] = np.around(np.dot(np_rejections_CoxLasso, ls_beta[i] == 0))
  print(f"The Penalized Cox proportional hazards model has discovered {np_CoxLasso_Power[i]}% of the non-nulls with a FDP of {np_CoxLasso_FDP[i]}%")
    
time_CoxLASSO_5 = timer() - ti 

### Estimation of the latent correlation matrix 

In [None]:
np_Frobenius_norm_latentcor = np.zeros(nsim)

ti = timer() #Initial time

#Latent correlation estimate of every simulated design matrix (parallel code with Joblib)
matrices_latentcor = Parallel(n_jobs=jobs)(
    delayed(latentcor_estimation)(x, types) for x, types in zip(ls_X, ls_types)
)

#Relative Frobenius norm of the estimation error with respect to covMat
norm_covMat = np.linalg.norm(covMat, 'fro')
for i, estimate_i in enumerate(matrices_latentcor):
  np_Frobenius_norm_latentcor[i] = np.linalg.norm(estimate_i - covMat, 'fro') / norm_covMat

time_latentcor_5 = timer() - ti 

### Graphical lasso estimation

In [None]:
np_Frobenius_norm_Sigma_hat = np.zeros(nsim)

ti = timer() #Initial time

#glasso_function applied to every latent correlation estimate (parallel code with Joblib)
matrices_Sigma_hat = Parallel(n_jobs=jobs)(
    delayed(glasso_function)(x) for x in matrices_latentcor
)

#Relative Frobenius norm of the estimation error with respect to covMat
norm_covMat = np.linalg.norm(covMat, 'fro')
for i, estimate_i in enumerate(matrices_Sigma_hat):
    np_Frobenius_norm_Sigma_hat[i] = np.linalg.norm(estimate_i - covMat, 'fro') / norm_covMat

time_GraphicalLASSO_5 = timer() - ti 

### Knockoffs sampling

In [None]:
#Placeholder list; entry i is replaced in the loop by the simulated data set plus its knockoffs
ls_simulations_Xk_hat = list(range(nsim))

ti = timer() #Initial time for the loop

for i in range(nsim):

  #Design matrix and variable types of simulation i
  X = ls_X[i]
  df_X = ls_X[i] 
  types = ls_types[i]
  
  #From Python to R
  %R -i X 
  %R -i types
  %R -i delta_n
  
  #Transformation of the marginal distribution to normal distribution
  %R X_ecdf <- X  
  %R X_norm_hat <- X
  
  #Empirical cumulative distribution function (continuous columns only)
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i] <- as.vector(ecdf(X[,i])(X[,i])) }}

  #For truncation:
  #Continuous variables: ECDF values are clipped to [delta_n, 1-delta_n] and mapped with qnorm
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i][ X_ecdf[,i] < delta_n] <- delta_n }}
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i][ X_ecdf[,i] > (1-delta_n)] <- 1-delta_n }}                                    
  %R for(i in 1:p) {   if(types[i]=="con"){ X_norm_hat[,i] <- as.vector(qnorm( X_ecdf[,i] ) )}}
  #Ordinal variables: entries equal to 0 are recoded as -1, the other entries are left unchanged
  %R for(i in 1:p) {   if(types[i]=="bin"){ X_norm_hat[,i][X[,i]==0] = (-1)}}  

  #From R to Python
  %R -o X_norm_hat
    
  #Object for Gaussian knockoffs using the Sigma_hat and method mvr
  Gaussian_sampler_hat = knockpy.knockoffs.GaussianSampler(X_norm_hat.to_numpy(), mu=None,
                                                           Sigma=matrices_Sigma_hat[i],
                                                           method='mvr', verbose=False)
  Xk_norm_hat = Gaussian_sampler_hat.sample_knockoffs()  

  
  #Creating a dataframe from the knockoffs Xk_norm_hat
  df_Xk_norm_hat = pd.DataFrame(Xk_norm_hat)

  #From Python to R
  %R -i df_Xk_norm_hat

  #Transformation of Gaussian knockoffs to the original marginal distribution
  %R df_Xk_hat <- df_Xk_norm_hat
   
  #Continuous: empirical quantile (type 8) of the observed column at pnorm(knockoff)
  #Binary: 1 when the Gaussian knockoff is >= 0 and 0 otherwise
  %R for(i in 1:p) {   if(types[i]=="con"){ df_Xk_hat[,i] <- as.vector(quantile(X[,i], probs=pnorm(df_Xk_norm_hat[,i]), type=8)) }}
  %R for(i in 1:p) {   if(types[i]=="bin"){ df_Xk_hat[,i][df_Xk_norm_hat[,i]>=0]= 1}}
  %R for(i in 1:p) {   if(types[i]=="bin"){ df_Xk_hat[,i][df_Xk_norm_hat[,i]<0]= 0}}
  
  #From R to Python  
  %R -o df_Xk_hat
  df_Xk_hat.reset_index(drop=True, inplace=True)
    
  #Creating the names for the variables in Xk_hat: KVar1, ..., KVarp
  numbers = np.arange(1,p+1)
  kvar_names = ['KVar'+ str(number) for number in numbers]
  df_Xk_hat.columns= kvar_names     

  #Final dataset: simulated data set followed by the knockoff columns
  ls_simulations_Xk_hat[i] = pd.concat([ls_simulations[i], df_Xk_hat], axis=1)
  
time_knockoffs_5 = timer() - ti 

### Cox’s proportional hazards model with lasso penalization for (X,Xk_hat) (glmnet)

In [None]:
np_Number_CoxLasso_Rejections_knockoff_hat = np.zeros(nsim)

tii = timer()#Initial time

#lasso_glmnet applied to every augmented data set (parallel code with Joblib)
ls_coef_knockoff_hat = Parallel(n_jobs=jobs)(
    delayed(lasso_glmnet)(x) for x in ls_simulations_Xk_hat
)

#Number of non-zero coefficients of each fit
for i, coef_i in enumerate(ls_coef_knockoff_hat):
    nonzero_mask = coef_i != 0
    np_Number_CoxLasso_Rejections_knockoff_hat[i] = np.sum(nonzero_mask)

time_CoxLASSO_X_Xk_5 = timer() - tii 

### Wj using the LASSO coefficient difference statistics and threshold rejection

In [None]:
#Per-simulation summaries of the knockoff filter
np_Number_Rejections_knockoff_hat = np.zeros(nsim)
np_knockoff_hat_Power = np.zeros(nsim)
np_knockoff_hat_FDP = np.zeros(nsim)
np_knockoff_hat_FD = np.zeros(nsim)

for i in range(nsim): 
  
  #Feature importances: fitted coefficients; the first p entries are paired
  #against the remaining ones below
  Z = ls_coef_knockoff_hat[i]

  #Wj statistic: coefficient difference |Z_j| - |Z_{j+p}|
  pair_W = np.abs(Z[0:p]) - np.abs(Z[p:])

  #Threshold selection and variable selection
  threshold = data_dependent_threshhold(W=pair_W, fdr= FDR)
  print("Threshold for knockoffs ")
  print(threshold)
  rejections = make_selections(W=pair_W, fdr= FDR)

  #Number of selections and number of selected nulls (false discoveries)
  number_rejections = rejections.sum()
  false_discoveries = np.dot(rejections, ls_beta[i] == 0)

  #Printing and saving the Power and FDP of the knockoff procedure
  np_Number_Rejections_knockoff_hat[i] = number_rejections
  print("Number of non-zero knockoff coefficients: {}".format(np_Number_Rejections_knockoff_hat[i]))
  np_knockoff_hat_Power[i] = np.around(100*(np.dot(rejections, ls_beta[i] != 0) / (ls_beta[i] != 0).sum()), decimals=2)
  #FDP = V / max(R, 1): with no selections the FDP is 0, not 0/0 = NaN
  #(a NaN would be dropped by the later .mean() and bias the FDR average)
  np_knockoff_hat_FDP[i] = np.around(100*false_discoveries / max(number_rejections, 1), decimals=2)
  np_knockoff_hat_FD[i] = np.around(false_discoveries)
  print(f"The knockoff filter has discovered {np_knockoff_hat_Power[i]}% of the non-nulls with a FDP of {np_knockoff_hat_FDP[i]}%")


In [None]:
# Scenario label stored in the 'Censoring' column: the value 50, once per simulation
np_varying_feature = np.full(nsim, 50)

# Column name -> per-simulation array, listed in the order the columns appear
summary_columns_5 = {
    'Censoring': np_varying_feature,
    'Censoring indicator': np_censoring,
    'Mean survival time': np_mean_survival_time,
    'Relative Frobenius norm latentcor': np_Frobenius_norm_latentcor,
    'Relative Frobenius norm Sigma hat': np_Frobenius_norm_Sigma_hat,
    'Number_CoxLasso_Rejections': np_Number_CoxLasso_Rejections,
    'CoxLasso_Power(%)': np_CoxLasso_Power,
    'CoxLasso_FDP(%)': np_CoxLasso_FDP,
    'CoxLasso_FD': np_CoxLasso_FD,
    'Number_CoxLasso_Rejections_knockoff_hat': np_Number_CoxLasso_Rejections_knockoff_hat,
    'Number_Rejections_knockoff_hat': np_Number_Rejections_knockoff_hat,
    'knockoff_hat_Power(%)': np_knockoff_hat_Power,
    'knockoff_hat_FDP(%)': np_knockoff_hat_FDP,
    'knockoff_hat_FD': np_knockoff_hat_FD,
}

# One row per simulation for this scenario
df_simulations_results_5 = pd.DataFrame(summary_columns_5)

In [None]:
# Display the per-simulation results of the scenario labelled Censoring = 50
df_simulations_results_5

In [None]:
# Column-wise averages over the simulations of the scenario labelled Censoring = 50
df_simulations_results_5.mean()

# Censoring 60%

In [None]:
# Seed offset for this scenario: simulation i is seeded with i + replication
# in both numpy and R inside the simulation loop
replication = 10004000

# Upper limit of the uniform censoring-time distribution U(0, u_max)
u_max=6.7

### Correlation matrix

In [None]:
#From Python to R
%R -i p
%R -i b_by_block
%R -i rho

#Block-diagonal matrix: n_blocks copies (Kronecker product with the identity)
#of a b_by_block x b_by_block block whose (j,k) entry is rho^|j-k|
%R n_blocks <- p%/%b_by_block
%R covMat <- diag(n_blocks) %x% matrix(rho^abs(matrix(1:b_by_block,b_by_block, b_by_block, byrow = TRUE) - matrix(1:b_by_block, b_by_block, b_by_block)), b_by_block, b_by_block)
%R diag(covMat) <- 1

#From R to Python
%R -o covMat
 

In [None]:
#Truncation level for the truncated ECDF estimator (TODO: add the reference):
#delta_n = 1 / (4 * n^(1/4) * sqrt(pi * log(n))).
#The knockoff-sampling loop clips the ECDF values of continuous variables to [delta_n, 1 - delta_n].
delta_n = 1/( (4*n**(1/4))*math.sqrt(math.pi*math.log(n)) )

### Simulations: design matrix X and survival time t

In [None]:
# Drop any design matrix left over from a previous scenario; nothing to do if it is absent
try:
    del X
except NameError:
    pass

In [None]:
#Arrays and listt to save information
ls_simulations = list(range(nsim))
ls_beta = list(range(nsim))
ls_X = list(range(nsim))
ls_types = list(range(nsim))
np_censoring = np.zeros(nsim)
np_mean_survival_time = np.zeros(nsim)

ti = timer() #Initial time

for i in range(nsim):
    
  #Set seed for replication
  np.random.seed(i+replication)  #Python
  %R -i i
  %R -i replication
  %R set.seed(i+replication)  #R

  #Sigma assignation
  Sigma = covMat

  #From Python to R 
  %R -i Sigma
  %R -i n
  %R -i p_bin
  %R -i p  
  %R -i p_nonnull_ordinal 
  %R -i p_nonnull_cont 
  %R -i alpha_level
  %R -i nu_level

  #Creation of the vector with correlation information
  %R lowerpart <- lower.tri(Sigma)
  %R rhos <- Sigma[lowerpart] 
  %R p_con <- p - p_bin


  #Binary and continiuos variables
  %R binary <- rep("bin", p_bin)
  %R con <- rep("con", p_con)
  %R types <- sample(c(binary, con))
  
  #Simulations of a mixed random vector
  %R X_norm_bin <- as.data.frame(gen_data(n = n, types = types, rhos = rhos, copulas="no", XP = NULL, showplot = FALSE)$X)
  %R X <- X_norm_bin

  if (skew_dist=="Yes"):  
    #There is seldom an error using the default solver "NB" for the function qsn().
    #Thus, in case of error, the solver is switched to "RFB"
    %R for(i in 1:p) {   if(types[i]=="con"){ tryCatch({X[,i] <-as.vector(qst(pnorm(X_norm_bin[,i], mean=0,sd=1),xi=0, omega=1,alpha=alpha_level,nu=nu_level))}, error=function(e){X[,i] <-as.vector(qst(pnorm(X_norm_bin[,i], mean=0,sd=1),xi=0, omega=1,alpha=alpha_level, nu=nu_level, solver="RFB"))}) }}
   
  #Creating a dataframe using the design matrix X
  #From R to Pyhton
  %R -o X 
  %R -o types
  %R -o p_con
  
  X.reset_index(drop=True, inplace=True)
  ls_X[i] = X
  ls_types[i] = types
  df_X = pd.DataFrame(X)

  #Names for the variables (X)
  numbers = np.arange(1,p+1)
  var_names = ['Var'+ str(number) for number in numbers]
  df_X.columns= var_names

  #Survival time simulation

  #Extracting the ordinal and continuous variables
  %R col_ind_ordinal <- sapply(X, function(col) length(unique(col)) < 4)
  %R col_ind_cont <- sapply(X, function(col) length(unique(col)) > 4)
  %R col_names <- names(X)
  %R col_names_ordinal <- col_names[col_ind_ordinal]
  %R col_names_cont <- col_names[col_ind_cont]  

  #Variables and coefficients nonnull
  if (p_nonnull_ordinal==0):
      %R col_nonnull <- c(sample(col_names_cont, size=p_nonnull_cont, replace=FALSE))
  else:
      #If there are ordinal nonnull variables the proportion of nonnull continuous and nonnull ordinal
      #remains the same between diferent simulated data sets  
      %R col_nonnull <- c(sample(col_names_cont, size=p_nonnull_cont, replace=FALSE), sample(col_names_ordinal, size=p_nonnull_ordinal, replace=FALSE))
   
  %R ind_betas <- as.numeric(substr(col_nonnull,start=2,stop=4))
  %R -o ind_betas 
  ind_betas_sort = np.sort(ind_betas.astype(int))
  
  #betas and magnitud
  beta = np.zeros(p,dtype=float)
  beta[ind_betas_sort-1] = beta_coef
  ls_beta[i] = beta

  #Survival time simulations (Bender et al. 2006)
  t = ( - (np.log(np.random.uniform(low=0.0, high=1.0, size=n)) )/( lambda_T*np.exp( np.dot(X, beta) ) ))**(1/nu_T)

  #Censored time and Censored indicator
  t_cens = np.random.uniform(0, u_max, size=n) #With Upper=70 --> 34% of censoring
  I_cens = np.where(t<= t_cens, 1, 0)

  #Observed time 
  t_obs = np.minimum(t, t_cens)    

  #Creating the dataframe with survival information
  df_Y = pd.DataFrame({ 'Status':I_cens,'Survival_time':t_obs})
  
  #Data frame with simulated data (Y,X)
  ls_simulations[i] = pd.concat([df_Y,df_X], axis=1)

  #Saving information
  np_censoring[i] = np.around((1-ls_simulations[i]["Status"].sum()/n)*100,decimals=4)
  np_mean_survival_time[i] = ls_simulations[i]["Survival_time"].mean()
    
  #Eliminating X (If not, X causes problems in the loop)
  del X

time_simulations_6 = timer() - ti     

### Cox’s proportional hazards model with lasso penalization (glmnet)

In [None]:
#Per-simulation summaries of the penalized Cox model fitted on the original variables
np_Number_CoxLasso_Rejections = np.zeros(nsim)
np_CoxLasso_Power = np.zeros(nsim)
np_CoxLasso_FDP = np.zeros(nsim)
np_CoxLasso_FD = np.zeros(nsim)

ti = timer() #Initial time

#Parallel code with Joblib: one lasso_glmnet fit per simulated data set
ls_coef_CoxLasso = Parallel(n_jobs=jobs)(delayed(lasso_glmnet)(x) for x in ls_simulations)

for i in range(nsim):
  fit_coef_vec = ls_coef_CoxLasso[i] 
  
  #Variable selection of the Penalized Cox proportional hazard model  
  np_Number_CoxLasso_Rejections[i] = np.sum(fit_coef_vec!= 0)
  print("Number of non-zero coefficients: {}".format(np_Number_CoxLasso_Rejections[i]))
  np_rejections_CoxLasso = np.where(fit_coef_vec!= 0,1,0)

  #Number of selections and number of selected nulls (false discoveries)
  number_rejections = np_rejections_CoxLasso.sum()
  false_discoveries = np.dot(np_rejections_CoxLasso, ls_beta[i] == 0)

  #Printing and saving the Power and FDP of the Penalized Cox proportional hazard model  
  np_CoxLasso_Power[i] = np.around(100*(np.dot(np_rejections_CoxLasso, ls_beta[i] != 0) / (ls_beta[i] != 0).sum()), decimals=2)
  #FDP = V / max(R, 1): with no selections the FDP is 0, not 0/0 = NaN
  #(a NaN would be dropped by the later .mean() and bias the FDR average)
  np_CoxLasso_FDP[i] = np.around(100*false_discoveries / max(number_rejections, 1), decimals=2)
  np_CoxLasso_FD[i] = np.around(false_discoveries)
  print(f"The Penalized Cox proportional hazards model has discovered {np_CoxLasso_Power[i]}% of the non-nulls with a FDP of {np_CoxLasso_FDP[i]}%")
    
time_CoxLASSO_6 = timer() - ti 

### Estimation of the latent correlation matrix 

In [None]:
# Relative Frobenius error of each latent correlation estimate with respect to covMat
np_Frobenius_norm_latentcor = np.zeros(nsim)

# Start of the timed section
ti = timer()

# One latentcor_estimation call per (design matrix, variable types) pair, run through joblib
matrices_latentcor = Parallel(n_jobs=jobs)(
    delayed(latentcor_estimation)(x, types) for x, types in zip(ls_X, ls_types)
)

for i in range(nsim):
    # ||estimate - covMat||_F / ||covMat||_F
    estimation_error = np.linalg.norm(matrices_latentcor[i] - covMat, 'fro')
    np_Frobenius_norm_latentcor[i] = estimation_error / np.linalg.norm(covMat, 'fro')

# Elapsed wall-clock time (seconds) of this stage for scenario 6
time_latentcor_6 = timer() - ti

### Graphical lasso estimation

In [None]:
# Relative Frobenius error of each graphical-lasso estimate with respect to covMat
np_Frobenius_norm_Sigma_hat = np.zeros(nsim)

# Start of the timed section
ti = timer()

# glasso_function is applied to every latent correlation estimate, run through joblib
matrices_Sigma_hat = Parallel(n_jobs=jobs)(
    delayed(glasso_function)(x) for x in matrices_latentcor
)

for i in range(nsim):
    # ||Sigma_hat - covMat||_F / ||covMat||_F
    estimation_error = np.linalg.norm(matrices_Sigma_hat[i] - covMat, 'fro')
    np_Frobenius_norm_Sigma_hat[i] = estimation_error / np.linalg.norm(covMat, 'fro')

# Elapsed wall-clock time (seconds) of this stage for scenario 6
time_GraphicalLASSO_6 = timer() - ti

### Knockoffs sampling

In [None]:
#Placeholder list; slot i is overwritten with the augmented data set (Y, X, Xk_hat) of simulation i
ls_simulations_Xk_hat = list(range(nsim))

ti = timer() #Initial time for the loop

for i in range(nsim):

  #Design matrix and variable types ("bin"/"con") of simulation i
  X = ls_X[i]
  df_X = ls_X[i] 
  types = ls_types[i]
  
  #From Python to R
  %R -i X 
  %R -i types
  %R -i delta_n
  
  #Transformation of the marginal distribution to normal distribution
  %R X_ecdf <- X  
  %R X_norm_hat <- X
  
  #Empirical cumulative distribution function (continuous columns only)
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i] <- as.vector(ecdf(X[,i])(X[,i])) }}

  #For truncation:
  #Continuous variables: clip the ECDF values to [delta_n, 1-delta_n], then apply the normal quantile function
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i][ X_ecdf[,i] < delta_n] <- delta_n }}
  %R for(i in 1:p) {   if(types[i]=="con"){ X_ecdf[,i][ X_ecdf[,i] > (1-delta_n)] <- 1-delta_n }}                                    
  %R for(i in 1:p) {   if(types[i]=="con"){ X_norm_hat[,i] <- as.vector(qnorm( X_ecdf[,i] ) )}}
  #Ordinal variables: zeros are recoded as -1, other values are left unchanged
  %R for(i in 1:p) {   if(types[i]=="bin"){ X_norm_hat[,i][X[,i]==0] = (-1)}}  

  #From R to Python
  %R -o X_norm_hat
    
  #Object for Gaussian knockoffs using the Sigma_hat and method mvr
  Gaussian_sampler_hat = knockpy.knockoffs.GaussianSampler(X_norm_hat.to_numpy(), mu=None,
                                                           Sigma=matrices_Sigma_hat[i],
                                                           method='mvr', verbose=False)
  Xk_norm_hat = Gaussian_sampler_hat.sample_knockoffs()  

  
  #Creating a dataframe from the knockoffs Xk_norm_hat
  df_Xk_norm_hat = pd.DataFrame(Xk_norm_hat)

  #From Python to R
  %R -i df_Xk_norm_hat

  #Transformation of Gaussian knockoffs to the original marginal distribution
  %R df_Xk_hat <- df_Xk_norm_hat
   
  #Continuous: empirical quantiles of X[,i] at pnorm(knockoff); binary: 1 if knockoff >= 0, else 0
  %R for(i in 1:p) {   if(types[i]=="con"){ df_Xk_hat[,i] <- as.vector(quantile(X[,i], probs=pnorm(df_Xk_norm_hat[,i]), type=8)) }}
  %R for(i in 1:p) {   if(types[i]=="bin"){ df_Xk_hat[,i][df_Xk_norm_hat[,i]>=0]= 1}}
  %R for(i in 1:p) {   if(types[i]=="bin"){ df_Xk_hat[,i][df_Xk_norm_hat[,i]<0]= 0}}
  
  #From R to Python  
  %R -o df_Xk_hat
  df_Xk_hat.reset_index(drop=True, inplace=True)
    
  #Creating the names for the knockoff variables (KVar1, ..., KVarp)
  numbers = np.arange(1,p+1)
  kvar_names = ['KVar'+ str(number) for number in numbers]
  df_Xk_hat.columns= kvar_names     

  #Final dataset: columns of ls_simulations[i] followed by the knockoff columns
  ls_simulations_Xk_hat[i] = pd.concat([ls_simulations[i], df_Xk_hat], axis=1)
  
time_knockoffs_6 = timer() - ti 

### Cox’s proportional hazards model with lasso penalization for (X,Xk_hat) (glmnet)

In [None]:
# Per-simulation count of non-zero Cox-lasso coefficients on the augmented data (X, Xk_hat)
np_Number_CoxLasso_Rejections_knockoff_hat = np.zeros(nsim)

# Start of the timed section
tii = timer()

# Fit lasso_glmnet on every augmented data set, distributed over `jobs` joblib workers
ls_coef_knockoff_hat = Parallel(n_jobs=jobs)(
    delayed(lasso_glmnet)(x) for x in ls_simulations_Xk_hat
)

# Count the coefficients that the lasso left different from zero in each simulation
for i in range(nsim):
    nonzero_mask = ls_coef_knockoff_hat[i] != 0
    np_Number_CoxLasso_Rejections_knockoff_hat[i] = np.sum(nonzero_mask)

# Elapsed wall-clock time (seconds) of this stage for scenario 6
time_CoxLASSO_X_Xk_6 = timer() - tii

### Wj using the LASSO coefficient difference statistics and threshold rejection

In [None]:
#Per-simulation summaries of the knockoff filter
np_Number_Rejections_knockoff_hat = np.zeros(nsim)
np_knockoff_hat_Power = np.zeros(nsim)
np_knockoff_hat_FDP = np.zeros(nsim)
np_knockoff_hat_FD = np.zeros(nsim)

for i in range(nsim): 
  
  #Feature importances: fitted coefficients; the first p entries are paired
  #against the remaining ones below
  Z = ls_coef_knockoff_hat[i]

  #Wj statistic: coefficient difference |Z_j| - |Z_{j+p}|
  pair_W = np.abs(Z[0:p]) - np.abs(Z[p:])

  #Threshold selection and variable selection
  threshold = data_dependent_threshhold(W=pair_W, fdr= FDR)
  print("Threshold for knockoffs ")
  print(threshold)
  rejections = make_selections(W=pair_W, fdr= FDR)

  #Number of selections and number of selected nulls (false discoveries)
  number_rejections = rejections.sum()
  false_discoveries = np.dot(rejections, ls_beta[i] == 0)

  #Printing and saving the Power and FDP of the knockoff procedure
  np_Number_Rejections_knockoff_hat[i] = number_rejections
  print("Number of non-zero knockoff coefficients: {}".format(np_Number_Rejections_knockoff_hat[i]))
  np_knockoff_hat_Power[i] = np.around(100*(np.dot(rejections, ls_beta[i] != 0) / (ls_beta[i] != 0).sum()), decimals=2)
  #FDP = V / max(R, 1): with no selections the FDP is 0, not 0/0 = NaN
  #(a NaN would be dropped by the later .mean() and bias the FDR average)
  np_knockoff_hat_FDP[i] = np.around(100*false_discoveries / max(number_rejections, 1), decimals=2)
  np_knockoff_hat_FD[i] = np.around(false_discoveries)
  print(f"The knockoff filter has discovered {np_knockoff_hat_Power[i]}% of the non-nulls with a FDP of {np_knockoff_hat_FDP[i]}%")


In [None]:
# Scenario label stored in the 'Censoring' column: the value 60, once per simulation
np_varying_feature = np.full(nsim, 60)

# Column name -> per-simulation array, listed in the order the columns appear
summary_columns_6 = {
    'Censoring': np_varying_feature,
    'Censoring indicator': np_censoring,
    'Mean survival time': np_mean_survival_time,
    'Relative Frobenius norm latentcor': np_Frobenius_norm_latentcor,
    'Relative Frobenius norm Sigma hat': np_Frobenius_norm_Sigma_hat,
    'Number_CoxLasso_Rejections': np_Number_CoxLasso_Rejections,
    'CoxLasso_Power(%)': np_CoxLasso_Power,
    'CoxLasso_FDP(%)': np_CoxLasso_FDP,
    'CoxLasso_FD': np_CoxLasso_FD,
    'Number_CoxLasso_Rejections_knockoff_hat': np_Number_CoxLasso_Rejections_knockoff_hat,
    'Number_Rejections_knockoff_hat': np_Number_Rejections_knockoff_hat,
    'knockoff_hat_Power(%)': np_knockoff_hat_Power,
    'knockoff_hat_FDP(%)': np_knockoff_hat_FDP,
    'knockoff_hat_FD': np_knockoff_hat_FD,
}

# One row per simulation for this scenario
df_simulations_results_6 = pd.DataFrame(summary_columns_6)

In [None]:
# Display the per-simulation results of the scenario labelled Censoring = 60
df_simulations_results_6

In [None]:
# Column-wise averages over the simulations of the scenario labelled Censoring = 60
df_simulations_results_6.mean()

### Time to run all the scenarios

In [None]:
# Wall-clock time since t_initial, converted from seconds to hours
t_final = timer()
elapsed_hours = (t_final-t_initial)/3600

print('Time (hrs) taken to run all is:',round(elapsed_hours,4))

In [None]:
# Total run time of each stage: the six per-scenario timings (seconds) are added up
# and reported in hours.

time_simulations = sum((time_simulations_1, time_simulations_2, time_simulations_3,
                        time_simulations_4, time_simulations_5, time_simulations_6))
print('Time (hrs) taken to create design matriz X and survival time t:',round(time_simulations/3600,4))

time_CoxLASSO = sum((time_CoxLASSO_1, time_CoxLASSO_2, time_CoxLASSO_3,
                     time_CoxLASSO_4, time_CoxLASSO_5, time_CoxLASSO_6))
print('Time (hrs) taken to run Cox’s proportional hazard’s model with LASSO penalization',round(time_CoxLASSO/3600,4))

time_latentcor = sum((time_latentcor_1, time_latentcor_2, time_latentcor_3,
                      time_latentcor_4, time_latentcor_5, time_latentcor_6))
print('Time (hrs) taken to run latent correlation matrix estimation',round(time_latentcor/3600,4))

time_GraphicalLASSO = sum((time_GraphicalLASSO_1, time_GraphicalLASSO_2, time_GraphicalLASSO_3,
                           time_GraphicalLASSO_4, time_GraphicalLASSO_5, time_GraphicalLASSO_6))
print('Time (hrs) taken to run Graphical LASSO',round(time_GraphicalLASSO/3600,4))

time_knockoffs = sum((time_knockoffs_1, time_knockoffs_2, time_knockoffs_3,
                      time_knockoffs_4, time_knockoffs_5, time_knockoffs_6))
print('Time (hrs) taken to sample knockoffs',round(time_knockoffs/3600,4))

time_CoxLASSO_X_Xk = sum((time_CoxLASSO_X_Xk_1, time_CoxLASSO_X_Xk_2, time_CoxLASSO_X_Xk_3,
                          time_CoxLASSO_X_Xk_4, time_CoxLASSO_X_Xk_5, time_CoxLASSO_X_Xk_6))
print('Time (hrs) taken to run Cox’s proportional hazard’s model with LASSO penalization for (X,Xk)',round(time_CoxLASSO_X_Xk/3600,4))


# Bringing it all together

In [None]:
# Stack the per-scenario result frames (scenarios 1 to 6) into a single frame
# with a fresh 0..N-1 row index
scenario_frames = [
    df_simulations_results_1,
    df_simulations_results_2,
    df_simulations_results_3,
    df_simulations_results_4,
    df_simulations_results_5,
    df_simulations_results_6,
]
df_simulations_results = pd.concat(scenario_frames, axis=0, ignore_index=True)
df_simulations_results

In [None]:
# Average of the selected metrics within each value of the 'Censoring' label
summary_metric_columns = [
    "Censoring",
    "Censoring indicator",
    "CoxLasso_Power(%)",
    "CoxLasso_FDP(%)",
    "knockoff_hat_Power(%)",
    "knockoff_hat_FDP(%)",
]
results = df_simulations_results[summary_metric_columns].groupby("Censoring").mean()

In [None]:
# Display the per-scenario averages (one row per value of 'Censoring')
results

In [None]:
#Saving the per-scenario averages (`results`) to a csv file in the working directory
results.to_csv('results_n300_p300_varying_censoring_7sep22.csv')

#Saving the per-simulation results (`df_simulations_results`) to a csv file
#NOTE(review): this file name says "censoring_amplitude" while the one above says "varying_censoring" - confirm it is intended
df_simulations_results.to_csv('Simulation_results_n300_p300_censoring_amplitude_7sep22.csv')

### Plots of Average Power and FDR

In [None]:
#Setting the font Arial
plt.rcParams["font.family"] = "Arial"


def _draw_rate_panel(ax, x_vals, lasso_vals, knockoff_vals, knockoff_label_shift, legend_loc, y_label):
    """Draw one panel comparing the Lasso Cox and LGCK-LCD curves.

    ax: matplotlib Axes to draw on.
    x_vals: x positions (values of the 'Censoring' index), also used as ticks.
    lasso_vals, knockoff_vals: y values (%) of the two curves.
    knockoff_label_shift: vertical offset of the value labels of the knockoff curve.
    legend_loc: location string passed to ax.legend.
    y_label: label of the y axis.
    """
    #Value labels next to every point (Lasso Cox labels always sit 3.5 above the point)
    for x_val, y_val in zip(x_vals, lasso_vals):
        ax.text(x_val, y_val+3.5, "{:.1f}".format(y_val), ha="center")
    for x_val, y_val in zip(x_vals, knockoff_vals):
        ax.text(x_val, y_val+knockoff_label_shift, "{:.1f}".format(y_val), ha="center")

    ax.plot(x_vals, lasso_vals, marker='s', label="Lasso Cox", linestyle='dashed')
    ax.plot(x_vals, knockoff_vals, marker='o', label="LGCK-LCD")
    ax.set_ylim(0, 112)
    ax.set_xlim(6, 64)
    ax.set_xticks(x_vals)
    ax.legend(loc=legend_loc)
    ax.set_xlabel("Censoring rate (%)", fontname='Arial')
    ax.set_ylabel(y_label, fontname='Arial')


#Average power and FDR plots side-by-side
x_points = np.array(results.index)
fig, (ax1, ax2) = plt.subplots(1, 2)

#Figure size (174 mm x 80 mm, converted to inches)
mm = 1/25.4
fig.set_figwidth(174*mm)
fig.set_figheight(80*mm)

#Left panel: average power (knockoff labels below the points)
_draw_rate_panel(ax1, x_points,
                 np.array(results["CoxLasso_Power(%)"]),
                 np.array(results["knockoff_hat_Power(%)"]),
                 knockoff_label_shift=-8, legend_loc="lower left",
                 y_label="Average power (%)")

#Right panel: FDR (knockoff labels above the points)
_draw_rate_panel(ax2, x_points,
                 np.array(results["CoxLasso_FDP(%)"]),
                 np.array(results["knockoff_hat_FDP(%)"]),
                 knockoff_label_shift=3, legend_loc="upper left",
                 y_label="FDR (%)")

fig.tight_layout()
fig.savefig("Fig4.eps", format="eps", dpi=1200)
fig.savefig("t_n300_p300_Varying_censoring_7sep22.jpg", format="jpg", dpi=300)
#plt.show() renders the figure with the inline backend; fig.show() would only
#emit a "non-GUI backend" warning there
plt.show()