In [1]:
import json
import numpy as np
import pandas as pd
from boot_util import *
import sklearn
from sklearn.model_selection import train_test_split
import os

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Path of the JSON file that holds the list of simulation configurations to run.
conf_file = "conf_1.json"

In [3]:
def sample(n,sample_type="LPH"):
    """Simulate ``n`` survival observations and return them as a DataFrame.

    Parameters
    ----------
    n : int
        Number of observations; passed straight to the simulator's
        ``simulate`` method.
    sample_type : str, default "LPH"
        Which simulator to use: "LPH", "NLPH", "NLNPH", "Deep1" or "Deep2".

    Returns
    -------
    pandas.DataFrame
        Covariate columns ``X1..X3`` (``X1..X5`` for "Deep1"/"Deep2")
        followed by ``duration`` and ``event``.

    Raises
    ------
    ValueError
        If ``sample_type`` is not one of the supported names.
    """
    # The simulator classes are looked up lazily, branch by branch, so only
    # the one actually requested has to be available.
    if sample_type == "LPH":
        # Original author's note: phaz = 0.1*np.exp((0.44*X1 + 0.66*X2 + 0.88*X3))
        simulator = SimStudyLinearPH()
    elif sample_type == "NLPH":
        # Original author's note:
        # phaz = 0.1*np.exp((0.44*X1 + 0.66*X2 + 0.88*X3 + X1*X1 + X3*X3 + X1*X2 + X1*X3 + X2*X3))
        simulator = SimStudyNonLinearPH()
    elif sample_type == "NLNPH":
        simulator = SimStudyNonLinearNonPH_smooth()
    elif sample_type == "Deep1":
        simulator = SimStudyDeep1()
    elif sample_type == "Deep2":
        simulator = SimStudyDeep2()
    else:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError(f"Unknown sample method: {sample_type!r}")

    smp = simulator.simulate(n)

    # The two "Deep" settings are labelled with five covariates, the rest with three.
    n_covs = 5 if sample_type in ("Deep1", "Deep2") else 3
    ret = pd.DataFrame(smp["covs"],columns=[f"X{k}" for k in range(1, n_covs + 1)])
    ret["duration"] = smp["durations"]
    ret["event"] = smp["events"]
    return ret

In [4]:
def get_out_dir_path(conf):
    """Build the output directory path for one simulation configuration.

    The directory name encodes, in order: sample type, ``n_train`` (n),
    ``patience`` (p), ``seed`` (s), ``control`` (c), ``m`` (also tagged p),
    ``depth`` (d) and ``layer_size`` (w). It is appended to ``conf['out_dir']``
    with a "/" separator, so an ``out_dir`` that already ends in "/" yields a
    double slash.
    """
    run_name = "_".join([
        f"{conf['sample_type']}",
        f"n_{conf['n_train']}",
        f"p_{conf['patience']}",
        f"s_{conf['seed']}",
        f"c_{conf['control']}",
        f"p_{conf['m']}",
        f"d_{conf['depth']}",
        f"w_{conf['layer_size']}",
    ])
    return f"{conf['out_dir']}/{run_name}"

In [5]:
# Load the run configurations; the context manager closes the file handle
# instead of leaving it open until garbage collection.
with open(conf_file,"r") as conf_fh:
    confs = json.load(conf_fh)

In [6]:
# Display the loaded configurations for inspection.
confs

[{'n_train': 10000,
  'n_test': 1000,
  'n_boot': 200,
  'n_samp': 100,
  'grid': [0.0, 27, 0.1],
  'patience': 25,
  'control': 1,
  'out_dir': '../sims/',
  'seed': 1,
  'sample_type': 'NLNPH',
  'm': 100,
  'validation_ratio': 0.2,
  'dropout': 0.1,
  'layer_size': 128,
  'depth': 6,
  'batch_size': 1000,
  'cols_standardize': [],
  'cols_leave': ['X1', 'X2', 'X3']}]

In [None]:
# Run the full simulation study: for every configuration, draw one test set,
# then n_samp independent training samples, and for each training sample fit
# the reference ensemble plus n_boot - 1 bootstrap refits, writing every
# result to gzip-compressed CSV files under the configuration's output directory.
for conf in confs:
    print(conf)
    cols_standardize = conf["cols_standardize"]
    cols_leave = conf["cols_leave"]
    np.random.seed(conf["seed"])
    _ = torch.manual_seed(conf["seed"])
    grid_start, grid_end, grid_step = conf["grid"]
    # NOTE(review): `grid` is not used anywhere else in this cell — confirm
    # whether a later cell relies on it before removing.
    grid = np.arange(grid_start,grid_end,grid_step)
    base_dir = get_out_dir_path(conf)
    # Create the output directory (and any missing parents). An existing
    # directory is fine; real failures such as permission errors surface here
    # instead of being swallowed and reappearing later as a failed write.
    os.makedirs(base_dir, exist_ok=True)

    df_test = sample(conf["n_test"],conf["sample_type"])

    # Round-trip the test set through disk. The files are gzip-compressed
    # despite the ".csv" extension. Because to_csv writes the index and
    # read_csv does not restore it, the reloaded frame carries an extra
    # "Unnamed: 0" column.
    df_test.to_csv(f"{base_dir}/test.csv",compression="gzip")
    df_test = pd.read_csv(f"{base_dir}/test.csv",compression="gzip")
    n_samp = conf["n_samp"]

    for j in range(n_samp):
        # Re-seed per training sample so each sample j is reproducible on its own.
        # NOTE(review): for j == 0 this is the same seed used before drawing
        # the test set; if the simulators draw from these global RNGs, the
        # first training sample may overlap with the test data — confirm.
        np.random.seed(conf["seed"] + j)
        _ = torch.manual_seed(conf["seed"] + j)
        df_train = sample(conf["n_train"],conf["sample_type"])
        df_train.to_csv(f"{base_dir}/train_{j}.csv",compression="gzip")
        df_train,df_val = train_test_split(df_train,test_size=conf['validation_ratio'])
        for i in range(conf["n_boot"]):
            if i == 0:
                # Iteration 0 uses the training split as drawn and fits
                # conf["m"] models on it.
                df_train_boot = df_train
                df_val_boot = df_val
                n_fold = conf["m"]
            else:
                # Later iterations resample the training rows with
                # replacement and fit a single model; the validation split
                # is reused unchanged.
                df_train_boot = df_train.sample(df_train.shape[0],replace=True,axis=0)
                df_val_boot = df_val
                n_fold = 1

            trans = get_transformers(df_train_boot,cols_standardize,cols_leave)
            nets = []
            for k in range(n_fold):
                model = train_model(conf,df_train_boot,df_val_boot,trans,verbose=False)
                if i == 0 and k == 0:
                    # Save the output of the very first model alone as theta_{j}.
                    get_test_avg_srv(conf,[model],df_train_boot,df_test,trans).to_csv(f"{base_dir}/theta_{j}.csv",compression="gzip")
                nets.append(model)

            # Save the output computed from all models fitted in this iteration.
            get_test_avg_srv(conf,nets,df_train_boot,df_test,trans).to_csv(f"{base_dir}/res_boot_samp_{j}_boot_{i}.csv",compression="gzip")

{'n_train': 10000, 'n_test': 1000, 'n_boot': 200, 'n_samp': 100, 'grid': [0.0, 27, 0.1], 'patience': 25, 'control': 1, 'out_dir': '../sims/', 'seed': 1, 'sample_type': 'NLNPH', 'm': 100, 'validation_ratio': 0.2, 'dropout': 0.1, 'layer_size': 128, 'depth': 6, 'batch_size': 1000, 'cols_standardize': [], 'cols_leave': ['X1', 'X2', 'X3']}


In [None]:
# NOTE(review): `res` is not assigned in any visible cell, so on a fresh kernel
# this raises NameError — define `res` above or remove this cell.
pd.DataFrame(res)