In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd
from plot_utils import *
from preprocess_datasets import load_features_outcomes, load_features_labels, make_training_sets, DEFAULT_FEATURES
from run_models import run_survival_model, run_rf_model, eval_model, save_model, load_model

# Run configuration, followed by creation of the training/test/validation sets.
# The names defined here (device, dataset, selection, numeric_feats) are read by
# the later cells of this notebook.
# NOTE(review): an earlier note here said the sets are made "if they haven't been
# created yet"; whether make_training_sets skips existing sets is not visible in
# this notebook -- confirm in preprocess_datasets.

# Device identifier; passed as the first argument to the dataset and model helpers.
device = 'cmod'
# Base dataset name. Later cells append '_train', '_test' and '_val' to it.
# Alternative dataset (disabled):
#dataset = 'random100'
dataset = 'random_256_shots_50%_flattop'
# Forwarded as `selection=` to run_survival_model.
# NOTE(review): the accepted values and their meaning are defined in run_models -- confirm.
selection='fine'

# Feature columns to load and preprocess. DEFAULT_FEATURES is imported from
# preprocess_datasets. Explicit alternative list (disabled):
#numeric_feats = ['ip', 'n_e', 'aminor', 'kappa', 'squareness', 'delta', 'li', 'Wmhd']
numeric_feats = DEFAULT_FEATURES
# Disabled construction of per-feature temporal columns (feature name + suffix):
#temporal_suff = ['_avg', '_std', '_int', '_der']
#temporal_feats = [feat + suff for feat in numeric_feats for suff in temporal_suff]

# TODO: list disruptive vs non-disruptive shots in each dataset
# Build the sets for this device/dataset with a fixed random seed. The recorded
# output reports the number of training, test and validation shots.
make_training_sets(device, dataset, random_seed=0)

Training shots: 152
Test shots: 51
Validation shots: 51


In [2]:
from auton_survival.preprocessing import Preprocessor
# Load the training, test and validation splits, then preprocess them.
# The generator expressions below are consumed in order by tuple unpacking, so
# the loaders run for '_train', then '_test', then '_val'.

# Features and outcomes for each split, restricted to the selected features.
(features_train, outcomes_train), (features_test, outcomes_test), (features_val, outcomes_val) = (
    load_features_outcomes(device, dataset + split_suffix, features=numeric_feats)
    for split_suffix in ('_train', '_test', '_val')
)

# Labels for each split. The features returned alongside them are discarded
# into `_`; they should match the ones loaded above.
# NOTE(review): the meaning of the 0.15 argument is defined by
# load_features_labels in preprocess_datasets -- confirm there.
(_, labels_train), (_, labels_test), (_, labels_val) = (
    load_features_labels(device, dataset + split_suffix, 0.15, features=numeric_feats)
    for split_suffix in ('_train', '_test', '_val')
)

# Fit the imputer and scaler on the training features only, then apply the same
# fitted transformer to all three splits.
preprocessor = Preprocessor(cat_feat_strat='ignore', num_feat_strat='mean')
transformer = preprocessor.fit(
    features_train,
    cat_feats=[],
    num_feats=numeric_feats,
    one_hot=True,
    fill_value=-1,
)

x_train, x_test, x_val = (
    transformer.transform(split_features)
    for split_features in (features_train, features_test, features_val)
)


In [3]:
# Train a cph model and save it.
# run_survival_model receives the transformed training and validation features
# with their outcomes, plus the notebook-level `selection` setting.
# NOTE(review): presumably the validation split is used for model selection
# inside run_survival_model -- confirm in run_models.
cph_model = run_survival_model('cph', x_train, x_val, outcomes_train, outcomes_val, selection=selection)
# The fitted transformer is handed to save_model together with the model. The
# recorded output reports a .pkl written under models/, named from the model
# type, device and dataset.
save_model(cph_model, transformer, 'cph', device, dataset)

Saved model to models/cph_cmod_random_256_shots_50%_flattop.pkl


In [10]:
# Train a random forest model and save it.
# Unlike the run_survival_model calls in this notebook, run_rf_model is given
# the labels (labels_train / labels_val) rather than the outcomes.
rf_model = run_rf_model(x_train, x_val, labels_train, labels_val)
# NOTE(review): the recorded output names the saved file "rf_temporal_...",
# which does not match the 'rf' name passed here. The output looks stale;
# re-run the cell to refresh it.
save_model(rf_model, transformer, 'rf', device, dataset)

Saved model to models/rf_temporal_cmod_random_256_shots_50%_flattop.pkl


In [4]:
# Train a dcph model and save it.
# Same inputs as the other run_survival_model calls: transformed training and
# validation features, their outcomes, and the `selection` setting.
# NOTE(review): the recorded output contains an "Error in fitting model for
# parameters: ..." message for one parameter set, and no "Saved model to ..."
# line is visible. Check that the returned model is usable and was saved.
dcph_model = run_survival_model('dcph', x_train, x_val, outcomes_train, outcomes_val, selection=selection)
save_model(dcph_model, transformer, 'dcph', device, dataset)

100%|██████████| 50/50 [00:01<00:00, 25.76it/s]
100%|██████████| 50/50 [00:01<00:00, 26.12it/s]
100%|██████████| 50/50 [00:01<00:00, 25.83it/s]
100%|██████████| 50/50 [00:01<00:00, 25.78it/s]
100%|██████████| 50/50 [00:01<00:00, 26.50it/s]
100%|██████████| 50/50 [00:01<00:00, 26.66it/s]
100%|██████████| 50/50 [00:01<00:00, 26.09it/s]
100%|██████████| 50/50 [00:01<00:00, 26.78it/s]
 76%|███████▌  | 38/50 [00:01<00:00, 25.99it/s]
100%|██████████| 50/50 [00:01<00:00, 26.97it/s]
100%|██████████| 50/50 [00:02<00:00, 17.60it/s]
100%|██████████| 50/50 [00:02<00:00, 17.48it/s]
100%|██████████| 50/50 [00:02<00:00, 17.47it/s]
100%|██████████| 50/50 [00:02<00:00, 17.05it/s]
100%|██████████| 50/50 [00:02<00:00, 18.08it/s]
100%|██████████| 50/50 [00:02<00:00, 17.97it/s]
100%|██████████| 50/50 [00:02<00:00, 17.09it/s]
 80%|████████  | 40/50 [00:02<00:00, 16.57it/s]
 70%|███████   | 35/50 [00:02<00:00, 17.06it/s]
 78%|███████▊  | 39/50 [00:02<00:00, 16.46it/s]
100%|██████████| 50/50 [00:02<00:00, 22.

Error in fitting model for parameters: {'bs': 50, 'epochs': 200, 'layers': [200], 'learning_rate': 2.1544346900318823e-05}


100%|██████████| 50/50 [00:02<00:00, 22.49it/s]
100%|██████████| 50/50 [00:02<00:00, 22.59it/s]
100%|██████████| 50/50 [00:02<00:00, 22.35it/s]
100%|██████████| 50/50 [00:02<00:00, 22.19it/s]
 42%|████▏     | 21/50 [00:00<00:01, 22.93it/s]

100%|██████████| 50/50 [00:02<00:00, 21.53it/s]


In [14]:
# Train a random survival forest model and save it.
# Same inputs as the other run_survival_model calls: transformed training and
# validation features, their outcomes, and the `selection` setting.
# NOTE(review): the recorded output shows "Error in fitting model for
# parameters: {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 100}"
# and no "Saved model to ..." line. Verify that a model was actually fitted
# and saved.
rsf_model = run_survival_model('rsf', x_train, x_val, outcomes_train, outcomes_val, selection=selection)
save_model(rsf_model, transformer, 'rsf', device, dataset)


Error in fitting model for parameters: {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 100}


In [12]:
%%capture --no-display
# Train a dsm model and save it.
# This cell starts with a %%capture cell magic, so no training or save output
# is recorded for it in the notebook.
# Same inputs as the other run_survival_model calls: transformed training and
# validation features, their outcomes, and the `selection` setting.
dsm_model = run_survival_model('dsm', x_train, x_val, outcomes_train, outcomes_val, selection=selection)
# The fitted transformer is passed to save_model together with the model.
save_model(dsm_model, transformer, 'dsm', device, dataset)

In [13]:
%%capture --no-display
# Train a dcm model and save it.
# This cell starts with a %%capture cell magic, so no training or save output
# is recorded for it in the notebook.
# Same inputs as the other run_survival_model calls: transformed training and
# validation features, their outcomes, and the `selection` setting.
dcm_model = run_survival_model('dcm', x_train, x_val, outcomes_train, outcomes_val, selection=selection)
# The fitted transformer is passed to save_model together with the model.
save_model(dcm_model, transformer, 'dcm', device, dataset)

In [8]:
# Load a previously saved cph model for the configured device and dataset.
# This rebinds cph_model, replacing any model object already held under that name.
# NOTE(review): the recorded output shows models/cph_cmod_random100.pkl, which
# does not correspond to the active `dataset` value
# ('random_256_shots_50%_flattop'; 'random100' is commented out). The output is
# stale; re-run the cell.
# NOTE(review): save_model is given the fitted transformer as well; whether
# load_model also returns it is not visible here -- confirm in run_models.
cph_model = load_model('cph', device, dataset)

Loaded model from models/cph_cmod_random100.pkl
