In [12]:
%load_ext autoreload
%autoreload 2
import pandas as pd
from plot_utils import *
from preprocess_datasets import load_features_outcomes, load_features_labels, make_training_sets, DEFAULT_FEATURES
from run_models import run_survival_model, run_rf_model, eval_model, save_model, load_model

# Make training sets if they haven't been created yet

device = 'cmod'
dataset = 'random100'
#numeric_feats = ['ip', 'n_e', 'aminor', 'kappa', 'squareness', 'delta', 'li', 'Wmhd']
numeric_feats = DEFAULT_FEATURES
temporal_suff = ['_avg', '_std', '_int', '_der']
temporal_feats = [feat + suff for feat in numeric_feats for suff in temporal_suff]

# TODO: list disruptive vs non-disruptive shots in each dataset
make_training_sets(device, dataset, random_seed=0, window=5)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Training shots: 59
Test shots: 20
Validation shots: 20


In [14]:
from auton_survival.preprocessing import Preprocessor
# Load and preprocess training, test, validation sets
features_train, outcomes_train = load_features_outcomes(device, dataset+'_train', features=numeric_feats+temporal_feats)
features_test, outcomes_test = load_features_outcomes(device, dataset+'_test', features=numeric_feats+temporal_feats)
features_val, outcomes_val = load_features_outcomes(device, dataset+'_val', features=numeric_feats+temporal_feats)

# The features should match the above
_, labels_train = load_features_labels(device, dataset+'_train', 0.15, features=numeric_feats+temporal_feats)
_, labels_test = load_features_labels(device, dataset+'_test', 0.15, features=numeric_feats+temporal_feats)
_, labels_val = load_features_labels(device, dataset+'_val', 0.15, features=numeric_feats+temporal_feats)

# Fit the imputer and scaler to the training data and transform the training, test, and validation data
preprocessor = Preprocessor(cat_feat_strat='ignore', num_feat_strat='mean')
transformer=preprocessor.fit(features_train, cat_feats=[], num_feats=numeric_feats+temporal_feats, one_hot=True, fill_value=-1)

x_train = transformer.transform(features_train)
x_test = transformer.transform(features_test)
x_val = transformer.transform(features_val)


In [15]:
# Train a cph model and save it
cph_model = run_survival_model('cph', x_train, x_val, outcomes_train, outcomes_val)
save_model(cph_model, transformer, 'cph_temporal', device, dataset)

Saved model to models/cph_temporal_cmod_random100.pkl


In [16]:
# Train a random forest model and save it
rf_model = run_rf_model(x_train, x_val, labels_train, labels_val)
save_model(rf_model, transformer, 'rf_temporal', device, dataset)

Saved model to models/rf_temporal_cmod_random100.pkl


In [17]:
# Train a dcph model and save it
dcph_model = run_survival_model('dcph', x_train, x_val, outcomes_train, outcomes_val)
save_model(dcph_model, transformer, 'dcph_temporal', device, dataset)

100%|██████████| 50/50 [00:02<00:00, 23.73it/s]
100%|██████████| 50/50 [00:02<00:00, 24.75it/s]
100%|██████████| 50/50 [00:02<00:00, 17.62it/s]
100%|██████████| 50/50 [00:02<00:00, 17.22it/s]
100%|██████████| 50/50 [00:02<00:00, 24.07it/s]
100%|██████████| 50/50 [00:02<00:00, 24.73it/s]
100%|██████████| 50/50 [00:02<00:00, 17.49it/s]
100%|██████████| 50/50 [00:02<00:00, 17.56it/s]


Saved model to models/dcph_temporal_cmod_random100.pkl


In [18]:
# Train a random survival forest model and save it
rsf_model = run_survival_model('rsf', x_train, x_val, outcomes_train, outcomes_val)
save_model(rsf_model, transformer, 'rsf_temporal', device, dataset)


Error in fitting model for parameters: {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 300}


In [None]:
%%capture --no-display
# Train a dsm model and save it
dsm_model = run_survival_model('dsm', x_train, x_val, outcomes_train, outcomes_val)
save_model(dsm_model, transformer, 'dsm_temporal', device, dataset)

In [None]:
%%capture --no-display
# Train a dcm model and save it
dcm_model = run_survival_model('dcm', x_train, x_val, outcomes_train, outcomes_val)
save_model(dcm_model, transformer, 'dcm_temporal', device, dataset)



In [8]:
# Load a model
cph_model = load_model('cph', device, dataset)

Loaded model from models/cph_cmod_random100.pkl
