In [1]:
%load_ext autoreload
%autoreload 2
from manage_datasets import load_features_outcomes, load_features_labels, make_training_sets, make_stacked_sets, load_feature_list
from model_utils import make_model, save_model
import yaml

# Device and dataset that every cell below operates on.
device = 'cmod'
# Alternative datasets; uncomment exactly one `dataset_path` assignment.
#dataset_path = 'random_2000_shots_50%_flattop'
#dataset_path = 'no_ufo_flattop_1452_shots_50%_disruptive'
dataset_path = 'random_flattop_256_shots_60%_disruptive'
#dataset_path = 'no_ufo_flattop_7736_shots_6%_disruptive'

# Make training sets if they haven't been created yet
try:
    numeric_feats = load_feature_list(device, dataset_path)
except Exception:
    # Catch `Exception` rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not swallowed and do not silently
    # trigger dataset regeneration.
    # NOTE(review): the exact exception raised by load_feature_list when the
    # feature list is missing is not visible here; narrow this further once
    # confirmed.
    make_training_sets(device, dataset_path, random_seed=0)
    numeric_feats = load_feature_list(device, dataset_path)
    

In [2]:
# Load the features and outcomes for the 'train' and 'val' splits, in that order.
(x_train, y_train), (x_val, y_val) = (
    load_features_outcomes(device, dataset_path, split_name, numeric_feats)
    for split_name in ('train', 'val')
)

In [4]:
# Retrain the best found models for this dataset
model_strings = ['cph']

for model_string in model_strings:
    # Read the stored configuration for this model from its YAML file.
    with open(f"models/{device}/{dataset_path}/{model_string}.yaml", 'r') as stream:
        raw_config = yaml.safe_load(stream)

    # Entries stored as a mapping with a 'value' key are unwrapped to the bare
    # value; every other entry (ints, floats, strings, ...) is kept as-is.
    # Testing for the mapping explicitly avoids the TypeError/KeyError that a
    # "not an int" test raises on plain non-integer entries.
    config = {
        key: value['value'] if isinstance(value, dict) and 'value' in value else value
        for key, value in raw_config.items()
    }

    model = make_model(config)

    if config['model_type'] == 'rf':
        # The 'rf' model is fit on labels derived from the configured
        # disruptive window instead of on the outcomes in y_train.
        disruptive_window = config['disruptive_window']
        _, labels_train = load_features_labels(device, dataset_path, 'train', disruptive_window, numeric_feats)
        # labels_val is not used in this cell; it is still loaded so the
        # notebook namespace stays the same for any other cell that reads it.
        _, labels_val = load_features_labels(device, dataset_path, 'val', disruptive_window, numeric_feats)

        model.fit(x_train, labels_train)
    else:
        model.fit(x_train, y_train)

    # Persist the fitted model together with the feature list it was trained on.
    save_model(model, model_string, device, dataset_path, numeric_feats)

Saved model to models/cmod/random_flattop_256_shots_60%_disruptive/cph.pkl


In [2]:
# Build the stacked (temporal) version of every split with a shared stack size.
stack_size = 2
for split_name in ('train', 'test', 'val'):
    make_stacked_sets(device, dataset_path, split_name, stack_size)