## Model features

- fixed length timeseries informed by d_per_de ~ 4

- One classification for an entire small inner subset of each timeseries segment
 
- Optuna integration

- Comparison with best possible majority-rule classification of the inner segment

In [1]:
import sys
import datetime as dt
import h5py
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import optuna
from optuna.integration import TFKerasPruningCallback
from optuna.trial import TrialState
import matplotlib.pyplot as plt
from scipy.stats import mode

# get functions from other notebooks
%run /tigress/kendrab/analysis-notebooks/loss_fns.ipynb
%run /tigress/kendrab/analysis-notebooks/metrics.ipynb
%run /tigress/kendrab/analysis-notebooks/preproc_utils.ipynb

### Optimize via optuna

In [2]:
def objective(trial):
    keras.backend.clear_session()
    
    """ Assemble a model"""
    #######################
    model_name = "eta"
    # hyperparameters
    learning_rate = trial.suggest_float("learning_rate", 1e-3, 1e-1, log=True)
    filters = trial.suggest_categorical("filters", [16,32])
    kernel_size = 3
    pool_size = 2
    padding_length = trial.suggest_int("padding_length", 10, 30)  
                                       # amount of data on each side of each segment for additional info
    stride = trial.suggest_int("stride", 1, 7)  # size (and therefore spacing) of each segment
    input_length = stride + 2*padding_length
    mask_value = int(-10.0)
    epochs = 5
    thinning_factor = [.5, None]
    hyperparams = {'learning_rate':learning_rate, 'filters':filters, 'kernel_size':kernel_size, 'pool_size':pool_size,
                  'padding_length':padding_length, 'stride':stride, 'input_length':input_length, 'epochs':epochs,
                   'thinning_factor':thinning_factor}

    # input
    bx_input = keras.Input(shape=(input_length,1), name="bx") 
    by_input = keras.Input(shape=(input_length,1), name="by") 
    bz_input = keras.Input(shape=(input_length,1), name="bz") 
    jy_input = keras.Input(shape=(input_length,1), name="jy") 
    vz_input = keras.Input(shape=(input_length,1), name="vz") 

    # convolve and pool separately
    bx_conv = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='valid')(bx_input)
    by_conv = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='valid')(by_input)
    bz_conv = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='valid')(bz_input)
    jy_conv = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='valid')(jy_input)
    vz_conv = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='valid')(vz_input)

    bx_pool = keras.layers.MaxPooling1D(pool_size=pool_size)(bx_conv)
    by_pool = keras.layers.MaxPooling1D(pool_size=pool_size)(by_conv)
    bz_pool = keras.layers.MaxPooling1D(pool_size=pool_size)(bz_conv)
    jy_pool = keras.layers.MaxPooling1D(pool_size=pool_size)(jy_conv)
    vz_pool = keras.layers.MaxPooling1D(pool_size=pool_size)(vz_conv)

    # merge the layers together
    avg = keras.layers.Average()([bx_pool, by_pool, bz_pool, jy_pool, vz_pool])
    # convolve and pool
    avg_conv = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='valid')(avg)
    avg_pool = keras.layers.MaxPooling1D(pool_size=pool_size)(avg_conv)


    # use dense layer to output
    flat_pool = keras.layers.Flatten()(avg_pool)
    condensed_logits = keras.layers.Dense(2, activation='relu')(flat_pool)  # one prediction for the entire stride
    condensed_probs = keras.layers.Softmax()(condensed_logits)
    reshaped_logits = keras.layers.Reshape((1, 2))(condensed_logits)
    logits = tf.repeat(reshaped_logits, stride, axis=1) # same probability for each point in the inner segment
    probs = keras.layers.Softmax()(logits)

    # throw together the model
    model = keras.Model(
        inputs=[bx_input, by_input, bz_input, jy_input, vz_input],
        outputs=[probs])

    # show the model
    model.summary()
    keras.utils.plot_model(model, "/scratch/gpfs/kendrab/model_"+model_name+".png", show_shapes=True)


    """Get 1d sampling"""
    #####################
    readpaths = ['/tigress/kendrab/03082021/'+"1000samples_idx22_bxbybzjyvz.hdf5",
                 '/tigress/kendrab/03082021/'+"1000samples_idx18_bxbybzjyvz.hdf5",
                 '/tigress/kendrab/03082021/'+"1000samples_idx15_bxbybzjyvz.hdf5"]

    idx_list = []  # to keep track of which file what sample came from
    s_list = []
    bx_list = []
    by_list = []
    bz_list = []
    jy_list = []
    vz_list = []
    x0_list = []
    x1_list = []
    topo_list = []

    for idx, filepath in enumerate(readpaths):
        file = h5py.File(filepath, 'r')
        idx_list += [np.array([idx for i in bx]) for bx in file['bx_smooth'][:]]  # check this structure!!!
        s_list += list(file['s'][:])
        bx_list += list(file['bx_smooth'][:])
        by_list += list(file['by'][:])
        bz_list += list(file['bz_smooth'][:])
        jy_list += list(file['jy'][:])
        vz_list += list(file['vz'][:]) 
        x0_list += list(file['x0'][:])
        x1_list += list(file['x1'][:])
        topo_list_tmp = list(file['topo'][:])
        for i in range(len(topo_list_tmp)):  # I tried to vectorize this but I didn't get it to work
            topo_list_tmp[i] = topo_list_tmp[i] % 2  # cat 0,2 are not plasmoids, cat 1,3 are
            topo_list_tmp[i] = keras.utils.to_categorical(topo_list_tmp[i], num_classes=2)
        topo_list += topo_list_tmp
        file.close()


    """ Preprocess data"""
    ######################
    idx_segs = batch_unpadded_subsects(idx_list, padding_length, stride)
    s_segs = batch_subsects(s_list, input_length, stride).reshape(-1, input_length, 1)
    bx_segs = batch_subsects(bx_list, input_length, stride).reshape(-1, input_length, 1)
    by_segs = batch_subsects(by_list, input_length, stride).reshape(-1, input_length, 1)
    bz_segs = batch_subsects(bz_list, input_length, stride).reshape(-1, input_length, 1)
    jy_segs = batch_subsects(jy_list, input_length, stride).reshape(-1, input_length, 1)
    vz_segs = batch_subsects(vz_list, input_length, stride).reshape(-1, input_length, 1)
    x0_segs = batch_unpadded_subsects(x0_list, padding_length, stride)
    x1_segs = batch_unpadded_subsects(x1_list, padding_length, stride)
    topo_segs = batch_unpadded_subsects(topo_list, padding_length, stride)
    # find the mode of each topo segment for metrics later
    topo_int_segs = np.argmax(topo_segs, axis=-1)  # back to integer classes
    modes_tmp = mode(topo_int_segs, axis=1)[0].reshape(-1,1,1) # most common class for each segment, broadcastable 
    # put it back in the same shape and style as topo_segs
    topo_modes_segs = keras.utils.to_categorical(np.repeat(modes_tmp, stride, axis=1),  num_classes=2)
    # s p l i t TODO: add validation set?
    (idx_train, idx_test, s_train, s_test, bx_train, bx_test, by_train, by_test, bz_train, bz_test, jy_train, jy_test, vz_train, vz_test, 
         x0_train, x0_test, x1_train, x1_test, topo_train, topo_test, topo_modes_train, topo_modes_test) = \
                           train_test_split(idx_segs, s_segs, bx_segs, by_segs, bz_segs, jy_segs, vz_segs,
                                            x0_segs, x1_segs, topo_segs, topo_modes_segs)
    # try to do some rebalancing in the training set
    # model is struggling on plasmoids, which are underrepresented
    [idx_train, s_train, bx_train, by_train, bz_train, jy_train, vz_train, x0_train, x1_train, topo_modes_train], \
        topo_train = rebalance_ctrl_group([idx_train, s_train, bx_train, by_train, bz_train, jy_train, vz_train, 
                                           x0_train, x1_train, topo_modes_train], topo_train, null_label=[1,0], 
                                          thinning_factor = thinning_factor[0])
    [idx_test, s_test, bx_test, by_test, bz_test, jy_test, vz_test, x0_test, x1_test, topo_modes_test], \
        topo_test = rebalance_ctrl_group([idx_test, s_test, bx_test, by_test, bz_test, jy_test, vz_test, x0_test,
                                         x1_test, topo_modes_test], topo_test, null_label=[1,0],
                                         thinning_factor = thinning_factor[0])
    

    """ Compile and train model """
    ###############################
    weights = {i:np.sum(topo_train)/np.sum(topo_train[...,i]) for i in range(topo_train.shape[-1])}
    opt = keras.optimizers.Adam(learning_rate=learning_rate)
    loss_fn = keras.losses.CategoricalCrossentropy()
    loss = LossPerPt(loss_fn=loss_fn, class_weights=weights)
    metrics = [keras.metrics.BinaryAccuracy(name='acc'), keras.metrics.AUC(name='prc', curve='PR'), keras.metrics.Precision(class_id=1), avg_metric(keras.metrics.Precision)]  # loss_fn keyword left default
    pruning_metric = 'precision'


    model.compile(optimizer=opt, loss=loss, metrics=metrics,
                 run_eagerly = True)  # run eagerly to get .numpy() method

    model.fit(x={'bx': bx_train, 'by': by_train, 'bz': bz_train, 'jy': jy_train, 'vz': vz_train},
              y = topo_train, epochs=epochs, callbacks=[TFKerasPruningCallback(trial, pruning_metric)])
    score = model.evaluate(x={'bx': bx_test, 'by': by_test, 'bz': bz_test, 'jy': jy_test, 'vz': vz_test}, y=topo_test, verbose=0,
                          return_dict=True)
    score_agst_mode = model.evaluate(x={'bx': bx_test, 'by': by_test, 'bz': bz_test, 'jy': jy_test, 'vz': vz_test}, y=topo_modes_test, verbose=0,
                      return_dict=True)
    print(f"Compared against section modes: {score_agst_mode}")
    print("Section mode performance against full sections: ")
    for metric in metrics:
        print(f"{metric.name} : {metric(topo_test, topo_modes_test)}")
    return score[pruning_metric]

In [None]:
study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner(n_startup_trials=2))
study.optimize(objective, n_trials=20, timeout=3600)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

ax = optuna.visualization.matplotlib.plot_param_importances(study)
plt.show()

[32m[I 2022-05-04 10:10:48,639][0m A new study created in memory with name: no-name-112a54e3-6363-46a0-a29e-88328a385fa9[0m


Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
bx (InputLayer)                 [(None, 66, 1)]      0                                            
__________________________________________________________________________________________________
by (InputLayer)                 [(None, 66, 1)]      0                                            
__________________________________________________________________________________________________
bz (InputLayer)                 [(None, 66, 1)]      0                                            
__________________________________________________________________________________________________
jy (InputLayer)                 [(None, 66, 1)]      0                                            
_______________________________________________________________________________________

[32m[I 2022-05-04 10:26:18,103][0m Trial 0 finished with value: 0.0 and parameters: {'learning_rate': 0.038237236791230686, 'filters': 16, 'padding_length': 30, 'stride': 6}. Best is trial 0 with value: 0.0.[0m


Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
bx (InputLayer)                 [(None, 57, 1)]      0                                            
__________________________________________________________________________________________________
by (InputLayer)                 [(None, 57, 1)]      0                                            
__________________________________________________________________________________________________
bz (InputLayer)                 [(None, 57, 1)]      0                                            
__________________________________________________________________________________________________
jy (InputLayer)                 [(None, 57, 1)]      0                                            
_______________________________________________________________________________________

[32m[I 2022-05-04 10:40:04,085][0m Trial 1 finished with value: 0.0 and parameters: {'learning_rate': 0.004245040033804305, 'filters': 16, 'padding_length': 25, 'stride': 7}. Best is trial 0 with value: 0.0.[0m


prc : 0.9561790227890015
precision : 0.9678416848182678
Precision_avg : 0.9857373535633087
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
bx (InputLayer)                 [(None, 31, 1)]      0                                            
__________________________________________________________________________________________________
by (InputLayer)                 [(None, 31, 1)]      0                                            
__________________________________________________________________________________________________
bz (InputLayer)                 [(None, 31, 1)]      0                                            
__________________________________________________________________________________________________
jy (InputLayer)                 [(None, 31, 1)]      0                                         

[32m[I 2022-05-04 10:42:59,796][0m Trial 2 pruned. Trial was pruned at epoch 0.[0m


Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
bx (InputLayer)                 [(None, 55, 1)]      0                                            
__________________________________________________________________________________________________
by (InputLayer)                 [(None, 55, 1)]      0                                            
__________________________________________________________________________________________________
bz (InputLayer)                 [(None, 55, 1)]      0                                            
__________________________________________________________________________________________________
jy (InputLayer)                 [(None, 55, 1)]      0                                            
_______________________________________________________________________________________

[32m[I 2022-05-04 10:46:42,209][0m Trial 3 pruned. Trial was pruned at epoch 0.[0m


Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
bx (InputLayer)                 [(None, 50, 1)]      0                                            
__________________________________________________________________________________________________
by (InputLayer)                 [(None, 50, 1)]      0                                            
__________________________________________________________________________________________________
bz (InputLayer)                 [(None, 50, 1)]      0                                            
__________________________________________________________________________________________________
jy (InputLayer)                 [(None, 50, 1)]      0                                            
_______________________________________________________________________________________

[32m[I 2022-05-04 10:51:05,210][0m Trial 4 pruned. Trial was pruned at epoch 0.[0m


Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
bx (InputLayer)                 [(None, 53, 1)]      0                                            
__________________________________________________________________________________________________
by (InputLayer)                 [(None, 53, 1)]      0                                            
__________________________________________________________________________________________________
bz (InputLayer)                 [(None, 53, 1)]      0                                            
__________________________________________________________________________________________________
jy (InputLayer)                 [(None, 53, 1)]      0                                            
_______________________________________________________________________________________

[32m[I 2022-05-04 10:56:33,380][0m Trial 5 pruned. Trial was pruned at epoch 0.[0m


Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
bx (InputLayer)                 [(None, 23, 1)]      0                                            
__________________________________________________________________________________________________
by (InputLayer)                 [(None, 23, 1)]      0                                            
__________________________________________________________________________________________________
bz (InputLayer)                 [(None, 23, 1)]      0                                            
__________________________________________________________________________________________________
jy (InputLayer)                 [(None, 23, 1)]      0                                            
_______________________________________________________________________________________