## Simple model using just Bx, By, Bz
 classifications: separatrices, o_structures, null
 
 Fixed-length time series; the length is informed by d_per_de ~ 4
 
 The structure will start out the same as alpha, but the end goal is to change it to make use of the fixed-length series.

In [None]:
import sys
import h5py
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import keras
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
# get functions from other notebooks
%run /tigress/kendrab/analysis-notebooks/loss_fns.ipynb
%run /tigress/kendrab/analysis-notebooks/metrics.ipynb
%run /tigress/kendrab/analysis-notebooks/preproc_utils.ipynb

### Assemble a model

In [None]:
# --- Hyperparameters ---------------------------------------------------------
filters = 32              # output channels of each Conv1D layer
kernel_size = 10          # Conv1D kernel width
window = 30               # length of every fixed-size input segment
stride = 15               # handed to batch_subsects together with `window`
mask_value = int(-10.0)   # sentinel value given to the Masking layer
epochs = 10
# max_seq_len = 10000

# --- Inputs ------------------------------------------------------------------
# One single-channel input of length `window` per magnetic-field component.
bx_input, by_input, bz_input = (
    keras.Input(shape=(window, 1), name=component)
    for component in ("bx", "by", "bz")
)

# --- Masking -----------------------------------------------------------------
# A single Masking layer is shared by the three inputs; the same layer object
# is later passed to the loss/metric factories.
mask_layer = keras.layers.Masking(mask_value=mask_value)
bx_masked, by_masked, bz_masked = (
    mask_layer(tensor) for tensor in (bx_input, by_input, bz_input)
)

# --- Shared convolution ------------------------------------------------------
# The same Conv1D layer (hence the same weights) is applied to every component.
# NOTE(review): Conv1D does not appear to propagate Keras masks, so the mask
# probably does not reach the LSTM below -- confirm this is acceptable.
conv_layer = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')
bx_conv, by_conv, bz_conv = (
    conv_layer(tensor) for tensor in (bx_masked, by_masked, bz_masked)
)

# --- Merge and second convolution ---------------------------------------------
# Element-wise average of the three feature maps, followed by another Conv1D.
merge_layer = keras.layers.Average()
b = merge_layer([bx_conv, by_conv, bz_conv])
second_conv = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')
b_conv = second_conv(b)

# --- Per-timestep classification head ----------------------------------------
# The LSTM emits 4 values at every timestep; the softmax turns them into
# per-timestep class probabilities.
# NOTE(review): with the LSTM's default tanh output activation these "logits"
# are confined to [-1, 1], which caps any softmax probability at roughly 0.71
# -- confirm this is intended.
lstm_layer = keras.layers.LSTM(4, return_sequences=True)
logits = lstm_layer(b_conv)
probs = keras.layers.Softmax()(logits)

# --- Assemble and inspect ----------------------------------------------------
model = keras.Model(inputs=[bx_input, by_input, bz_input], outputs=[probs])

model.summary()
keras.utils.plot_model(model, "/scratch/gpfs/kendrab/model_gamma.png", show_shapes=True)

### Get 1d sampling (If training/testing only, not building!)
Generated by [1d_sampling](./1d_sampling.ipynb)

In [None]:
# TODO use command line args or something easier than throwing it here
data_dir = '/tigress/kendrab/03082021/'
readpaths = [data_dir + "1000samples_idx31_bxbybzjyvz.hdf5",
             data_dir + "1000samples_idx22_bxbybzjyvz.hdf5",
             data_dir + "1000samples_idx15_bxbybzjyvz.hdf5"]

# Accumulators: one entry per sample, concatenated across all input files.
bx_list = []
by_list = []
bz_list = []
topo_list = []

for filepath in readpaths:
    # The context manager closes the HDF5 handle even when a dataset is
    # missing or a read fails part-way through.
    with h5py.File(filepath, 'r') as h5file:
        # NOTE(review): bx and bz come from the '*_smooth' datasets while by is
        # read from the plain 'by' dataset -- confirm this is intentional.
        bx_list.extend(h5file['bx_smooth'][:])
        by_list.extend(h5file['by'][:])
        bz_list.extend(h5file['bz_smooth'][:])
        # One-hot encode the topology labels sample by sample (4 classes).
        topo_list.extend(
            keras.utils.to_categorical(sample, num_classes=4)
            for sample in h5file['topo'][:]
        )

### Preprocess data

In [None]:
# Cut every series into windows of length `window` using batch_subsects
# (presumably provided by the preproc_utils notebook run above), then add a
# trailing channel axis so each field component matches the model's
# (window, 1) input shape.
# The set of inputs is hard-coded: adding a new input means extending the
# tuples below and the model definition by hand.
bx_segs, by_segs, bz_segs = (
    batch_subsects(series, window, stride).reshape(-1, window, 1)
    for series in (bx_list, by_list, bz_list)
)
# The labels are windowed the same way but are not reshaped.
topo_segs = batch_subsects(topo_list, window, stride)

# Train/test split with scikit-learn's default settings.
# NOTE(review): no random_state is given, so the split differs on every run;
# if the windows overlap (stride < window), neighbouring windows may also end
# up on both sides of the split -- confirm this is acceptable.
splits = train_test_split(bx_segs, by_segs, bz_segs, topo_segs)
bx_train, bx_test, by_train, by_test, bz_train, bz_test, topo_train, topo_test = splits

# Try to rebalance the training set only; the test set is left untouched.
rebalanced_inputs, topo_train = rebalance_ctrl_group(
    [bx_train, by_train, bz_train],
    topo_train,
    null_label=[1, 0, 0, 0],
    thinning_factor=0.9,
)
bx_train, by_train, bz_train = rebalanced_inputs

### Compile and train model

In [None]:
# Optimizer: Adam with a fixed learning rate.
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
# Base loss: focal cross-entropy from tensorflow_addons.
loss_fn = tfa.losses.SigmoidFocalCrossEntropy(gamma=10)  # gamma must be an integer apparently (in int form)
# Wrap the base loss with the helper from the loss_fns notebook; it is given
# the model's Masking layer (presumably so masked points can be excluded --
# verify against gen_loss_per_pt).
loss = gen_loss_per_pt(mask_layer=mask_layer, loss_fn=loss_fn)
# NOTE(review): `metric` is built here but never passed to model.compile();
# only the built-in "acc" metric below is tracked.
metric = gen_metric_per_cat(mask_layer=mask_layer)
metrics = ["acc"]  # loss_fn keyword left default
# Disabled: one metric per category index.
# for i in range(4):
#     metrics.append(gen_metric_per_cat(mask_layer=mask_layer, cat_idx=i))


# Eager execution is requested so that tensors expose .numpy() during training.
model.compile(optimizer=opt, loss=loss, metrics=metrics,
             run_eagerly = True)  # run eagerly to get .numpy() method

In [None]:
# Train on the rebalanced training windows; the input keys match the names of
# the model's Input layers ("bx", "by", "bz").
model.fit(
    x=dict(bx=bx_train, by=by_train, bz=bz_train),
    y=topo_train,
    epochs=epochs,
)

### Observe the results, debug

In [None]:
# Largest label value found for each of the 4 classes in the training targets.
print([np.max(topo_train[:,:,i]) for i in range(4)])
# Largest predicted probability for each class on the training inputs.
# The forward pass is run once and reused, instead of once per class.
train_probs = model(inputs={'bx': bx_train, 'by': by_train, 'bz': bz_train})
print([np.max(train_probs[:,:,i]) for i in range(4)])