# This is an example file showing how to train the model using the Rg_data.npy dataset.


### Import all required packages

In [None]:
# Import required packages
import numpy as np
import tensorflow as tf
import random
from numpy import sqrt
from sklearn.preprocessing import MinMaxScaler
from collections import Counter
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping
import utils

# Load the data and create arrays for Rg and sequences
The three-parameter color-mapping features are obtained with the `seqs_to_color_mapping` function.
Features for other parameter combinations can be obtained by selecting the corresponding entries from its output array.

In [None]:
# Load the pickled dataset object and unpack it into sequences and targets.
# NOTE: allow_pickle=True unpickles the file contents, so only load files
# from a trusted source.
seq = np.load("Rg_data.npy", allow_pickle=True).item()

# Entries are addressed by the integer positions 0 .. len(seq) - 1.
records = [seq[idx] for idx in range(len(seq))]

# Element 0 of each record is the sequence; the Rg and nu values are read
# from positions 1 and 3 of the nested record[1][1] container.
data_seq = [record[0] for record in records]
Rg = np.array([record[1][1][1] for record in records]).reshape(-1, 1)
nu = np.array([record[1][1][3] for record in records]).reshape(-1, 1)

# Feature encodings produced by the project helpers.
OE = utils.seqs_to_ordinal_encoding(data_seq)
X = utils.seqs_to_color_mapping(data_seq)
nRow, nCol = X.shape[1], X.shape[2]

# Target matrix: column 0 is Rg, column 1 is nu.
Y = np.hstack((Rg, nu))

In [None]:
# Settings forwarded to the learning-curve splitter.
# NOTE(review): the exact meaning of `fold` and `split` is defined by
# utils.LC_split_CL and is not visible here — confirm against utils.
fold, split, seed = 6, 8, 10

# Derive `c` and `CL` from the ordinal encoding, then build the index sets.
c, CL = utils.get_CL_from_OE(OE)
Train_indices, Test_indices = utils.LC_split_CL(fold, split, seed, c)

# Learning curve train test split

## Define a model using the best parameters and obtain the learning curve

In [None]:
# Gather the train and test subsets of X and Y from the precomputed indices.
# NOTE(review): the training pieces are combined with np.vstack while the
# test pieces use np.stack; the two differ when each selected X piece is
# multi-dimensional. Presumably this matches the structure returned by
# utils.LC_split_CL — confirm before changing either call.
X_train_unscaled = np.vstack([X[idx] for idx in Train_indices])
Y_train_unscaled = np.vstack([Y[idx] for idx in Train_indices])

X_test_unscaled = np.stack([X[idx] for idx in Test_indices])
Y_test_unscaled = np.stack([Y[idx] for idx in Test_indices])

# Column 0 of Y holds Rg, the only target used for training below.
Y_train_Rg = Y_train_unscaled[:, 0]
Y_test_Rg = Y_test_unscaled[:, 0]






In [None]:
# Build, compile and train a Conv1D + dense regression network for Rg.

# Layer widths; the appended 1 is the single regression output.
arch = [20,10]
arch.append(1)

size_of_batch= 32
num_epochs   = 500

use_perms    = False  # not referenced in this cell
num_filters   = 8 # number of filters for convolution
kernel_size   = 3
window_stride = 2


nlayer = len(arch)
# One activation / dropout rate per entry of `arch`; the output layer is linear.
acts      = ['relu'] * nlayer
acts[-1]  = None
DR        = [0.2] * nlayer
learning_rate = 0.001

nesterovFlag  = False  # not referenced in this cell
the_objective = 'mean_squared_error'

# Use the current `learning_rate` keyword. The legacy `lr`, `schedule_decay`
# and `epsilon=None` arguments are not part of the current tf.keras Nadam
# signature, so epsilon is left at the library default.
the_optimizer = tf.keras.optimizers.Nadam(learning_rate=learning_rate,beta_1=0.9,beta_2=0.999)
verbosity     = 2
# Fix the TensorFlow seed so weight initialisation is repeatable.
tf.random.set_seed(10)

model = Sequential()

# Strided 1D convolution over the (nRow, nCol) feature map of each sample.
model.add(Conv1D(num_filters,kernel_size,strides=window_stride,padding='same',data_format="channels_last",
  input_shape=(nRow,nCol), activation="relu",kernel_initializer='lecun_normal'))

model.add(Flatten(data_format="channels_last"))
#----CREATE HIDDEN LAYERS----
# NOTE(review): the loop starts at index 1, so arch[0] (20) never becomes a
# layer and only Dense(arch[1]) is added — confirm this is intended.
for i in range(1,nlayer-1):
    model.add(Dense(arch[i],activation=acts[i],
    kernel_initializer="lecun_normal"))
    model.add(Dropout(DR[i]))

#----OUTPUTS----
model.add(Dense(arch[-1],activation=None))
model.summary()
model.compile(optimizer=the_optimizer,loss=the_objective,metrics=['mse'])


# Set up callbacks
# Stop once val_loss has not improved for 50 epochs and restore the best weights.
myCallbacks = [EarlyStopping(monitor='val_loss',patience=50,restore_best_weights=True)]

## Actual fitting
# validation_split=0.25 holds out a quarter of the training data for val_loss.
history = model.fit(X_train_unscaled,Y_train_Rg,batch_size=size_of_batch,
      epochs=num_epochs,callbacks=myCallbacks,
      verbose=verbosity,shuffle=True,validation_split=0.25)

In [None]:
# Predict Rg for the held-out test set and report each evaluation metric.
Y_pred = np.squeeze(model.predict(X_test_unscaled))

# Printed in order: coefficient of determination, percent error, MAE, MSE, RMSE.
for metric in (
    utils.coeff_determination,
    utils.percent_error,
    utils.MAE,
    utils.MSE,
    utils.RMSE,
):
    print(metric(Y_test_Rg, Y_pred))