# Model Tests
This notebook is used to train or test specific models.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.preprocessing import MaxAbsScaler
import seaborn as sns
sns.set()

seed_val = 2000
np.random.seed(seed_val)

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2

In [None]:
def split_data(dataset, seed, train_ratio=0.6, shuffle=True, highest_at_top=False):
    """Split ``dataset`` into disjoint training and testing DataFrames.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Data to split. It must contain an ``"eta c"`` column when
        ``highest_at_top`` is true. The index labels must be unique, because
        the test set is obtained by dropping the training rows by label.
    seed : int
        Random state used for every shuffle / sampling step.
    train_ratio : float
        Fraction of the rows that go into the training set.
    shuffle : bool
        Shuffle all rows before splitting.
    highest_at_top : bool
        When true, rows with ``"eta c" > 0.9`` are placed first (keeping their
        current order), the remaining rows are shuffled and appended, and the
        training set is the leading ``train_ratio`` share of that ordering.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_dataset, test_dataset)``; the two frames share no rows.
    """
    if shuffle:
        dataset = dataset.sample(frac=1, random_state=seed)

    if highest_at_top:
        # The threshold is 0.9 (an older comment here mentioned 1.15).
        is_high = dataset["eta c"] > 0.9
        high_rows = dataset[is_high]
        low_rows = dataset[~is_high].sample(frac=1, random_state=seed)
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        ordered = pd.concat([high_rows, low_rows])

        train_dataset = ordered.iloc[:int(len(ordered) * train_ratio)]
        test_dataset = dataset.drop(train_dataset.index)
        # Return only the training slice; returning the full ordering would
        # put every test row into the training set as well.
        return train_dataset, test_dataset

    # Honour the caller's seed rather than a fixed random state.
    train_dataset = dataset.sample(frac=train_ratio, random_state=seed)
    test_dataset = dataset.drop(train_dataset.index)

    return train_dataset, test_dataset

def add_bias(data):
    """Return a float copy of the 2-D ``data`` with an extra last column of -1.

    The result has one more column than ``data``; every entry of that final
    column is -1 and the leading columns hold the values of ``data``.
    """
    n_rows = np.shape(data)[0]
    n_cols = np.shape(data)[1]
    # Start from an array filled with the bias value, then overwrite all but
    # the last column with the input.
    augmented = np.full((n_rows, n_cols + 1), -1.0)
    augmented[:, :n_cols] = data
    return augmented

def add_noise(dataset, target_column=4, noise_var=0.01, input_n=False, output_n=False):
    """Split a DataFrame into features / labels, optionally adding Gaussian noise.

    Intended to be called on the training DataFrame.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Columns ``0 .. target_column - 1`` are taken as features and column
        ``target_column`` as the label.
    target_column : int
        Positional index of the label column.
    noise_var : float
        Spread of the zero-mean Gaussian noise. NOTE: despite its name, this
        value is passed as the ``scale`` (standard deviation) of
        ``np.random.normal``; that matches the 0.01 that used to be
        hard-coded here, so default calls behave as before.
    input_n : bool
        Add noise to the features.
    output_n : bool
        Add noise to the labels.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, labels)`` where ``labels`` has shape ``(n_rows, 1)``.
    """
    values = dataset.to_numpy()
    features = values[:, 0:target_column]
    labels = np.reshape(values[:, target_column], (-1, 1))

    if input_n:
        # Previously the scale was fixed at 0.01 and ``noise_var`` was ignored.
        features = features + np.random.normal(0, noise_var, size=features.shape)

    if output_n:
        labels = labels + np.random.normal(0, noise_var, size=labels.shape)

    return features, labels

def check_eta_range(dataset, nums=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4)):
    """Plot how the ``"eta c"`` values of ``dataset`` are distributed over bins.

    Two scatter plots are drawn side by side:

    * left: for every bin edge after the first, the percentage of rows whose
      ``"eta c"`` is less than or equal to that edge; points above 99 % are
      highlighted in red;
    * right: for every pair of consecutive edges ``(lo, hi)``, the percentage
      of rows with ``lo <= "eta c" < hi``; a bin is highlighted in red once
      the bins before it already add up to more than 99 %.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain an ``"eta c"`` column.
    nums : sequence of float
        Bin edges in ascending order. The sequence is copied and never
        modified, so it is safe to reuse between calls.
    """
    # Work on a private copy: the earlier implementation popped from ``nums``,
    # which shrank the shared default list (and the caller's list) per call.
    edges = list(nums)
    upper_edges = edges[1:]

    eta = dataset["eta c"]
    total = len(dataset)

    less_than_percents = [
        (int((eta <= hi).sum()) / total) * 100 for hi in upper_edges
    ]

    in_range_percents = []
    x_ranges = []
    for lo, hi in zip(edges, upper_edges):
        in_bin = int(((eta < hi) & (eta >= lo)).sum())
        in_range_percents.append((in_bin / total) * 100)
        x_ranges.append("{one}<EtaC<{two}".format(one=lo, two=hi))

    # A single subplots() call; an additional plt.figure() would only leave
    # an empty figure behind.
    _, ax = plt.subplots(ncols=2, figsize=(15, 8))

    ax[0].scatter(upper_edges, less_than_percents)
    ax[0].set_ylabel("Percentage")
    ax[0].set_xlabel("Eta c")
    ax[0].set_title("Percentage of data with Eta C less than x")
    above_99 = [
        (edge, pct) for edge, pct in zip(upper_edges, less_than_percents) if pct > 99.0
    ]
    if above_99:
        # One scatter call gives one legend entry instead of one per point.
        red_x, red_y = zip(*above_99)
        ax[0].scatter(red_x, red_y, c="red", label=">99%")
        ax[0].legend()

    ax[1].scatter(x_ranges, in_range_percents)
    ax[1].set_ylabel("Percentage")
    ax[1].set_xlabel("Eta c")
    ax[1].set_title("Percentage of data with Eta C within range")
    # Flag a bin when the bins *before* it already exceed 99 % in total.
    covered = 0
    saturated = []
    for label, pct in zip(x_ranges, in_range_percents):
        if covered > 99:
            saturated.append((label, pct))
        covered += pct
    if saturated:
        red_x, red_y = zip(*saturated)
        ax[1].scatter(red_x, red_y, c="red", label="sum>99%")
        ax[1].legend()

    # The range labels are long, so draw them vertically.
    ax[1].tick_params(axis="x", labelrotation=90)


In [None]:
# Input file (alternative data set: "LIM_scaled.csv").
name = "data.csv"

dataset = pd.read_csv(name)
# data2.describe().transpose()
# Drop the "Unnamed: 0" column (presumably an index column written out when
# the CSV was saved - verify against the file).
dataset.pop("Unnamed: 0")

# Scale each input column by its maximum absolute value.
# NOTE(review): the scaler is fit on the full data set before the train/test
# split, so the test rows influence the scaling factors.
feature_columns = ["b1", "a2", "RatioTotalArea", "frac"]
scaled_dataset = dataset.copy()
scaled_dataset[feature_columns] = MaxAbsScaler().fit_transform(dataset[feature_columns])

# split the data
train_dataset, test_dataset = split_data(scaled_dataset.copy(), seed_val, train_ratio=0.8, highest_at_top=True)

# The training data is kept in the order returned by split_data; the test
# data is sorted by index to check representation.
sorted_train = train_dataset
sorted_test = test_dataset.sort_index()

# check representation (a single subplots() call: an extra plt.figure()
# would only produce an empty figure)
fig, ax = plt.subplots(ncols=2, figsize=(15,7))

ax[0].scatter(x=np.arange(len(sorted_train)), y=sorted_train['eta c'], marker='.', alpha=0.4)
ax[1].scatter(x=np.arange(len(sorted_test)), y=sorted_test['eta c'], marker='.', alpha=0.4)
ax[0].set_ylabel("eta c")
ax[0].set_title("Training Data")
ax[1].set_title("Testing Data")


In [None]:
# Distribution of "eta c" in the training split, with bin edges 0.1, 0.2, ..., 1.4.
check_eta_range(train_dataset.copy(), nums=[k / 10 for k in range(1, 15)])

In [None]:
# Distribution of "eta c" in the testing split, with bin edges 0.1, 0.2, ..., 1.4.
check_eta_range(test_dataset.copy(), nums=[k / 10 for k in range(1, 15)])

In [None]:
class MLP_Regression():
    """Convenience wrapper around a Keras model for regression experiments.

    The wrapper stores the model, builds optimizers, adds layers, trains the
    model while recording the per-epoch history as a DataFrame, plots the
    training curves and the predictions, and saves model plus history.
    """

    # Directory used by save() when no ``base_dir`` is supplied.
    DEFAULT_SAVE_DIR = "/media/nirav/34E0-F309/KTH/Thesis/ModelSummaries/Implementable"

    def __init__(self, model):
        """Wrap ``model`` (e.g. a ``Sequential`` or a loaded Keras model)."""
        self.model = model
        self.optimizer = None    # set by SGDoptimizer() / ADAMoptimizer()
        self.lr_schedule = None  # set by learning_rate()
        self.history = None      # set by train_model()

    def learning_rate(self, initial_lr=1e-2, decay_steps=1e5, decay_rate=0.9):
        """Store an exponential-decay learning-rate schedule in ``self.lr_schedule``."""
        self.lr_schedule = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=initial_lr, decay_steps=decay_steps, decay_rate=decay_rate)

    def SGDoptimizer(self, momentum=0.1, nesterov=False, initial_lr=1e-2, decay_steps=1e5, decay_rate=0.9):
        """Store an SGD optimizer (with exponential decay) in ``self.optimizer``."""
        self.learning_rate(initial_lr, decay_steps, decay_rate)
        self.optimizer = keras.optimizers.SGD(learning_rate=self.lr_schedule, momentum=momentum, nesterov=nesterov)

    def ADAMoptimizer(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False):
        """Store an Adam optimizer in ``self.optimizer``."""
        self.optimizer = keras.optimizers.Adam(learning_rate=learning_rate, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon, amsgrad=amsgrad, name="Adam")

    def add_layer(self, num_nodes, kernel_reg=None, activation="relu"):
        """Append a dense hidden layer with ``num_nodes`` units."""
        self.model.add(Dense(num_nodes, activation=activation, use_bias=True, kernel_regularizer=kernel_reg))

    def add_output_layer(self, out_nodes, kernel_reg=None, activation="relu"):
        """Append the dense output layer with ``out_nodes`` units.

        The default ``"relu"`` activation can only produce non-negative
        outputs; pass e.g. ``activation="linear"`` for unbounded targets.
        """
        self.model.add(Dense(out_nodes, activation=activation, use_bias=True, kernel_regularizer=kernel_reg))

    def add_dropout(self, rate, seed):
        """Append a dropout layer."""
        self.model.add(keras.layers.Dropout(rate=rate, seed=seed))

    def compileModel(self, optimizer, loss="mean_squared_error", metrics=("MSE", "MAE", "MAPE")):
        """Compile the wrapped model with the given optimizer, loss and metrics."""
        if isinstance(metrics, tuple):
            # The default is an immutable tuple; hand Keras a fresh list.
            metrics = list(metrics)
        self.model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

    def train_model(self, X, Y, early_stopping=True, patience=3, epochs=10, batch_size=500, vali_split=0.2, shuffle=False, monitor="loss"):
        """Fit the model and record the per-epoch history in ``self.history``.

        Parameters
        ----------
        X, Y : array-like
            Training inputs and targets.
        early_stopping : bool
            Attach an ``EarlyStopping`` callback that restores the best weights.
        patience : int
            Early-stopping patience in epochs.
        epochs, batch_size, shuffle
            Forwarded to ``model.fit``.
        vali_split : float
            Forwarded to ``model.fit`` as ``validation_split``.
        monitor : str
            Quantity watched by early stopping. It is minimised
            (``mode="min"``); use ``"val_loss"`` to stop on the validation loss.
        """
        fit_options = dict(epochs=epochs, batch_size=batch_size, validation_split=vali_split, shuffle=shuffle)
        if early_stopping:
            fit_options["callbacks"] = [
                EarlyStopping(monitor=monitor, min_delta=0, patience=patience, mode="min", restore_best_weights=True)
            ]

        hist = self.model.fit(X, Y, **fit_options)
        self.history = pd.DataFrame(hist.history)
        self.history['epoch'] = hist.epoch

    def show_training_errors(self):
        """Plot the MSE, MAE and MAPE curves recorded by ``train_model``.

        Validation curves are drawn only when the history contains them
        (i.e. when training used a validation split).

        Raises
        ------
        RuntimeError
            If ``train_model`` has not been called yet.
        """
        hist = getattr(self, "history", None)
        if hist is None:
            raise RuntimeError("No training history available; call train_model() first.")

        # A single subplots() call; an extra plt.figure() only leaves an
        # empty figure behind.
        fig1, ax = plt.subplots(nrows=2, ncols=2, figsize=(15,12))

        for axis, metric in ((ax[0][0], 'MSE'), (ax[0][1], 'MAE')):
            axis.set_xlabel('Epoch')
            axis.set_ylabel(metric)
            axis.plot(hist['epoch'], hist[metric], label='Train Error')
            val_metric = 'val_' + metric
            if val_metric in hist:
                axis.plot(hist['epoch'], hist[val_metric], label='Val Error')
            axis.legend()
            axis.set_title(metric + " Error")

        ax[1][0].set_xlabel('Epoch')
        ax[1][0].set_ylabel('MAPE')
        ax[1][0].plot(hist['epoch'], hist['MAPE'], label='Mean Abs {} Error'.format("%"))
        ax[1][0].legend()
        ax[1][0].set_title("MAPE")

    def test(self, test_features, test_labels):
        """Predict on ``test_features`` and plot prediction, error and % error.

        The percentage error is ``|label - prediction| / |label| * 100``; it
        is left as NaN where the label is zero.
        """
        test_output = np.reshape(test_labels, (-1,1))
        prediction = self.model.predict(test_features)
        error = np.abs(test_output - prediction)

        # Divide by the label magnitude and skip zero labels, so the result
        # is a true absolute percentage without inf / division warnings.
        magnitude = np.abs(test_output)
        acc = np.full(error.shape, np.nan)
        np.divide(error, magnitude, out=acc, where=magnitude != 0)
        acc *= 100

        fig2, ax = plt.subplots(nrows=3, figsize=(15,16))
        ax[0].plot(np.arange(len(test_output)), test_output, label="TestData")
        ax[0].plot(np.arange(len(prediction)), prediction, label="Prediction", alpha=0.4)
        ax[0].legend()
        ax[0].set_title("Model Prediction")
        ax[0].set_ylabel("Eta c")

        ax[1].plot(np.arange(len(error)), error, label="abs(test_label-pred)")
        ax[1].legend()
        ax[1].set_ylabel("Error")

        ax[2].plot(np.arange(len(acc)), acc, label="(error/test_label)*100")
        ax[2].legend()
        ax[2].set_ylabel("MAPE")

    def save(self, loc_and_name, base_dir=None):
        """Save the model and, if available, the training history.

        The model is written to ``<base_dir>/<loc_and_name>`` and the history
        to ``<base_dir>/<loc_and_name>.csv``.

        Parameters
        ----------
        loc_and_name : str
            Relative location and name of the model inside ``base_dir``.
        base_dir : str, optional
            Target directory; defaults to ``DEFAULT_SAVE_DIR``.
        """
        import os

        if base_dir is None:
            base_dir = self.DEFAULT_SAVE_DIR
        target = os.path.join(base_dir, loc_and_name)

        self.model.save(target)
        # A model that was only loaded (never trained here) has no history.
        history = getattr(self, "history", None)
        if history is not None:
            history.to_csv(target + ".csv")

In [None]:
# Continue from a previously saved model.
loaded_model = keras.models.load_model(
    "/media/nirav/34E0-F309/KTH/Thesis/ModelSummaries/Implementable/33_26_ADAM23"
)

# Feature selection: the first four columns are the inputs, the fifth is the label.
# For noisy TRAINING data use this instead:
# train_features, train_labels = add_noise(train_dataset, noise_var=0.0001, output_n=True)
train_values = train_dataset.to_numpy()
train_features = train_values[:, 0:4]
train_labels = train_values[:, 4]

# Test data, ordered by index.
sorted_test = test_dataset.sort_index()
test_values = sorted_test.to_numpy()
test_features = test_values[:, 0:4]
test_labels = test_values[:, 4]

X = train_features.copy()
Y = train_labels.copy()

# ---------------------------------------------------------------------------
# Set up the wrapper and its optimizer.
my_model = MLP_Regression(loaded_model)
my_model.ADAMoptimizer(learning_rate=0.000001, beta_1=0.95)
# my_model.SGDoptimizer(momentum=0.6, nesterov=True, initial_lr=1e-1, decay_steps=1e5, decay_rate=0.9)

# Compile with the MAPE loss and train.
my_model.compileModel(my_model.optimizer, loss="mean_absolute_percentage_error")
my_model.train_model(
    X,
    Y,
    epochs=1,
    patience=10,
    batch_size=32,
    shuffle=True,
    early_stopping=True,
    vali_split=0.2,
)

In [None]:
# save the model
# my_model.save("33_26_ADAM23")

In [None]:
# Plot the per-epoch training history recorded by the train_model() call above.
my_model.show_training_errors()

In [None]:
# Evaluate on the TRAINING split, ordered by ascending "eta c".
# To evaluate on the held-out data instead, sort `sorted_test` the same way:
# test_features = sorted_test.sort_values("eta c").to_numpy()[:,0:4]
# test_labels = sorted_test.sort_values("eta c").to_numpy()[:,4]
eta_ordered_values = sorted_train.sort_values("eta c").to_numpy()
test_features = eta_ordered_values[:, 0:4]
test_labels = eta_ordered_values[:, 4]

my_model.test(test_features, test_labels)

## Test without training

In [None]:
# Load a previously saved model; it is only evaluated here, not trained.
loaded_model = keras.models.load_model(
    "/media/nirav/34E0-F309/KTH/Thesis/ModelSummaries/Implementable/33_26_ADAM19"
)

# Feature selection: the first four columns are the inputs, the fifth is the label.
# For noisy TRAINING data use this instead:
# train_features, train_labels = add_noise(train_dataset, noise_var=0.0001, output_n=True)
train_values = train_dataset.to_numpy()
train_features = train_values[:, 0:4]
train_labels = train_values[:, 4]

# Test data, ordered by index.
sorted_test = test_dataset.sort_index()
test_values = sorted_test.to_numpy()
test_features = test_values[:, 0:4]
test_labels = test_values[:, 4]

X = train_features.copy()
Y = train_labels.copy()

# ---------------------------------------------------------------------------
# Wrap the loaded model.
my_model = MLP_Regression(loaded_model)

In [None]:
# Evaluate the loaded model on the training split, ordered by index.
sorted_train_dataset = train_dataset.sort_index()
sorted_train_values = sorted_train_dataset.to_numpy()
train_features = sorted_train_values[:, 0:4]
train_labels = sorted_train_values[:, 4]

my_model.test(train_features, train_labels)

We can then retrain the model. For instance, ADAM18 can first be trained on a noisy set and then trained further on a non-noisy set.