# Imports

In [None]:
import numpy as np
import pandas as pd
import json
import os
import glob
import random
import gc
import keras
import mcfly
import tensorflow as tf

In [None]:
# Table recording the index of the first and of the last positive sample.
indexes = pd.read_csv("Data/info/info.csv")

# Locations of the label array and of the per-sample .npy files.
labels_path = "Data/labels/labels.npy"
samples_path = "Data/samples/"

# Set to True to generate (and save) new mcfly models; leave it False when the
# models were already generated by a previous run and only need to be loaded.
generate_models = False

# Path prefixes under which each generated model's architecture,
# hyperparameters and type are written (one JSON file per model).
archi_path = "Models/architecture/architecture_"
params_path = "Models/parameters/params_"
type_path = "Models/type/type_"

# Seed both random number generators used in this file: `random` drives the
# train/val/test draw, while NumPy's global generator drives the per-epoch
# shuffling done by the data generator. Seeding only one of them leaves the
# run non-reproducible.
random.seed(0)
np.random.seed(0)

In [None]:
# Show the loaded info table as the cell output (has no effect outside a notebook).
indexes

In [None]:
# Row 0, column 0 of the info table; used below as the index of the first positive sample.
first_postive_idx = indexes.iloc[0,0]

In [None]:
# Row 0, column 1 of the info table; used below as the index of the last positive sample.
last_postive_idx = indexes.iloc[0,1]

In [None]:
# Negative samples occupy the indexes strictly below the first positive one.
negative_indexes = range(0, first_postive_idx)
# Positive samples run from the first to the last positive index, both included.
positive_indexes = range(first_postive_idx, last_postive_idx + 1)

# Accumulator for the sample ids assigned to each split; starts out empty.
train_val_test_dict = {split: [] for split in ("train", "val", "test")}

In [None]:
#function to create dictionary with train, validation and test set file indexes.
#train_proportion is the desired proportion, expressed as a decimal in [0,1], of train samples.
#val_proportion is the desired proportion, expressed as a decimal in [0,1], of validation samples.

#done separately for positive and negative samples, e.g.: train set gets 80% of data.
#80% of the negative samples and 80% of the positive samples make the train set.

def draw_samples(file_indexes, train_val_test_dict, train_proportion, val_proportion):
    """Randomly split ``file_indexes`` and append the resulting ids to each split.

    The indexes are shuffled with the ``random`` module, cut into a train, a
    validation and a test part, turned into ids of the form ``"id-<index>"``
    and appended to the lists already stored in ``train_val_test_dict``.

    Parameters
    ----------
    file_indexes : iterable of int
        Indexes of the samples to distribute (e.g. a ``range``).
    train_val_test_dict : dict
        Dictionary with the keys ``"train"``, ``"val"`` and ``"test"``, each
        holding a list of ids. Its entries are rebound to extended lists.
    train_proportion : float
        Share of the samples, in [0, 1], assigned to the train split.
    val_proportion : float
        Share of the samples, in [0, 1], assigned to the validation split.
        Whatever is left after train and validation goes to the test split.

    Returns
    -------
    dict
        The same ``train_val_test_dict`` object, updated.
    """
    pool = list(file_indexes)
    n_samples = len(pool)

    # Drawing all n_samples items without replacement gives a random permutation.
    shuffled = random.sample(pool, n_samples)

    # Cumulative cut points. Slice ends are exclusive, so they are used as-is:
    # adding 1 to them would move one sample from test into train.
    train_end = round(train_proportion * n_samples)
    val_end = round((train_proportion + val_proportion) * n_samples)

    parts = (
        ("train", shuffled[:train_end]),
        ("val", shuffled[train_end:val_end]),
        ("test", shuffled[val_end:]),
    )

    for split_name, members in parts:
        new_ids = ["id-" + str(idx) for idx in members]
        # Rebind instead of extending in place, so lists shared with other
        # references are left untouched.
        train_val_test_dict[split_name] = train_val_test_dict[split_name] + new_ids

    return train_val_test_dict

In [None]:
# Distribute the negative samples: 0.8 train proportion, 0.1 validation proportion, remainder test.
train_val_test_dict = draw_samples(negative_indexes, train_val_test_dict, 0.8, 0.1)

In [None]:
# Distribute the positive samples with the same proportions and append them to the same split lists.
train_val_test_dict = draw_samples(positive_indexes, train_val_test_dict, 0.8, 0.1)

# Checks

In [None]:
# Size of each split, in the order train, val, test.
split_sizes = [len(train_val_test_dict[split]) for split in ("train", "val", "test")]
for size in split_sizes:
    print(size)

# The sizes summed over all splits should equal the total number of samples,
# i.e. the last positive index plus one (indexes start at 0).
print(sum(split_sizes))
print(last_postive_idx + 1)

# Generate McFly Models

In [None]:
if generate_models:
    # Shape the models are generated for: (number of training samples, 2500, 8),
    # where (2500, 8) is the per-sample `dim` used by the data generator.
    X_train_shape = (len(train_val_test_dict["train"]), 2500, 8)
    models = mcfly.modelgen.generate_models(X_train_shape,
                                            number_of_classes = 2,
                                            number_of_models = 10,
                                            metrics = ["accuracy"])

    # Create the output folders up front; open(..., "w") does not create
    # missing parent directories and would raise FileNotFoundError.
    for prefix in (archi_path, params_path, type_path):
        folder = os.path.dirname(prefix)
        if folder:
            os.makedirs(folder, exist_ok = True)

    # Positions of the models to print and save (currently all of them).
    models_to_print = range(len(models))
    for i, item in enumerate(models):
        if i not in models_to_print:
            continue

        model, params, model_types = item
        print("--------------------------------------------------------------------")
        print("Model" + str(i))
        print("  ")
        print("Hyperparameters:")
        print(params)
        print("  ")
        print("Model description:")
        model.summary()
        print("  ")
        print("Model type:")
        print(model_types)
        print(" ")

        # json cannot encode NumPy arrays or NumPy scalars, so convert them to
        # plain Python values. Iterate over a snapshot because the dict is
        # updated while looping.
        for key, value in list(params.items()):
            if isinstance(value, np.ndarray):
                params[key] = value.tolist()
            elif isinstance(value, np.generic):
                params[key] = value.item()

        name = "Model" + str(i)
        model_type = {"type": model_types}

        # model.to_json() already returns a JSON string; it is dumped as a
        # string on purpose, because the loading cell reads it back with
        # json.load before handing it to model_from_json.
        with open(archi_path + name + ".json", "w") as f:
            json.dump(model.to_json(), f)

        with open(params_path + name + ".json", "w") as f:
            json.dump(params, f)

        with open(type_path + name + ".json", "w") as f:
            json.dump(model_type, f)

# Dictionary with labels

In [None]:
labels_array = np.load(labels_path)

# Map the first entry of every row (the sample id) to the second entry cast to
# int (the class). A comprehension keeps the row variable local: a leftover
# global loop variable would typically still be a view into labels_array and
# keep the whole array alive despite the `del` below.
labels = {row[0]: int(row[1]) for row in labels_array}

# Only the dictionary is needed from here on; release the array.
del labels_array
gc.collect()

# Data Loader

In [None]:
class DataGenerator(keras.utils.Sequence):
    """Keras Sequence that builds batches from per-sample .npy files.

    Each sample ``ID`` is read from ``samples_path + ID + ".npy"``
    (``samples_path`` is a module-level variable) and its class is looked up
    in ``labels``. A batch is a pair ``(X, y)`` where ``X`` has shape
    ``(number of samples in the batch, *dim)`` and ``y`` is the one-hot
    encoding of the classes over ``n_classes`` columns.

    Parameters
    ----------
    list_IDs : sequence of str
        Ids of the samples served by this generator.
    labels : mapping
        Maps a sample id to its integer class.
    batch_size : int
        Number of samples per batch. Only full batches are served: the
        trailing ``len(list_IDs) % batch_size`` samples are left out of an epoch.
    dim : tuple of int
        Shape of a single sample as stored on disk.
    n_channels : int
        Stored on the instance but not used when building batches.
    n_classes : int
        Number of columns of the one-hot encoded targets.
    shuffle : bool
        Whether to reshuffle the sample order at the end of every epoch.
    """

    def __init__(self, list_IDs, labels, batch_size = 32, dim = (2500, 8), n_channels = 1, n_classes=2, shuffle = True):
        # Let the Keras base class set up its own state; newer Keras versions
        # expect the constructor of a Sequence/PyDataset subclass to do so.
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        # Build the initial sample order.
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch."""
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` pair."""
        # Positions, in the current order, of the samples of this batch.
        batch_positions = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Translate the positions into sample ids.
        list_IDs_temp = [self.list_IDs[k] for k in batch_positions]

        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the samples in ``list_IDs_temp`` and one-hot encode their classes."""
        # Size the buffers by the ids actually given, so that a batch shorter
        # than batch_size never exposes uninitialised rows of np.empty.
        n_items = len(list_IDs_temp)
        X = np.empty((n_items, *self.dim))
        y = np.empty(n_items, dtype = int)

        for i, ID in enumerate(list_IDs_temp):
            # Sample data from disk.
            X[i,] = np.load(samples_path + ID + ".npy")

            # Integer class of the sample.
            y[i] = self.labels[ID]

        return X, keras.utils.to_categorical(y, num_classes = self.n_classes)

In [None]:
# Settings of the training generator: shuffled batches of 32 samples.
params = {
    "dim": (2500, 8),
    "batch_size": 32,
    "n_classes": 2,
    "n_channels": 1,
    "shuffle": True,
}

# The validation generator uses the same settings, except that the whole
# validation set is served as one single batch, in a fixed order.
params_val = dict(params,
                  batch_size = len(train_val_test_dict["val"]),
                  shuffle = False)

# Generators
training_generator = DataGenerator(train_val_test_dict["train"], labels, **params)
validation_generator = DataGenerator(train_val_test_dict["val"], labels, **params_val)

# Load Model

In [None]:
# The architecture file holds a JSON-encoded string, which itself is the JSON
# description of the model: decode the file first, then rebuild the model.
with open(archi_path + "Model0.json", "r") as f:
    model_loaded = json.load(f)

# The file is no longer needed once its content has been read.
model = keras.models.model_from_json(model_loaded)

In [None]:
# Print the layer-by-layer description of the loaded model.
model.summary()

In [None]:
# Compile with the "adam" optimizer named by string, i.e. without custom settings.
# TODO: set the learning rate and the regularisation rate here.
model.compile(loss="binary_crossentropy", optimizer = "adam", metrics = ["accuracy"])

In [None]:
# Stop training early when the monitored quantity ("loss", the training loss)
# has not improved for 4 epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor = "loss", patience = 4)

# `callbacks` is documented as a list of callback instances; passing a bare
# callback only works where the implementation happens to flatten it.
# class_weight makes class 1 count four times as much as class 0 in the loss.
model.fit(training_generator,
          validation_data = validation_generator,
          epochs = 50,
          class_weight = {0:1.,1:4.},
          callbacks = [callback],
          verbose = True)

# Predictions

In [None]:
# Run the trained model on the batches served by the validation generator.
predictions = model.predict(validation_generator)