In [None]:
import datetime
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras_visualizer import visualizer
from sklearn.model_selection import train_test_split
from tensorflow import keras

In [None]:
def readInputParams(file_name):
    """Load the model parameter sets from a CSV file.

    The CSV headers are renamed to the internal key names used by the rest
    of the notebook, and the two comma-separated list columns
    (``input_columns`` and ``velocity_columns``) are split into lists of
    stripped strings.

    Args:
        file_name: Path (or file-like object) handed to ``pd.read_csv``.

    Returns:
        A list with one dict per CSV row, keyed by the renamed column names.
    """
    header_aliases = {
        'modelo': 'name',
        'entrada': 'input_columns',
        'velocidades': 'velocity_columns',
        'camadas internas': 'num_hidden_layers',
        'num neuronios internos': 'num_neurons_per_hidden_layer',
        'funcao de ativacao': 'activation_func',
    }

    def _split_cell(cell):
        # "a, b ,c" -> ['a', 'b', 'c']
        return [token.strip() for token in cell.split(',')]

    table = pd.read_csv(file_name, index_col=False).rename(columns=header_aliases)

    for list_column in ('input_columns', 'velocity_columns'):
        table[list_column] = table[list_column].apply(_split_cell)

    return table.to_dict('records')

In [None]:
def preparaDataset(params, df_original):
    """Reshape the wide table into (features, target) arrays for training.

    Each row of ``df_original`` carries one target value per velocity column.
    This function converts that wide layout into a long one: the input
    columns are repeated once per velocity column, and the velocity columns
    are melted into a ``Velocity`` column (holding the original column name)
    and a ``Resistance`` column (holding the cell value).

    Args:
        params: Mapping with ``'input_columns'`` and ``'velocity_columns'``
            lists of column names.
        df_original: DataFrame containing all of those columns.

    Returns:
        ``(X, y)`` as float32 arrays. ``X`` holds the input columns followed
        by the velocity; ``y`` holds the resistance values. Because ``X`` is
        cast to float32, the velocity column names must be numeric strings.
    """
    input_cols = params['input_columns']
    velocity_cols = params['velocity_columns']

    # Stack one copy of the input rows per velocity column so the row count
    # matches the melted velocity table below.
    repeated_inputs = pd.concat(
        [df_original[input_cols]] * len(velocity_cols),
        ignore_index=True,
    )

    # Wide -> long: one row per (original row, velocity column) pair.
    long_velocities = df_original[velocity_cols].melt(
        var_name='Velocity',
        value_name='Resistance',
    )

    # Both frames share a fresh 0..n-1 index, so an index join lines them up.
    combined = repeated_inputs.join(long_velocities, lsuffix='a', rsuffix='b')

    # Features are the input columns plus the 'Velocity' column right after them.
    feature_count = len(input_cols) + 1
    X = np.asarray(combined.iloc[:, :feature_count].values).astype('float32')
    y = np.asarray(combined['Resistance'].values).astype('float32')

    return X, y

In [None]:

def splitDataset(X, y, test_size=0.15, val_size=0.15, random_state=None):
    """Split features and targets into train, validation and test subsets.

    The split is done in two passes with ``train_test_split``: first the
    test subset is carved out of the full data, then the validation subset
    is carved out of what remains. Note that ``val_size`` is therefore
    applied to the remainder after the test split, not to the full dataset.

    Args:
        X: Feature array.
        y: Target array, aligned with ``X``.
        test_size: ``test_size`` passed to the first split.
        val_size: ``test_size`` passed to the second split.
        random_state: Seed forwarded to both splits.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``, each converted
        to a float32 numpy array.
    """
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=val_size, random_state=random_state
    )

    ordered_parts = (X_train, y_train, X_val, y_val, X_test, y_test)
    return tuple(np.asarray(part).astype('float32') for part in ordered_parts)

In [None]:
def buildTopology(params):
    """Build the ordered list of Keras layers for one parameter set.

    The network is an input layer, ``num_hidden_layers`` dense hidden layers
    of ``num_neurons_per_hidden_layer`` units each, and a single-unit dense
    output layer with no activation.

    Args:
        params: Mapping with ``'input_columns'``, ``'num_hidden_layers'`` and
            ``'num_neurons_per_hidden_layer'``. If it also has a non-empty
            string ``'activation_func'``, that name is used as the hidden
            layer activation; otherwise ReLU is used.

    Returns:
        A list of layers suitable for ``keras.Sequential``.
    """
    # One feature per input column plus one extra: preparaDataset appends the
    # velocity as the last feature column.
    num_features = len(params['input_columns']) + 1

    # Pass the shape as an explicit tuple keyword; a bare positional int is
    # not accepted as a shape by every Keras version.
    topology = [keras.layers.Input(shape=(num_features,), name='input')]

    # Honour the activation configured in the parameter file. Fall back to
    # ReLU when the entry is missing or not a usable string (e.g. NaN from an
    # empty CSV cell).
    activation = params.get('activation_func')
    if isinstance(activation, str) and activation.strip():
        activation = activation.strip().lower()
    else:
        activation = tf.nn.relu

    # Cast to int so values that arrive as numpy integers or whole floats
    # from the CSV are accepted by range() and Dense().
    num_hidden_layers = int(params['num_hidden_layers'])
    num_neurons = int(params['num_neurons_per_hidden_layer'])

    topology.extend(
        keras.layers.Dense(num_neurons, activation=activation, name='inner' + str(i))
        for i in range(num_hidden_layers)
    )

    topology.append(keras.layers.Dense(1, name='output'))

    return topology

In [None]:
def trainModel(params, train_set, val_set, number_epochs):
    """Build, compile and fit a sequential model for one parameter set.

    Args:
        params: Parameter mapping forwarded to ``buildTopology``.
        train_set: Sequence unpacked positionally into ``model.fit``
            (in this notebook, ``(X_train, y_train)``).
        val_set: Passed to ``model.fit`` as ``validation_data``.
        number_epochs: Number of training epochs.

    Returns:
        ``(model, history)``: the fitted model and the object returned by
        ``model.fit``.
    """
    network = keras.Sequential(buildTopology(params))

    network.compile(
        loss='mse',
        optimizer='adam',
        metrics=['mean_absolute_percentage_error'],
    )

    fit_options = {
        'epochs': number_epochs,
        'batch_size': 15,
        'validation_data': val_set,
    }
    training_history = network.fit(*train_set, **fit_options)

    return network, training_history

In [None]:
def perGenerationLoss(history, params, number_epochs, output_dir=None):
    """Save and plot the training and validation loss for every epoch.

    Writes ``loss_per_generation.csv`` and ``loss_per_generation.png`` into
    the output directory and displays the plot.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain ``'loss'`` and ``'val_loss'``.
        params: Unused; kept for interface compatibility.
        number_epochs: Unused for the epoch axis, which is derived from the
            recorded history so the two can never disagree; kept for
            interface compatibility.
        output_dir: Directory for the output files. When omitted, the
            notebook-level global ``sub_folder_name`` is used.
    """
    loss_train = history.history['loss']
    loss_val = history.history['val_loss']
    # Number the epochs from what was actually recorded rather than from the
    # requested count, so a shorter history does not break the DataFrame.
    epochs = range(1, len(loss_train) + 1)

    if output_dir is None:
        # Backward-compatible default: global set by the training loop.
        output_dir = sub_folder_name
    # The results folder is timestamped per run, so it usually does not exist yet.
    os.makedirs(output_dir, exist_ok=True)

    df = pd.DataFrame(data={
        'epoch': epochs,
        'loss_train': loss_train,
        'loss_val': loss_val
    })

    df.to_csv(output_dir + '/loss_per_generation.csv', index=False)

    # Use a dedicated figure so curves from previously reported models are
    # never drawn on the same axes.
    fig, ax = plt.subplots()
    ax.plot(epochs, loss_train, 'g', label='Conjunto de treinamento')
    ax.plot(epochs, loss_val, 'b', label='Conjunto de validação')
    ax.set_title('Erro quadrático médio por conjunto por geração')
    ax.set_xlabel('Geração')
    ax.set_ylabel('Erro quadrático médio')
    ax.legend()
    fig.savefig(output_dir + '/loss_per_generation.png')
    plt.show()
    # Release the figure; this function is called once per model in a loop.
    plt.close(fig)

def generateReports(model, history, full_dataset, splitted_dataset, params, number_epochs):
    """Generate the post-training reports for one trained model.

    Currently only the per-epoch loss report is produced; ``model``,
    ``full_dataset`` and ``splitted_dataset`` are accepted but not used.
    """
    perGenerationLoss(history=history, params=params, number_epochs=number_epochs)

In [None]:
param_sets = readInputParams('params.csv')

df_original = pd.read_csv('Banco de Dados - Completo.csv')

# Hold out whole rows (vessels) as the final test set. Sample directly from
# the existing index labels: the previous "range(0, 53) - 1" could produce
# the label -1 (matching no row) and could never select the last row.
num_test_vessels = 4
test_vessel_indexes = np.array(random.sample(list(df_original.index), num_test_vessels))

print('Selected vessels for testing: ', test_vessel_indexes)

is_test_vessel = df_original.index.isin(test_vessel_indexes)

df_train = df_original[~is_test_vessel]

df_test = df_original[is_test_vessel]

number_epochs = 100

num_trials_per_model = 10

parent_folder_name = 'results/' + datetime.datetime.now().strftime("%Y-%m-%d %H-%M-%S")

for params in param_sets:

    dataset_test = preparaDataset(params, df_test)
    dataset = preparaDataset(params, df_train)
    # splitDataset returns six arrays. Its internal test split is discarded
    # here because the real test set is the held-out vessels above.
    X_train, y_train, X_val, y_val, _, _ = splitDataset(*dataset)

    best_model = None
    best_history = None
    best_global_loss = float('inf')

    for trial in range(num_trials_per_model):
        model, history = trainModel(params, (X_train, y_train), (X_val, y_val), number_epochs)

        # Loss over the whole non-held-out dataset (train + validation + internal test split).
        global_loss = model.evaluate(*dataset, return_dict=True)['loss']

        # Always keep the first trial so a model exists even if its loss is
        # NaN; after that, only a strictly lower loss replaces it. The
        # history is tracked with the model so the report matches it.
        is_better = global_loss < best_global_loss
        if best_model is None or is_better:
            best_model = model
            best_history = history
            if is_better:
                best_global_loss = global_loss

    sub_folder_name = parent_folder_name + '/' + params['name']

    # The timestamped results folder does not exist yet on a fresh run.
    os.makedirs(sub_folder_name, exist_ok=True)

    best_model.save(sub_folder_name + '/model.keras')

    generateReports(best_model, best_history, dataset_test, (X_train, y_train, X_val, y_val, *dataset_test), params, number_epochs)