In [None]:
import matplotlib.pyplot as plt
import numpy as np
import sys
import os
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import ast

sys.path.append(os.path.abspath('../src/'))
from ActivationFunctions import *
from NeuralNetwork import *
from MyUtils import *
from ModelSelection import *


%matplotlib inline

In [None]:
# Column layout of the ML-CUP training matrix: the first TR_INPUT columns are
# treated as inputs and the remaining columns as targets (see the slices below).
TR_INPUT = 10
TR_OUTPUT = 3

# Pre-split train/test partitions, in standardised and raw form.
tr_norm_df = pd.read_csv('../data/divided_std_train_0_8.csv')
test_norm_df = pd.read_csv('../data/divided_std_test_0_2.csv')
tr_df = pd.read_csv('../data/divided_train_0_8.csv')
test_df = pd.read_csv('../data/divided_test_0_2.csv')

# Full training file: no header row, first column used as index, '#' lines skipped.
# sample(frac=1) shuffles all rows; RANDOM_STATE is not defined in this notebook
# (presumably provided by one of the star imports — TODO confirm).
ML_cup_train = (
    pd.read_csv('../data/ML-CUP23-TR.csv', header=None, index_col=0, comment='#')
    .sample(frac=1, random_state=RANDOM_STATE)
)

# Independent scalers for inputs and targets; scaler_out is reused later to map
# standardised predictions back to the original target scale.
scaler_in = StandardScaler()
scaler_out = StandardScaler()

# Work on a copy so the raw frame stays untouched.
ML_cup_train_norm = ML_cup_train.values.copy()
ML_cup_train_norm[:, :TR_INPUT] = scaler_in.fit_transform(ML_cup_train_norm[:, :TR_INPUT])
ML_cup_train_norm[:, TR_INPUT:] = scaler_out.fit_transform(ML_cup_train_norm[:, TR_INPUT:])

# Number of patterns (rows) in the full standardised training set.
training_len = ML_cup_train_norm.shape[0]

In [None]:
def load_obj(path):
    """Load a list of model records from the JSON file at `path`.

    Each record must carry an 'nn_file_name' entry; the file it names is read
    and passed to NeuralNetwork.fromJSON, and the result is stored under the
    record's 'model' key.

    NOTE(review): this function is defined again in a later cell of the
    notebook; the later definition is the one in effect after a full run.

    Returns the list of records with their 'model' entries populated.
    """
    with open(path, 'r') as records_file:
        records = json.load(records_file)

    for record in records:
        with open(record['nn_file_name'], 'r') as network_file:
            serialized_network = network_file.read()
        record['model'] = NeuralNetwork.fromJSON(serialized_network)

    return records
            
def save_obj(obj, path):
    """Persist a list of model records.

    For every record, the network stored under 'model' is written (via its
    toJSON() method) to the file named by the record's 'nn_file_name'. The
    list of records is then dumped as JSON with the 'model' entry replaced by
    null; values json cannot encode natively are converted with str().

    Parameters
    ----------
    obj : iterable of dict
        Records containing at least 'nn_file_name' and 'model'.
    path : file-like object or path
        Destination of the JSON list. An already opened, writable file object
        is used as-is (this is how the notebook calls it); anything else is
        treated as a filesystem path and opened for writing.

    The caller's records are NOT modified: the 'model' entry is blanked on
    shallow copies only, so the networks stay usable after saving.

    NOTE(review): this function is defined in more than one cell of the
    notebook; the definition executed last is the one in effect.
    """
    # Local import: the notebook's import cell does not import json explicitly.
    import json

    records = []
    for entry in obj:
        with open(entry['nn_file_name'], 'w+') as network_file:
            network_file.write(entry['model'].toJSON())
        # Shallow copy keeps key order and leaves the caller's dict untouched.
        record = dict(entry)
        record['model'] = None
        records.append(record)

    if hasattr(path, 'write'):
        json.dump(records, path, default=str)
    else:
        with open(path, 'w+') as records_file:
            json.dump(records, records_file, default=str)

In [None]:
# Single hidden layer network: TR_INPUT inputs -> hidden_len sigmoid units -> TR_OUTPUT identity units.
hidden_len = 32
hidden_fun = 'sigmoid'
output_fun = 'identity'

# One [activation name, argument list] descriptor per unit, listed layer by layer.
# List multiplication is kept on purpose: the descriptors of a layer are the same
# object, exactly as before. The meaning of the `[1]` argument of the hidden units
# is defined by the project's activation functions — TODO confirm.
sigmoid_l1_units = (
    [[None, []]] * TR_INPUT
    + [[hidden_fun, [1]]] * hidden_len
    + [[output_fun, []]] * TR_OUTPUT
)
sigmoid_l1 = create_stratified_topology([TR_INPUT, hidden_len, TR_OUTPUT], sigmoid_l1_units)
NeuralNetwork.display_topology(sigmoid_l1)

In [None]:
def load_obj(path):
    """Load a list of model records from the JSON file at `path`.

    Each record must carry an 'nn_file_name' entry; the file it names is read
    and passed to NeuralNetwork.fromJSON, and the result is stored under the
    record's 'model' key.

    NOTE(review): this cell re-defines a function that an earlier cell of the
    notebook already defines; this later definition shadows the earlier one.

    Returns the list of records with their 'model' entries populated.
    """
    with open(path, 'r') as records_file:
        records = json.load(records_file)

    for record in records:
        with open(record['nn_file_name'], 'r') as network_file:
            serialized_network = network_file.read()
        record['model'] = NeuralNetwork.fromJSON(serialized_network)

    return records
            
def save_obj(obj, path):
    """Persist a list of model records.

    For every record, the network stored under 'model' is written (via its
    toJSON() method) to the file named by the record's 'nn_file_name'. The
    list of records is then dumped as JSON with the 'model' entry replaced by
    null; values json cannot encode natively are converted with str().

    Parameters
    ----------
    obj : iterable of dict
        Records containing at least 'nn_file_name' and 'model'.
    path : file-like object or path
        Destination of the JSON list. An already opened, writable file object
        is used as-is (this is how the notebook calls it); anything else is
        treated as a filesystem path and opened for writing.

    The caller's records are NOT modified: the 'model' entry is blanked on
    shallow copies only, so the networks stay usable after saving.

    NOTE(review): this cell re-defines a function that an earlier cell of the
    notebook already defines; this later definition shadows the earlier one.
    """
    # Local import: the notebook's import cell does not import json explicitly.
    import json

    records = []
    for entry in obj:
        with open(entry['nn_file_name'], 'w+') as network_file:
            network_file.write(entry['model'].toJSON())
        # Shallow copy keeps key order and leaves the caller's dict untouched.
        record = dict(entry)
        record['model'] = None
        records.append(record)

    if hasattr(path, 'write'):
        json.dump(records, path, default=str)
    else:
        with open(path, 'w+') as records_file:
            json.dump(records, records_file, default=str)

In [None]:
def remove_useless_val(x):
    """Blank out the hyper-parameters of the optimiser that a configuration does not use.

    `x` is a mapping-like configuration (a dict or a DataFrame row) with an
    'adamax' flag. When the flag is truthy the standard-optimiser entries
    ('learning_rate', 'lr_decay_tau', 'alpha_momentum') are set to None;
    otherwise the adamax entries ('adamax_learning_rate', 'exp_decay_rate_1',
    'exp_decay_rate_2') are. Keys that are absent are left absent.

    The configuration is modified in place and also returned.
    """
    standard_keys = ['learning_rate', 'lr_decay_tau', 'alpha_momentum']
    adamax_keys = ['adamax_learning_rate', 'exp_decay_rate_1', 'exp_decay_rate_2']

    unused_keys = standard_keys if x['adamax'] else adamax_keys
    for key in unused_keys:
        if key in x.keys():
            x[key] = None
    return x

In [None]:
# Preferred column order for the grid-search results table.
columns_order = [
    # identification and raw statistics
    'topology', 'stats',
    # general training settings
    'batch_size', 'min_epochs', 'max_epochs', 'patience',
    'error_increase_tolerance', 'lambda_tikhonov',
    # standard optimiser hyper-parameters
    'learning_rate', 'alpha_momentum', 'lr_decay_tau',
    # adamax flag and hyper-parameters
    'adamax', 'adamax_learning_rate', 'exp_decay_rate_1', 'exp_decay_rate_2',
    # aggregated metrics
    'mean_mean_euclidean_error', 'mean_mean_squared_error',
    'var_mean_euclidean_error', 'var_mean_squared_error',
    'mean_best_validation_training_error',
]

In [None]:
# Grid-search result files and the topology label attached to each of them.
# The two lists are parallel: results_files[k] belongs to topologies[k].
results_files = ['bagging_sub_model.csv']
topologies = ['32_sigmoid']
folder = '../data/gs_data/'

topologies_dict = {}   # topology label -> topology parsed from the CSV
gs_results = []        # one DataFrame per result file that was found
missing_files = []     # result files that were not found on disk

for topology_name, file_name in zip(topologies, results_files):
    file_path = folder + file_name
    if not os.path.isfile(file_path):
        missing_files.append(file_path)
        continue

    dummy = pd.read_csv(file_path)
    # The 'topology' column holds the topology as a Python literal string; keep the
    # parsed value of the first row and replace the column by the short label.
    topologies_dict[topology_name] = ast.literal_eval(dummy['topology'][0])
    dummy['topology'] = topology_name

    gs_results.append(dummy)

# Fail with an explicit message instead of the generic error pd.concat raises on an empty list.
if not gs_results:
    raise FileNotFoundError('No grid-search result file found; looked for: ' + ', '.join(missing_files))
if missing_files:
    print('Skipped missing grid-search result files:', missing_files)

orig_df = pd.concat(gs_results)

In [None]:
# Keep only the preferred columns that are actually present in the results.
columns_order = [column for column in columns_order if column in orig_df.columns]

# Rank the configurations by mean euclidean error (ascending, best first) and renumber the rows.
order_by = 'mean_mean_euclidean_error'
orig_df = (
    orig_df[columns_order]
    .sort_values(by=[order_by])
    .reset_index(drop=True)
)

# Working copy without the bulky 'stats' column.
gs_results = orig_df.drop(['stats'], axis=1)

In [None]:
# Blank the hyper-parameters of the optimiser each configuration does not use.
gs_results = gs_results.apply(remove_useless_val, axis=1)

# Every column after the first one (the first is expected to be 'topology') is cast to float.
gs_results = gs_results.astype({column: float for column in gs_results.columns[1:]})

In [None]:
# Column groups used to slice the grid-search results table.
st_opt_col = ['learning_rate','lr_decay_tau','alpha_momentum']
adamax_opt_col = ['adamax','adamax_learning_rate','exp_decay_rate_1','exp_decay_rate_2']

gs_columns = list(gs_results.columns)

# Everything except the topology label.
numerical_col = list(gs_columns)
numerical_col.remove('topology')

# Aggregated metrics are the columns whose name starts with 'var' or 'mean'.
metrics_col = [name for name in gs_columns if name.startswith('var') or name.startswith('mean')]

# General settings: neither optimiser-specific nor a metric.
excluded_from_general = set(st_opt_col) | set(metrics_col) | set(adamax_opt_col)
general_col = [name for name in gs_columns if name not in excluded_from_general]

# All non-metric columns.
tr_input_col = [name for name in gs_columns if name not in metrics_col]

In [None]:
# Fixed seed so the resampled training sets are the same on every
# Restart & Run All (the generator was previously seeded from OS entropy).
BOOTSTRAP_SEED = 42
gen = np.random.default_rng(seed=BOOTSTRAP_SEED)


def get_new_tr_vl(pattern_set, len_ds, gen):
    """Draw a random resample of `pattern_set`.

    Parameters
    ----------
    pattern_set : array-like
        Patterns to sample from; for a 2-D array whole rows are drawn
        (Generator.choice samples along the first axis by default).
    len_ds : int
        Number of patterns to draw.
    gen : numpy.random.Generator
        Random generator used for the draw.

    Returns
    -------
    numpy.ndarray
        `len_ds` patterns drawn uniformly with replacement (the default of
        Generator.choice), so the same pattern may appear more than once.
    """
    return gen.choice(pattern_set, len_ds)

In [None]:
# Train n_models networks, each on its own random resample of the standardised
# training set, and accumulate their per-epoch predictions.
n_models = 32      # number of networks to train
tr_len = 533       # number of patterns drawn for each model's training set
max_epochs = 500
mod = []           # one [network, training stats, training sample] entry per model

met = [ErrorFunctions.mean_squared_error, ErrorFunctions.mean_euclidean_error, ]

# Running SUM over models of the predictions made at each epoch; no division by
# n_models happens in this cell.
predictions_accumul_tr = np.zeros((max_epochs, training_len, TR_OUTPUT))

base_learning_rate = 0.11/8

# Create the output folder up front so a missing directory cannot abort the
# run after a model has already been trained.
models_dir = '../data/net/models_7_final_retr/'
os.makedirs(models_dir, exist_ok=True)

for i in range(n_models):

    tr = get_new_tr_vl(ML_cup_train_norm, tr_len, gen)
    # Positional arguments kept as-is; presumably the weight-initialisation range,
    # a flag and a per-model seed — TODO confirm against NeuralNetwork.
    NN = NeuralNetwork(sigmoid_l1, -0.75, 0.75, True, (i + 90129090))
    stats = NN.train(training_set = tr,
                    validation_set = None,

                    batch_size= 8,
                    max_epochs= max_epochs,
                    min_epochs= 150,
                    # training-error threshold taken from the best grid-search configuration
                    retrainig_es_error = orig_df.iloc[0]['mean_best_validation_training_error'],
                    patience = 5,
                    error_increase_tolerance = 0.000001,

                    # NOTE(review): the three values tagged "off" below are non-zero;
                    # confirm whether they really disable the corresponding feature.
                    lambda_tikhonov = 1.000000e-09, # off

                    adamax = False,

                    learning_rate = base_learning_rate,
                    lr_decay_tau = 200, # off
                    eta_tau= base_learning_rate*0.01,
                    alpha_momentum = 0.85, # off
                    nesterov = False,

                    metrics = list(met),
                    collect_data=True,
                    collect_data_batch=False,
                    verbose=True,

                    supp_dataset = ML_cup_train_norm)

    # Pad the per-epoch prediction history with its last entry up to max_epochs so
    # every model contributes an array of the same length.
    # Assumes stats['training_pred'] is a list with stats['epochs'] entries, each of
    # shape (training_len, TR_OUTPUT) — TODO confirm against NeuralNetwork.train.
    predictions_accumul_tr += np.array(stats['training_pred'] +
                                            [stats['training_pred'][-1]]*(max_epochs - stats['epochs']))

    mod.append([NN, stats, tr])
    with open(models_dir + 'model_' + str(i) + '.json', 'w+') as file:
        file.write(NN.toJSON())

In [None]:
def construct_obj(list_mod):
    """Build one summary record per trained model.

    `list_mod` is a sequence of entries whose first three items are the
    network, its training statistics and the array it was trained on (the
    first TR_INPUT columns are inputs, the remaining columns targets).

    Each record holds the model file name, its index, the network, the
    statistics, the topology label, the predictions on the model's own
    training sample (standardised and mapped back with `scaler_out`) and the
    mean euclidean error on both scales.

    Returns the list of records, in the order of `list_mod`.
    """
    records = []
    for index, entry in enumerate(list_mod):
        network, train_stats, sample = entry[0], entry[1], entry[2]
        inputs = sample[:, :TR_INPUT]
        targets = sample[:, TR_INPUT:]

        # Standardised scale first, then back to the original target scale.
        std_prediction = network.predict_array(inputs)
        prediction = scaler_out.inverse_transform(std_prediction)
        std_error = ErrorFunctions.mean_euclidean_error(std_prediction, targets)
        error = ErrorFunctions.mean_euclidean_error(prediction, scaler_out.inverse_transform(targets))

        records.append({
            'nn_file_name': '../data/net/models_7_final_retr/model_' + str(index) + '.json',
            'index': index,
            'model': network,
            'stats': train_stats,
            'top_name': '32_sigmoid',
            'std_prediction_tr': std_prediction,
            'prediction_tr': prediction,
            'std_tr_error': std_error,
            'tr_error': error,
        })

    return records

In [None]:
# Build the summary record of every trained model collected in `mod`.
models_list = construct_obj(mod)

In [None]:
# Write the list of model records to models.json. save_obj receives the open
# file handle here, although its second parameter is named `path`.
with open('../data/net/models_7_final_retr/models.json', 'w+') as file:
    save_obj(models_list, file)

In [None]:
# Store the accumulated per-epoch predictions as a nested JSON list.
# The array holds the sum over all trained models as built in the training cell;
# any averaging has to be done by whoever reads this file.
with open('../data/net/models_7_final_retr/ens_curves_tr.json', 'w+') as file:
    file.write(json.dumps(predictions_accumul_tr.tolist()))