In [1]:
from tqdm import tqdm
import multiprocessing
import pandas as pd
import json
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
import joblib
import sys

def read_json_file2(file_path):
    """Read a JSON Lines file and return its records as a list.

    Every non-blank line of the file is parsed as one JSON document.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list with one decoded object per non-blank line, in file order.
        An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the locale's
    # default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        # Skip blank lines (e.g. an empty line left at the end of the file);
        # json.loads would otherwise reject them.
        return [json.loads(line) for line in f if line.strip()]

# masstestClassifier: load the results file (one JSON record per line) into a
# DataFrame.
results4 = pd.DataFrame(data=read_json_file2("masstestClassifier2.json"))

# Per-row kernel descriptions; flattened into numeric features further down.
kernels = results4['kernels']

# Columns of the results file that are handled explicitly by this script.
columns = [
    'kernels',
    'ColorR', 'ColorG', 'ColorB',
    'massR', 'massG', 'massB',
    'name', 'T', 'R', 'mass', 'variance',
    'averageLinearSpeed',
    'averageVariance',
    'averageVarianceSpeed',
    'averageAngleSpeed',
    'averageMass',
]

# Target columns: one regression model is trained and saved per entry.
out = [
    'averageLinearSpeed',
    'averageVariance',
    'averageVarianceSpeed',
    'averageAngleSpeed',
    'averageMass',
]

# Fraction of the shuffled rows used for training (a fraction, despite the
# name); the remaining rows form the test set.
TRAIN_TEST_SPLIT_PERCENTAGE = 0.9

# Seed shared by the row shuffle and the network initialisation so that a run
# can be reproduced.
RANDOM_SEED = 42


def extract_kernel_values(kernel_list):
    """Flatten a list of kernel dicts into a single dict of features.

    For the kernel at position ``i`` the keys ``m{i}``, ``s{i}`` and ``h{i}``
    are filled from that kernel's 'm', 's' and 'h' entries. An entry that is
    missing from a kernel is stored as None.
    """
    values = {}
    for i, kernel in enumerate(kernel_list):
        for key in ('m', 's', 'h'):
            values[f'{key}{i}'] = kernel.get(key)
    return values


# The kernel features do not depend on the target column, so they are built
# once here instead of once per target.
kernel_values = kernels.apply(extract_kernel_values)
# Turn the Series of dicts into a DataFrame with one column per key.
kernel_values_df = pd.DataFrame(kernel_values.tolist())

for out_column in out:
    # Drop every listed column except the current target, then append the
    # kernel features next to it.
    columns_without_output = [column for column in columns if column != out_column]
    results4_cleaned = results4.drop(columns=columns_without_output)
    results4_concat = pd.concat([results4_cleaned, kernel_values_df], axis=1)

    # Dataset sampling: shuffle the rows (seeded, so the split is repeatable).
    dataset = results4_concat.sample(frac=1, ignore_index=True, random_state=RANDOM_SEED)

    # Normalisation statistics of the feature columns. The features are
    # selected by name rather than by position, so this does not rely on the
    # target being the first column. The statistics cover all rows (training
    # and test alike).
    feature_columns = dataset.columns.drop(out_column)
    mean = dataset[feature_columns].mean()
    std = dataset[feature_columns].std()

    # NOTE: this same file is rewritten on every pass of the loop.
    with open('meanandstd.json', 'w') as f:
        json.dump({'mean': mean.to_dict(), 'std': std.to_dict()}, f)

    split_index = int(len(dataset) * TRAIN_TEST_SPLIT_PERCENTAGE)
    dataset_training = dataset.iloc[:split_index]
    dataset_test = dataset.iloc[split_index:]

    # Separate the features (X) from the target (y) for the training set.
    X = dataset_training.drop(columns=[out_column])
    y = dataset_training[out_column]

    # Same for the test set.
    X_test = dataset_test.drop(columns=[out_column])
    y_test = dataset_test[out_column]

    # Normalisation: standardise both sets with the statistics saved above.
    X = (X - mean) / std
    X_test = (X_test - mean) / std

    # Bias column: a constant feature equal to 1.
    X_test['bias'] = 1
    X['bias'] = 1

    # Create the MLPRegressor model.
    # NOTE(review): per the scikit-learn documentation, learning_rate is only
    # used with solver='sgd'; with the default solver it has no effect.
    mlp = MLPRegressor(hidden_layer_sizes=(20, 20, 20), max_iter=2000, random_state=RANDOM_SEED,
                       learning_rate='adaptive')

    # Train one partial_fit pass at a time so that the scores on the test and
    # training sets can be printed after every pass.
    for i in range(mlp.max_iter):
        mlp.partial_fit(X, y)
        print("Model for", out_column, "is training: ", i, "/", mlp.max_iter)
        print("Test score:", mlp.score(X_test, y_test))
        print("Model score:", mlp.score(X, y))

    # Save the trained model for this target.
    joblib.dump(mlp, f'mlp_model_{out_column}.pkl')


Model for averageLinearSpeed is training:  0 / 2000
Test score: 0.06225434617980885
Model score: 0.07042842139538885
Model for averageLinearSpeed is training:  1 / 2000
Test score: 0.10494852418599365
Model score: 0.11229308025384255
Model for averageLinearSpeed is training:  2 / 2000
Test score: 0.11406626132764885
Model score: 0.12134181993516979
Model for averageLinearSpeed is training:  3 / 2000
Test score: 0.11901593889380102
Model score: 0.12724776530532278
Model for averageLinearSpeed is training:  4 / 2000
Test score: 0.12347345282885236
Model score: 0.1329907004383134
Model for averageLinearSpeed is training:  5 / 2000
Test score: 0.1268873869553454
Model score: 0.13872793353484902
Model for averageLinearSpeed is training:  6 / 2000
Test score: 0.12935105252081847
Model score: 0.14345624154062497
Model for averageLinearSpeed is training:  7 / 2000
Test score: 0.13136783617658876
Model score: 0.14748893700152232
Model for averageLinearSpeed is training:  8 / 2000
Test score: 0.



Model for averageLinearSpeed is training:  658 / 2000
Test score: 0.09149741176198456
Model score: 0.2199282595211376
Model for averageLinearSpeed is training:  659 / 2000
Test score: 0.10563604597067289
Model score: 0.22514536508586103
Model for averageLinearSpeed is training:  660 / 2000
Test score: 0.10365633514671435
Model score: 0.21625834279450207
Model for averageLinearSpeed is training:  661 / 2000
Test score: 0.1123418450402347
Model score: 0.21672596123614296
Model for averageLinearSpeed is training:  662 / 2000
Test score: 0.12463362475588169
Model score: 0.2214871447086173
Model for averageLinearSpeed is training:  663 / 2000
Test score: 0.11772548186422838
Model score: 0.22092959196943318
Model for averageLinearSpeed is training:  664 / 2000
Test score: 0.11634599806040069
Model score: 0.2095059008328487
Model for averageLinearSpeed is training:  665 / 2000
Test score: 0.11672187386644572
Model score: 0.22073642595604548
Model for averageLinearSpeed is training:  666 / 200