In [1]:


import os
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from joblib import parallel_backend
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

DATA_DIR = Path("../../../data/processed")
train = pd.read_csv(DATA_DIR / 'train_fe_scaled.csv')
val = pd.read_csv(DATA_DIR / 'val_fe_scaled.csv')

TARGET = 'Calories'
FEATURES = [c for c in train.columns if c not in ['id', TARGET]]
X_train, y_train = train[FEATURES], train[TARGET]
X_val, y_val = val[FEATURES], val[TARGET]


In [2]:
def report_results(model_name, grid, X_val, y_val):
    best = grid.best_estimator_
    preds = best.predict(X_val)
    mae = mean_absolute_error(y_val, preds)
    mse = mean_squared_error(y_val, preds)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_val, preds)
    return {
        "model": model_name,
        "best_params": grid.best_params_,
        "MAE": mae,
        "RMSE": rmse,
        "R2": r2
    }, best

RESULTS_CSV = "../../results/baseline_results.csv"

def append_result_to_csv(result_dict, csv_path=RESULTS_CSV):
    df_new = pd.DataFrame([result_dict])
    if os.path.exists(csv_path):
        df_existing = pd.read_csv(csv_path)
        df_all = pd.concat([df_existing, df_new], ignore_index=True)
    else:
        df_all = df_new
    df_all.to_csv(csv_path, index=False)

In [3]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import RandomizedSearchCV

results = {}
param_dist_mlp = {
    "hidden_layer_sizes": [(50,), (100,), (100,50)],
    "activation": ["relu", "tanh"],
    "learning_rate_init": [0.001, 0.01],
    "max_iter": [250, 300]
}
mlp_grid = RandomizedSearchCV(MLPRegressor(random_state=42, verbose=True), param_distributions=param_dist_mlp,
                              n_iter=4, cv=3, scoring="neg_mean_squared_error", n_jobs=1, random_state=42,
                              verbose=3)
mlp_grid.fit(X_train, y_train)  
res, best = report_results("MLPRegressor", mlp_grid, X_val, y_val)
results["MLPRegressor"] = res


Fitting 3 folds for each of 4 candidates, totalling 12 fits
Iteration 1, loss = 223.62951231
Iteration 2, loss = 6.96271223
Iteration 3, loss = 6.79865477
Iteration 4, loss = 6.73301307
Iteration 5, loss = 6.70652268
Iteration 6, loss = 6.67003773
Iteration 7, loss = 6.63140040
Iteration 8, loss = 6.60778383
Iteration 9, loss = 6.61824473
Iteration 10, loss = 6.57107551
Iteration 11, loss = 6.59264477
Iteration 12, loss = 6.57879926
Iteration 13, loss = 6.56376429
Iteration 14, loss = 6.54580840
Iteration 15, loss = 6.53925177
Iteration 16, loss = 6.52270840
Iteration 17, loss = 6.51567777
Iteration 18, loss = 6.50623987
Iteration 19, loss = 6.47957608
Iteration 20, loss = 6.48289757
Iteration 21, loss = 6.48980218
Iteration 22, loss = 6.46797735
Iteration 23, loss = 6.47871133
Iteration 24, loss = 6.46372478
Iteration 25, loss = 6.45879164
Iteration 26, loss = 6.44505151
Iteration 27, loss = 6.44333137
Iteration 28, loss = 6.41420816
Iteration 29, loss = 6.42033132
Iteration 30, loss 



[CV 1/3] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate_init=0.001, max_iter=250;, score=-13.658 total time=16.8min
Iteration 1, loss = 225.85352978
Iteration 2, loss = 7.28850271
Iteration 3, loss = 7.11331602
Iteration 4, loss = 7.03993440
Iteration 5, loss = 7.00271579
Iteration 6, loss = 6.94260117
Iteration 7, loss = 6.90595007
Iteration 8, loss = 6.89613770
Iteration 9, loss = 6.84966551
Iteration 10, loss = 6.83129233
Iteration 11, loss = 6.82670499
Iteration 12, loss = 6.80604693
Iteration 13, loss = 6.83136105
Iteration 14, loss = 6.76155953
Iteration 15, loss = 6.77464202
Iteration 16, loss = 6.74075055
Iteration 17, loss = 6.73702749
Iteration 18, loss = 6.73447076
Iteration 19, loss = 6.70447250
Iteration 20, loss = 6.69720045
Iteration 21, loss = 6.71690923
Iteration 22, loss = 6.69856135
Iteration 23, loss = 6.68395698
Iteration 24, loss = 6.67128297
Iteration 25, loss = 6.65465315
Iteration 26, loss = 6.63705279
Iteration 27, loss = 6.63115086
Iteration



[CV 2/3] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate_init=0.001, max_iter=250;, score=-12.761 total time=29.8min
Iteration 1, loss = 222.99107231
Iteration 2, loss = 7.13891874
Iteration 3, loss = 6.97532210
Iteration 4, loss = 6.90294276
Iteration 5, loss = 6.90100095
Iteration 6, loss = 6.84276370
Iteration 7, loss = 6.80033162
Iteration 8, loss = 6.81377697
Iteration 9, loss = 6.76404790
Iteration 10, loss = 6.76292437
Iteration 11, loss = 6.75536819
Iteration 12, loss = 6.74692538
Iteration 13, loss = 6.72767345
Iteration 14, loss = 6.71194373
Iteration 15, loss = 6.70080057
Iteration 16, loss = 6.68100108
Iteration 17, loss = 6.65948667
Iteration 18, loss = 6.65534896
Iteration 19, loss = 6.64362887
Iteration 20, loss = 6.64802183
Iteration 21, loss = 6.62948591
Iteration 22, loss = 6.61249382
Iteration 23, loss = 6.62949627
Iteration 24, loss = 6.61114873
Iteration 25, loss = 6.60128470
Iteration 26, loss = 6.60241044
Iteration 27, loss = 6.57375079
Iteration



[CV 2/3] END activation=tanh, hidden_layer_sizes=(100,), learning_rate_init=0.001, max_iter=250;, score=-12.372 total time= 6.4min
Iteration 1, loss = 1531.34136176
Iteration 2, loss = 105.76070099
Iteration 3, loss = 23.70730957
Iteration 4, loss = 10.37098841
Iteration 5, loss = 7.53123705
Iteration 6, loss = 6.93578595
Iteration 7, loss = 6.77389488
Iteration 8, loss = 6.71667487
Iteration 9, loss = 6.68340046
Iteration 10, loss = 6.65356190
Iteration 11, loss = 6.63908505
Iteration 12, loss = 6.62298880
Iteration 13, loss = 6.60920330
Iteration 14, loss = 6.60026477
Iteration 15, loss = 6.58545959
Iteration 16, loss = 6.57989525
Iteration 17, loss = 6.57253926
Iteration 18, loss = 6.56884851
Iteration 19, loss = 6.56141874
Iteration 20, loss = 6.55402555
Iteration 21, loss = 6.55003687
Iteration 22, loss = 6.53772104
Iteration 23, loss = 6.53420250
Iteration 24, loss = 6.52779573
Iteration 25, loss = 6.52411013
Iteration 26, loss = 6.51473133
Iteration 27, loss = 6.50802236
Iterati



[CV 3/3] END activation=tanh, hidden_layer_sizes=(100,), learning_rate_init=0.001, max_iter=250;, score=-12.701 total time= 6.5min
Iteration 1, loss = 993.64399520
Iteration 2, loss = 22.92647721
Iteration 3, loss = 10.11119647
Iteration 4, loss = 7.87937071
Iteration 5, loss = 7.21138760
Iteration 6, loss = 6.94725304
Iteration 7, loss = 6.82956457
Iteration 8, loss = 6.75802048
Iteration 9, loss = 6.70760562
Iteration 10, loss = 6.67963887
Iteration 11, loss = 6.65099879
Iteration 12, loss = 6.63017390
Iteration 13, loss = 6.61132295
Iteration 14, loss = 6.60163602
Iteration 15, loss = 6.59202078
Iteration 16, loss = 6.58169392
Iteration 17, loss = 6.57259398
Iteration 18, loss = 6.56888057
Iteration 19, loss = 6.55921320
Iteration 20, loss = 6.55160458
Iteration 21, loss = 6.54398454
Iteration 22, loss = 6.54140416
Iteration 23, loss = 6.53468463
Iteration 24, loss = 6.53171284
Iteration 25, loss = 6.52215191
Iteration 26, loss = 6.51917040
Iteration 27, loss = 6.51523495
Iteration 



Iteration 1, loss = 995.10336294
Iteration 2, loss = 23.23087698
Iteration 3, loss = 10.52046785
Iteration 4, loss = 8.19045859
Iteration 5, loss = 7.51494874
Iteration 6, loss = 7.25318657
Iteration 7, loss = 7.13557344
Iteration 8, loss = 7.06634294
Iteration 9, loss = 7.01220841
Iteration 10, loss = 6.98741986
Iteration 11, loss = 6.95905001
Iteration 12, loss = 6.94096671
Iteration 13, loss = 6.92315111
Iteration 14, loss = 6.91152933
Iteration 15, loss = 6.89333273
Iteration 16, loss = 6.88776462
Iteration 17, loss = 6.87839710
Iteration 18, loss = 6.87282705
Iteration 19, loss = 6.86260079
Iteration 20, loss = 6.85667062
Iteration 21, loss = 6.85303821
Iteration 22, loss = 6.84482809
Iteration 23, loss = 6.83985093
Iteration 24, loss = 6.83828000
Iteration 25, loss = 6.83128011
Iteration 26, loss = 6.82666122
Iteration 27, loss = 6.82096473
Iteration 28, loss = 6.81432997
Iteration 29, loss = 6.80954035
Iteration 30, loss = 6.80600938
Iteration 31, loss = 6.79682470
Iteration 32,



Iteration 1, loss = 999.51995219
Iteration 2, loss = 23.81257880
Iteration 3, loss = 10.35640592
Iteration 4, loss = 7.89316064
Iteration 5, loss = 7.37311639
Iteration 6, loss = 7.12545310
Iteration 7, loss = 6.99245033
Iteration 8, loss = 6.91645898
Iteration 9, loss = 6.86653537
Iteration 10, loss = 6.83187713
Iteration 11, loss = 6.80809632
Iteration 12, loss = 6.79240902
Iteration 13, loss = 6.77926644
Iteration 14, loss = 6.76145703
Iteration 15, loss = 6.74756840
Iteration 16, loss = 6.73879679
Iteration 17, loss = 6.72991034
Iteration 18, loss = 6.71948181
Iteration 19, loss = 6.71496991
Iteration 20, loss = 6.70998912
Iteration 21, loss = 6.70472179
Iteration 22, loss = 6.69919632
Iteration 23, loss = 6.68984287
Iteration 24, loss = 6.68712000
Iteration 25, loss = 6.68103346
Iteration 26, loss = 6.67762201
Iteration 27, loss = 6.67580969
Iteration 28, loss = 6.67083323
Iteration 29, loss = 6.66721790
Iteration 30, loss = 6.66512499
Iteration 31, loss = 6.65625256
Iteration 32,

In [None]:
# Crear directorio si no existe
os.makedirs("../../results/models", exist_ok=True)
joblib.dump(best, "../../results/models/MLPRegressor.joblib")
append_result_to_csv(res)

In [16]:
results_df = pd.DataFrame(results).T
print(results_df)
results_df.to_csv("../../results/models/baseline_results_mlp.csv")


                     model                                        best_params  \
MLPRegressor  MLPRegressor  {'max_iter': 250, 'learning_rate_init': 0.001,...   

                   MAE      RMSE        R2  
MLPRegressor  2.130965  3.567028  0.996717  
