In [None]:
import itertools
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

sys.path.append(os.path.abspath("../src"))
from NeuralNet import NeuralNet

In [2]:
# Read the train and test CSV files from the sibling data directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("../data/train.csv", "../data/test.csv")
)


In [None]:
# Columns that are meant to be numeric but need non-numeric characters removed.
numeric_object_cols = ['Engine volume', 'Mileage', 'Levy']

# Drop every character that is not a digit or a dot, parse what is left as a
# number (unparseable cells become NaN), and replace those NaNs with 0.
train[numeric_object_cols] = (
    train[numeric_object_cols]
    .replace(r'[^\d.]', '', regex=True)
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
)


In [None]:
# Categorical features: these are handed to the OneHotEncoder branch of the
# preprocessor.
cat_cols = ['Manufacturer', 'Model', 'Category', 'Leather interior', 'Fuel type',
            'Gear box type', 'Drive wheels', 'Doors', 'Wheel', 'Color']

# Numeric features: these are handed to the StandardScaler branch.
# 'Levy' belongs here, not in cat_cols: the cleaning cell converts it to a
# number (missing values filled with 0), so one-hot encoding it would create
# one column per distinct amount and throw away its ordering.
num_cols = ['Prod. year', 'Cylinders', 'Airbags', 'Engine volume', 'Mileage', 'Levy']


In [None]:
# Target: the 'Price' column as a float column vector of shape (n_rows, 1).
y = train['Price'].to_numpy(dtype=float, copy=True).reshape(-1, 1)

# Standardise the target; the mean and std are kept so predictions can be
# mapped back to the original price scale later.
y_mean, y_std = y.mean(), y.std()
y_norm = (y - y_mean) / y_std


In [None]:
# Per-column preprocessing: scale the numeric features and one-hot encode the
# categorical ones (categories unseen at fit time are ignored at transform time).
numeric_step = ('num', StandardScaler(), num_cols)
categorical_step = ('cat', OneHotEncoder(handle_unknown='ignore'), cat_cols)

preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# Fit the transformer on the training frame and build the feature matrix.
X = preprocessor.fit_transform(train)


In [None]:
# Shuffle the rows with a fixed seed so that a fresh "Restart & Run All"
# always produces the same row order.
# NOTE(review): presumably NeuralNet carves its validation split out of this
# order via val_percent -- confirm against the NeuralNet source.
SHUFFLE_SEED = 42
rng = np.random.default_rng(SHUFFLE_SEED)
idx = rng.permutation(X.shape[0])

# Apply the same permutation to the features and to both versions of the target.
X, y_norm = X[idx], y_norm[idx]
y = y[idx]  # keep raw prices aligned so metrics can be computed on the real scale


In [None]:
# If the feature matrix exposes `toarray` (e.g. a sparse matrix), convert it
# to a dense array; otherwise leave it as it is.
X = X.toarray() if hasattr(X, "toarray") else X


In [None]:
# Baseline model: one hidden layer of 20 units between the input layer
# (one unit per feature column) and a single output unit.
input_size = X.shape[1]

baseline_config = dict(
    n_layers=3,
    n_units=[input_size, 20, 1],
    epochs=50,
    lr=0.001,
    momentum=0.8,
    activation='sigmoid',
    val_percent=0.2,
)
nn = NeuralNet(**baseline_config)

# Train on the standardised target, then predict on the same feature matrix.
nn.fit(X, y_norm)
y_pred_norm = nn.predict(X)

# Undo the target standardisation to get predictions on the price scale.
y_pred = y_pred_norm * y_std + y_mean


In [10]:
# Exhaustive grid search over the NeuralNet hyperparameters.
#
# NOTE(review): the grids `architectures`, `activations`, `learning_rates`,
# `momentums`, `val_percents` and `epoch_list` are not defined in any visible
# cell; they must be defined before this cell runs.

# Reset the accumulators here so that re-running the cell starts a clean
# search instead of appending duplicate rows to `results` or comparing
# against a stale `best_mse` left over in the kernel.
results = []
best_mse = float("inf")
best_preds = None
best_params = None
best_loss_curve = None

# itertools.product varies the last grid fastest, i.e. the same visiting order
# as six nested loops written in this order.
search_space = itertools.product(
    architectures, activations, learning_rates,
    momentums, val_percents, epoch_list,
)

for arch, act, lr, mom, valp, ep in search_space:
    nn = NeuralNet(
        n_layers=len(arch),
        n_units=arch,
        epochs=ep,
        lr=lr,
        momentum=mom,
        activation=act,
        val_percent=valp,
    )

    nn.fit(X, y_norm)

    # Predict on the same matrix that was passed to fit and map the output
    # back to the price scale before scoring.
    # NOTE(review): this scores on data the model was fitted on, so the
    # ranking may favour overfit configurations -- consider a held-out set.
    preds_norm = nn.predict(X)
    preds = preds_norm * y_std + y_mean
    mse = mean_squared_error(y, preds)

    params = {
        "arch": arch,
        "activation": act,
        "lr": lr,
        "momentum": mom,
        "val_percent": valp,
        "epochs": ep,
    }
    results.append({**params, "mse": mse})

    # Keep everything needed to report and plot the best run so far.
    if mse < best_mse:
        best_mse = mse
        best_preds = preds
        best_params = params
        best_loss_curve = nn.loss_epochs()


ValueError: setting an array element with a sequence.

In [None]:
# Report the winning hyperparameter combination and its MSE.
print("Mejores hiperparámetros encontrados:", best_params, sep="\n")
print("\nMSE del mejor modelo:", best_mse)


In [None]:
# Predicted vs. real prices for the best model found by the search.
fig, ax = plt.subplots()
ax.scatter(y, best_preds, alpha=0.3)

# Dashed red diagonal: where the points would fall if predictions were exact.
price_range = [y.min(), y.max()]
ax.plot(price_range, price_range, 'r--')

ax.set_xlabel("Real Price")
ax.set_ylabel("Predicted Price")
ax.set_title("Best Model: Predicted vs Real")
plt.show()


In [None]:
# Loss history of the best model as a 2-D array: column 0 is plotted as the
# training curve and column 1 as the validation curve.
loss = np.array(best_loss_curve)

fig, ax = plt.subplots()
for column, curve_label in enumerate(('Train MSE', 'Validation MSE')):
    ax.plot(loss[:, column], label=curve_label)

ax.set_xlabel("Epoch")
ax.set_ylabel("MSE")
ax.legend()
ax.set_title("Loss Curve del Mejor Modelo")
plt.show()
