In [None]:
#1
import sys
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

# Make the project's ../src directory importable so the local NeuralNet module
# can be loaded. The relative path is resolved against the current working
# directory at the time this cell runs.
sys.path.append(os.path.abspath("../src"))
from NeuralNet import NeuralNet

In [None]:
#2
# Load the raw train/test CSV files (paths are relative to the working directory).
train = pd.read_csv("../data/train.csv")
test = pd.read_csv("../data/test.csv")

# Print each frame's (rows, columns) as a quick sanity check.
for shape_label, frame in (("Train shape:", train), ("Test shape:", test)):
    print(shape_label, frame.shape)


In [None]:
#3
# Columns that are meant to be numeric but may contain extra non-numeric characters.
numeric_object_cols = ['Engine volume', 'Mileage', 'Levy']

for col in numeric_object_cols:
    # Strip everything except digits and dots, parse to a number, and replace
    # whatever cannot be parsed (NaN after coercion) with 0.
    digits_only = train[col].replace(r'[^\d.]', '', regex=True)
    train[col] = pd.to_numeric(digits_only, errors='coerce').fillna(0)


In [None]:
#4
# Categorical features: these are one-hot encoded by the preprocessing step.
cat_cols = ['Manufacturer','Model','Category','Leather interior','Fuel type',
            'Gear box type','Drive wheels','Doors','Wheel','Color']

# Numeric features: these are standardised by the preprocessing step.
# 'Levy' belongs here rather than in cat_cols: the cleaning cell converts it to a
# number (it is listed in numeric_object_cols), so one-hot encoding it would turn
# every distinct amount into its own column and discard its ordering.
num_cols = ['Prod. year','Cylinders','Airbags','Engine volume','Mileage','Levy']


In [None]:
#5
# Target as a float column vector of shape (n_samples, 1).
y = train['Price'].to_numpy(dtype=float).reshape(-1, 1)

# Standardise the target; mean and std are kept so predictions can later be
# mapped back to the original price scale.
y_mean, y_std = y.mean(), y.std()
y_norm = (y - y_mean) / y_std


In [None]:
#6
# Feature pipeline: standardise the numeric columns and one-hot encode the
# categorical ones (handle_unknown='ignore' so unseen categories do not raise).
numeric_step = ('num', StandardScaler(), num_cols)
categorical_step = ('cat', OneHotEncoder(handle_unknown='ignore'), cat_cols)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# Fit the transformers on the training frame and build the feature matrix.
X = preprocessor.fit_transform(train)


In [None]:
#7
# Shuffle the rows with a fixed seed so a fresh Restart & Run All reproduces the
# same ordering. np.random.seed seeds NumPy's legacy global generator, so any
# later draws from np.random are repeatable as well.
SEED = 42
np.random.seed(SEED)
idx = np.random.permutation(X.shape[0])

# Apply the same permutation to the features and to both versions of the target
# so that rows stay aligned.
X, y_norm = X[idx], y_norm[idx]
y = y[idx]


In [None]:
#8
# Convert to a dense array when X exposes .toarray() (e.g. a sparse matrix);
# otherwise leave it untouched.
X = X.toarray() if hasattr(X, "toarray") else X


In [None]:
#9
# Baseline network: one input unit per feature column, a single 20-unit hidden
# layer, and one output unit.
input_size = X.shape[1]
layer_sizes = [input_size, 20, 1]

baseline_config = dict(
    n_layers=len(layer_sizes),
    n_units=layer_sizes,
    epochs=50,
    lr=0.001,
    momentum=0.8,
    activation='sigmoid',
    val_percent=0.2,
)
nn = NeuralNet(**baseline_config)

# Train on the standardised target, then predict on the same feature matrix.
nn.fit(X, y_norm)
y_pred_norm = nn.predict(X)

# Undo the target standardisation to express predictions in price units.
y_pred = y_pred_norm * y_std + y_mean


In [None]:
#10
# Per-epoch losses reported by the trained network; column 0 is plotted as the
# training curve and column 1 as the validation curve.
loss = nn.loss_epochs()
plt.plot(loss[:,0], label='Train MSE')
plt.plot(loss[:,1], label='Validation MSE')
plt.xlabel('Epochs')
plt.ylabel('MSE')
# Title and message below were stored as mojibake (UTF-8 text decoded with a
# single-byte codec); the intended accented characters are restored.
plt.title('Evolución del error')
plt.legend()
plt.show()

# Last row of the loss history, i.e. the final (train, validation) pair.
print("Último error (train, val):", loss[-1])


In [None]:
#11
# Baseline predictions against true prices; points on the dashed red diagonal
# are perfect predictions.
price_range = [y.min(), y.max()]
plt.scatter(y, y_pred, alpha=0.3)
plt.plot(price_range, price_range, 'r--')
plt.title('Prediction vs Real (Custom BP)')
plt.xlabel('Real Price')
plt.ylabel('Predicted Price')
plt.show()


In [None]:
#12
# Hyper-parameter candidates, one tuple per configuration:
# (hidden-layer sizes, learning rate, momentum, activation).
_search_space = [
    ([10], 0.01, 0.5, "sigmoid"),
    ([20], 0.01, 0.9, "sigmoid"),
    ([30], 0.005, 0.8, "tanh"),
    ([50], 0.001, 0.9, "relu"),
    ([32, 16], 0.01, 0.7, "sigmoid"),
    ([64, 32], 0.005, 0.8, "tanh"),
    ([16], 0.02, 0.5, "sigmoid"),
    ([40], 0.002, 0.9, "tanh"),
    ([25], 0.01, 0.9, "relu"),
    ([32, 16], 0.001, 0.8, "linear"),
]

# Expand each tuple into the dict consumed by the training loop. "layers" counts
# the input and output layers as well, and "units" wraps the hidden sizes with
# input_size in front and a single output unit at the end.
param_grid = [
    {
        "layers": len(hidden) + 2,
        "units": [input_size, *hidden, 1],
        "lr": lr,
        "momentum": momentum,
        "act": act,
    }
    for hidden, lr, momentum, act in _search_space
]



In [None]:
#13
# Manual grid search: train one network per entry in param_grid, score it in
# original price units, and remember the configuration with the lowest MSE.
# Note: metrics are computed on the same X that is passed to fit().
results = []
best_mse = float('inf')
best_preds = None
best_params = None
best_loss_curve = None

# A true price of zero would make the percentage error infinite, so MAPE is
# computed only over rows whose true price is non-zero.
nonzero_price = y != 0

for i, params in enumerate(param_grid):
    print(f"\n🔎 Training model {i+1}/{len(param_grid)} with params: {params}")

    nn = NeuralNet(
        n_layers=params["layers"],
        n_units=params["units"],
        epochs=50,
        lr=params["lr"],
        momentum=params["momentum"],
        activation=params["act"],
        val_percent=0.2
    )

    nn.fit(X, y_norm)
    loss_curve = nn.loss_epochs()

    # Align predictions with y's (n, 1) shape. If predict() returned a 1-D array,
    # `y - y_pred` would broadcast to an (n, n) matrix and silently corrupt MAPE;
    # a size mismatch raises here instead of producing wrong numbers.
    y_pred_norm = np.asarray(nn.predict(X)).reshape(y.shape)
    # Undo the target standardisation to get predictions in price units.
    y_pred = y_pred_norm * y_std + y_mean

    mse = mean_squared_error(y, y_pred)
    mae = mean_absolute_error(y, y_pred)
    if nonzero_price.any():
        relative_error = (y[nonzero_price] - y_pred[nonzero_price]) / y[nonzero_price]
        mape = np.mean(np.abs(relative_error)) * 100
    else:
        # No row has a usable denominator; report MAPE as undefined.
        mape = float('nan')

    results.append({
        "Layers": params["layers"],
        "Structure": str(params["units"]),
        "LR": params["lr"],
        "Momentum": params["momentum"],
        "Act": params["act"],
        "MSE": mse,
        "MAE": mae,
        "MAPE": mape,
        "Loss": loss_curve
    })

    # Keep the predictions, parameters and loss history of the best model so far.
    if mse < best_mse:
        best_mse = mse
        best_preds = y_pred
        best_params = params
        best_loss_curve = loss_curve

    print(f" → MSE: {mse:.2f}, MAE: {mae:.2f}, MAPE: {mape:.2f}%")


In [None]:
#14
# Tabulate every tried configuration, ordered from lowest to highest MSE, with
# rows renumbered from 0 after sorting.
df_results = (
    pd.DataFrame(results)
    .sort_values("MSE")
    .reset_index(drop=True)
)
df_results


In [None]:
#15
# idxmin() returns an index *label*, so the row is fetched with label-based .loc.
# Positional .iloc only gives the same row while the index happens to be 0..n-1.
best_idx = df_results["MSE"].idxmin()
best_model = df_results.loc[best_idx]
best_model


In [None]:
#16
# Predictions of the lowest-MSE configuration against true prices; points on the
# dashed red diagonal are perfect predictions.
price_bounds = [y.min(), y.max()]
plt.scatter(y, best_preds, alpha=0.3)
plt.plot(price_bounds, price_bounds, 'r--')
plt.title("Best Model: Predicted vs Real")
plt.xlabel("Real Price")
plt.ylabel("Predicted Price")
plt.show()


In [None]:
#17
# Loss history of the lowest-MSE configuration: column 0 is drawn as the
# training curve and column 1 as the validation curve.
loss = best_loss_curve
for curve_idx, curve_label in enumerate(('Train MSE', 'Validation MSE')):
    plt.plot(loss[:, curve_idx], label=curve_label)
plt.title("Best Model Loss Curve")
plt.xlabel("Epoch")
plt.ylabel("MSE")
plt.legend()
plt.show()


In [None]:
#18
# Sanity check of the target in original price units: a small sample, the
# shape, and basic summary statistics.
y_head = y[:10]
print("y first 10 values:\n", y_head)
print("y shape:", y.shape)

y_min, y_max = y.min(), y.max()
print("min:", y_min, "max:", y_max)

y_avg, y_spread = y.mean(), y.std()
print("mean:", y_avg, "std:", y_spread)
