In [2]:
import sys
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Add the ../src directory (resolved to an absolute path relative to the
# current working directory) to the import search path so the project-local
# module imported below can be found.
sys.path.append(os.path.abspath("../src"))

# Project-local model class; presumably defined in ../src/NeuralNet.py --
# confirm against the repository layout.
from NeuralNet import NeuralNet

In [3]:
# Read the two CSV splits that live next to the notebook.
train = pd.read_csv("../data/train.csv")
test = pd.read_csv("../data/test.csv")

# Quick sanity check: report (rows, columns) for each split.
for label, frame in (("Train shape:", train), ("Test shape:", test)):
    print(label, frame.shape)

Train shape: (19237, 18)
Test shape: (8245, 18)


In [4]:
# Columns that should be numeric but are cleaned as text first.
numeric_object_cols = ['Engine volume','Mileage','Levy']

# Strip every character that is not a digit or a dot, then parse each column
# as a number; anything that still fails to parse (e.g. an empty string)
# becomes NaN.
stripped = train[numeric_object_cols].replace(r'[^\d.]', '', regex=True)
parsed = stripped.apply(pd.to_numeric, errors='coerce')

# Missing values are filled with 0 (the column mean would be an alternative).
train[numeric_object_cols] = parsed.fillna(0)

In [5]:
# Categorical features; these are the columns handed to the one-hot encoder.
cat_cols = ['Manufacturer','Model','Category','Leather interior','Fuel type',
            'Gear box type','Drive wheels','Doors','Wheel','Color']

# Numeric features; these are the columns handed to the scaler.
# 'Levy' belongs here rather than in cat_cols: the cleaning cell converts it
# to a number and zero-fills it, so one-hot encoding it would create one
# indicator column per distinct amount and discard its magnitude.
num_cols = ['Prod. year','Cylinders','Airbags','Engine volume','Mileage','Levy']

# Target variable, reshaped into a column vector of shape (n_samples, 1).
y = train['Price'].values.reshape(-1,1)

In [7]:
# Two preprocessing branches: standardise the numeric columns and one-hot
# encode the categorical ones. handle_unknown='ignore' keeps transform() from
# raising on categories that were not present when the encoder was fitted.
numeric_step = ('num', StandardScaler(), num_cols)
categorical_step = ('cat', OneHotEncoder(handle_unknown='ignore'), cat_cols)

preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# Fit on the training frame and build the feature matrix in a single pass.
# NOTE(review): the result may be a SciPy sparse matrix rather than a dense
# array, depending on how sparse the encoded output is.
X = preprocessor.fit_transform(train)

In [8]:
# Shuffle the rows of X and y with one shared permutation so every feature
# row stays paired with its target.
# The permutation comes from a seeded generator so the row order is the same
# on every "Restart Kernel -> Run All"; the original unseeded shuffle produced
# a different order on each run.
SHUFFLE_SEED = 42
idx = np.random.default_rng(SHUFFLE_SEED).permutation(X.shape[0])
X, y = X[idx], y[idx]

In [None]:
# Densify the feature matrix when it is a sparse container (anything exposing
# .toarray()); SciPy sparse matrices raise TypeError on len(), so a dense
# array is passed to the network instead.
X = X.toarray() if hasattr(X, "toarray") else X

# Width of the input layer = number of feature columns.
input_size = X.shape[1]

# Network definition: one hidden layer of 20 neurons, sigmoid activation.
# NOTE(review): the exact meaning of each argument is defined by the
# project-local NeuralNet class -- confirm there.
hyperparams = dict(
    n_layers=3,
    n_units=[input_size, 20, 1],
    epochs=50,
    lr=0.01,
    momentum=0.9,
    activation='sigmoid',
    val_percent=0.2,
)
nn = NeuralNet(**hyperparams)

# Train, then predict on the same matrix that was used for fitting.
nn.fit(X, y)
y_pred = nn.predict(X)

TypeError: sparse array length is ambiguous; use getnnz() or shape[0]

In [None]:
# Per-epoch errors recorded by the network; column 0 is plotted as the
# training curve and column 1 as the validation curve.
loss = nn.loss_epochs()

fig, ax = plt.subplots()
for column, label in enumerate(('Train MSE', 'Validation MSE')):
    ax.plot(loss[:, column], label=label)
ax.set_xlabel('Epochs')
ax.set_ylabel('MSE')
ax.set_title('Evolución del error')
ax.legend()
plt.show()

# Print the last row of the loss history, i.e. the errors of the final epoch.
print("Errores por época (última fila = última epoch):", loss[-1])