In [None]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from torch.utils.data import TensorDataset, DataLoader

# Load the prepared table written to disk earlier in the project.
# NOTE(review): pd.read_pickle unpickles arbitrary Python objects, so it should
# only ever be pointed at files generated by this project itself.
df_prepared = pd.read_pickle('data/02_data_prepared.pkl')

# --- 1. Prepare data for PyTorch ---
# Every column except the target 'price' is used as a model input.
X = df_prepared.drop(columns='price').values
# Keep the target two-dimensional, shape (n_samples, 1), so it lines up with
# the single-unit output of the network instead of broadcasting in the loss.
y = df_prepared['price'].values.reshape(-1, 1)

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert each split to a float32 tensor (the default dtype of nn.Linear
# parameters). The NumPy splits themselves are left untouched.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(split.astype(np.float32))
    for split in (X_train, y_train, X_test, y_test)
)

# Wrap the tensors so the network can be fed in mini-batches.
batch_size = 64
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Training batches are reshuffled every epoch; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# --- 2. Define the Neural Network Architecture ---
class RegressionNet(nn.Module):
    """Fully connected regressor: input_size -> 128 -> 64 -> 32 -> 1.

    Each of the three hidden linear layers is followed by its own ReLU
    module; the final linear layer emits a single unbounded value per sample.
    """

    def __init__(self, input_size):
        """Create the layers.

        Args:
            input_size: Number of input features per sample.
        """
        super().__init__()
        # Hidden stack; widths narrow from 128 down to 32.
        self.layer1 = nn.Linear(input_size, 128)
        self.relu1 = nn.ReLU()
        self.layer2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        self.layer3 = nn.Linear(64, 32)
        self.relu3 = nn.ReLU()
        # Regression head: one output, no activation.
        self.output_layer = nn.Linear(32, 1)

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        hidden = self.relu1(self.layer1(x))
        hidden = self.relu2(self.layer2(hidden))
        hidden = self.relu3(self.layer3(hidden))
        return self.output_layer(hidden)

# Seed torch's global RNG so weight initialisation and DataLoader shuffling
# are repeatable, in line with the fixed random_state used for the data split.
torch.manual_seed(42)

# The network's input width is the number of feature columns.
input_size = X_train.shape[1]
model_nn = RegressionNet(input_size)
print("Model Architecture:")
print(model_nn)

# --- 3. Training the Model ---
learning_rate = 0.001
epochs = 50
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model_nn.parameters(), lr=learning_rate)

# Per-epoch mean squared error on each split, used for the learning curves.
train_losses = []
test_losses = []

print("\nStarting training...")
for epoch in range(epochs):
    # ---- optimisation pass over the training data ----
    model_nn.train()  # Set model to training mode
    batch_train_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        predictions = model_nn(X_batch)
        loss = loss_function(predictions, y_batch)
        loss.backward()
        optimizer.step()
        # MSELoss returns the batch mean. Weight it by the batch size so the
        # final (possibly shorter) batch is not over-represented in the
        # epoch average.
        batch_train_loss += loss.item() * X_batch.size(0)
    train_losses.append(batch_train_loss / len(train_loader.dataset))

    # ---- monitoring pass over the held-out data (no weight updates) ----
    model_nn.eval()  # Set model to evaluation mode
    batch_test_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            predictions = model_nn(X_batch)
            loss = loss_function(predictions, y_batch)
            batch_test_loss += loss.item() * X_batch.size(0)
    test_losses.append(batch_test_loss / len(test_loader.dataset))

    # Report progress every 10th epoch.
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_losses[-1]:.4f}, Test Loss: {test_losses[-1]:.4f}")

# --- 4. Evaluation and Plotting Learning Curves ---
# Draw the per-epoch train and test loss on one set of axes so that any gap
# between the two curves is easy to spot.
plt.figure(figsize=(10, 6))
for loss_history, curve_label in (
    (train_losses, 'Train Loss'),
    (test_losses, 'Test Loss'),
):
    plt.plot(loss_history, label=curve_label)
plt.title('Learning Curves')
plt.xlabel('Epoch')
plt.ylabel('Loss (MSE)')
plt.legend()
plt.show()

# Score the trained network on the whole held-out set in a single forward
# pass, with gradient tracking switched off.
model_nn.eval()
with torch.no_grad():
    predictions_nn_tensor = model_nn(X_test_tensor)
    predictions_nn = predictions_nn_tensor.numpy()

# The metrics are computed against the original NumPy targets from the split.
mae_nn = mean_absolute_error(y_test, predictions_nn)
r2_nn = r2_score(y_test, predictions_nn)

nn_report_lines = [
    "\n--- Final Neural Network Evaluation ---",
    f"Mean Absolute Error (MAE): ${mae_nn:,.0f}",
    f"R-squared (R²): {r2_nn:.2f}",
]
print("\n".join(nn_report_lines))

# Placeholder baseline: replace this value with the Random Forest MAE actually
# obtained in the previous notebook before reading anything into the comparison.
mae_rf_final = 61823
comparison_lines = [
    "\n--- Comparison with Best Classical Model (Random Forest) ---",
    f"Neural Network MAE: ${mae_nn:,.0f}",
    f"Random Forest MAE: ${mae_rf_final:,.0f}",
]
print("\n".join(comparison_lines))