# Neural Network Model Analysis on House Prices

In [290]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
from tqdm.auto import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [291]:
# Define a custom dataset
class CustomDataset(Dataset):
    def __init__(self, features, labels, device):
        self.features = features.to(device)
        self.labels = labels.to(device)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

In [292]:
# Load the data
X_train = pd.read_csv('./data/X_train.csv').values
y_train = pd.read_csv('./data/y_train.csv').values.squeeze()
X_test = pd.read_csv('./data/X_test.csv').values
y_test = pd.read_csv('./data/y_test.csv').values.squeeze()

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# Create custom datasets
train_dataset = CustomDataset(X_train_tensor, y_train_tensor, device)
test_dataset = CustomDataset(X_test_tensor, y_test_tensor, device)

# Data loaders
train_loader = DataLoader(dataset=train_dataset, batch_size=len(train_dataset), shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=len(test_dataset), shuffle=False)

In [293]:
def forward_block(input_dim, output_dim, leaky_param=0.0, dropout_p=0.0, final_layer=False):
    if final_layer is False:
        return nn.Sequential(
            nn.Dropout(p=dropout_p),
            nn.Linear(input_dim, output_dim),
            nn.LeakyReLU(leaky_param,inplace=True)
        )
    else:
        return nn.Sequential(
            nn.Dropout(p=dropout_p),
            nn.Linear(input_dim, output_dim)
        )


# Neural network architecture
class NeuralNet(nn.Module):
    def __init__(self, input_dim, layers, leaky_param, dropout_p):
        super(NeuralNet, self).__init__()
        self.first_layer = forward_block(input_dim, layers[0], leaky_param = leaky_param, dropout_p = dropout_p)
        self.num_layers = len(layers)
        self.middle_layers = []
        for i in range(self.num_layers - 1):
            self.middle_layers.append(
                forward_block(layers[i], layers[i+1], leaky_param = leaky_param, dropout_p = dropout_p).to(device)
            )
        self.last_layer = forward_block(layers[-1], 1, leaky_param = leaky_param, dropout_p = dropout_p, final_layer=True)
    
    def forward(self, x):
        outputs = self.first_layer(x)
        for i in range(self.num_layers - 1):
            outputs = self.middle_layers[i](outputs)
        outputs=self.last_layer(outputs)
        return outputs

In [300]:
# Function to compute RMSE
def rmse(loader, model):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for features, labels in loader:
            outputs = model(features)
            loss = criterion(outputs, labels).item()
            total_loss += loss * features.size(0)
    return np.sqrt(total_loss / len(loader.dataset))

# Training loop
def train(model, train_loader, criterion, optimizer, epochs):
    model.train()
    for epoch in range(epochs):
        for i, (features, labels) in enumerate(train_loader):
            # clear gradient
            optimizer.zero_grad()
            # calculate loss
            outputs = model(features)
            loss = criterion(outputs, labels)
            # back propagation
            loss.backward()
            optimizer.step()
        
        if (epoch+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Train RMSE: {np.sqrt(loss.item()):.4f}')
        if (epoch+1) % 500 == 0:
            print(f'Test RMSE: {rmse(test_loader, model)}')

In [314]:
# parameters
num_epochs=6500
layer_archt=[2048,1024,512,512,512]
lr = 0.01
l2penalty = 0.0015
leaky_param=0.10
dropout_p=0.00

In [315]:
model = NeuralNet(input_dim = len(X_train[0]), layers = layer_archt, leaky_param = leaky_param, dropout_p = dropout_p).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = lr, weight_decay = l2penalty)
criterion = nn.MSELoss()

In [316]:
train(model, tqdm(train_loader), criterion, optimizer, num_epochs)

100%|██████████| 1/1 [00:00<00:00, 99.99it/s]


Epoch [10/6500], Train RMSE: 197950.9137
Epoch [20/6500], Train RMSE: 197938.8706
Epoch [30/6500], Train RMSE: 197912.1745
Epoch [40/6500], Train RMSE: 197866.2032
Epoch [50/6500], Train RMSE: 197797.3195
Epoch [60/6500], Train RMSE: 197702.8163
Epoch [70/6500], Train RMSE: 197580.7913
Epoch [80/6500], Train RMSE: 197429.8759
Epoch [90/6500], Train RMSE: 197249.1421
Epoch [100/6500], Train RMSE: 197038.0297
Epoch [110/6500], Train RMSE: 196796.1808
Epoch [120/6500], Train RMSE: 196523.3359
Epoch [130/6500], Train RMSE: 196219.3555
Epoch [140/6500], Train RMSE: 195884.4080
Epoch [150/6500], Train RMSE: 195518.7116
Epoch [160/6500], Train RMSE: 195122.4712
Epoch [170/6500], Train RMSE: 194695.8052
Epoch [180/6500], Train RMSE: 194238.9140
Epoch [190/6500], Train RMSE: 193752.1544
Epoch [200/6500], Train RMSE: 193236.0003
Epoch [210/6500], Train RMSE: 192690.9381
Epoch [220/6500], Train RMSE: 192117.3508
Epoch [230/6500], Train RMSE: 191515.5595
Epoch [240/6500], Train RMSE: 190885.8983
E

In [317]:
test_rmse = rmse(test_loader, model)
print(f'Test RMSE: {test_rmse}')

Test RMSE: 39167.84150294729
