In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Colab-only step: mount Google Drive so the CSV inputs and the model
# checkpoint under /content/drive/MyDrive are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Loading the datasets and scaling the continuous features

In [None]:
# Load the pre-processed train/test tables from Drive.
train_df_raw = pd.read_csv('/content/drive/MyDrive/Satellite_Property_Project/train_processed.csv')
test_df_raw = pd.read_csv('/content/drive/MyDrive/Satellite_Property_Project/test_processed.csv')

# Columns that get standardised (zero mean / unit variance).
continuous_features = [
    'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot',
    'floors', 'sqft_basement', 'lat', 'long',
    'sqft_living15', 'sqft_lot15', 'house_age',
    'grade', 'condition', 'view'
]

# 0/1 flags are passed to the model unscaled.
binary_features = ['is_renovated', 'waterfront']

all_features = continuous_features + binary_features

# Hold out 20% of the training rows for validation. The split results are
# slices of train_df_raw; take explicit copies so the column assignments below
# write to independent frames instead of triggering pandas'
# SettingWithCopyWarning (and leave train_df_raw itself unscaled).
train_df, val_df = train_test_split(train_df_raw, test_size=0.2, random_state=42)
train_df = train_df.copy()
val_df = val_df.copy()

# Fit the scaler on the training split only so that no validation/test
# statistics leak into the transformation.
scaler = StandardScaler()
scaler.fit(train_df[continuous_features])

train_df[continuous_features] = scaler.transform(train_df[continuous_features])
val_df[continuous_features] = scaler.transform(val_df[continuous_features])
# NOTE: despite its name, test_df_raw holds *scaled* continuous features from
# here on; it is scaled in place because later cells consume it under this name.
test_df_raw[continuous_features] = scaler.transform(test_df_raw[continuous_features])

Preparing dataset for training

In [None]:
class TabularDataset(Dataset):
    """Map-style dataset over the feature columns of a pandas DataFrame.

    The feature matrix (and the labels or ids) are converted once at
    construction time, so ``__getitem__`` is a cheap tensor index instead of
    building a pandas row Series per sample. Building a row Series also
    upcasts every value to a common dtype, which turned integer ``id`` values
    into floats on all-numeric frames; ids are now returned with the dtype of
    the ``id`` column.

    Because the data is snapshotted in ``__init__``, later mutations of
    ``dataframe`` are not reflected in the dataset.

    Args:
        dataframe: Source table. Must contain every column in ``cols`` plus
            ``'log_price'`` (training/validation) or ``'id'`` (test).
        cols: Names of the feature columns, in the order they are fed to the
            model.
        is_test: When True, items are ``(features, id)``; otherwise
            ``(features, label)``.

    Raises:
        KeyError: If a required column is missing from ``dataframe``.
    """

    def __init__(self, dataframe, cols, is_test=False):
        self.df = dataframe
        self.cols = cols
        self.is_test = is_test

        # (n_rows, n_features) float32 matrix, materialised once.
        self.features = torch.tensor(
            dataframe[list(cols)].values.astype(np.float32)
        )
        if is_test:
            # Keep ids in their native dtype (no float round-trip).
            self.ids = dataframe['id'].values
            self.labels = None
        else:
            self.ids = None
            self.labels = torch.tensor(
                dataframe['log_price'].values.astype(np.float32)
            )

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        tabular = self.features[idx]

        if self.is_test:
            return tabular, self.ids[idx]
        # 0-dim float32 tensor; the default collate stacks these to (batch,).
        return tabular, self.labels[idx]

batch_size = 64

# Labelled splits yield (features, log_price); the test split yields (features, id).
train_ds, val_ds = (
    TabularDataset(frame, all_features) for frame in (train_df, val_df)
)
test_ds = TabularDataset(test_df_raw, all_features, is_test=True)

# Only the training loader shuffles; validation and test keep row order.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=False)
    for split in (val_ds, test_ds)
)

print("Tabular Data Loaded Successfully.")

Tabular Data Loaded Successfully.


Defining a simple neural network architecture

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class SimpleTabularNN(nn.Module):
    """Three-layer MLP regressor for tabular features.

    Architecture: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Linear(1).
    Dropout is applied after the first hidden layer only.

    Args:
        input_dim: Number of input features per sample.
        hidden_dims: Widths of the two hidden layers. Defaults to ``(128, 64)``.
        dropout: Dropout probability after the first hidden layer.
            Defaults to ``0.2``.

    Raises:
        ValueError: If ``hidden_dims`` does not contain exactly two entries.

    The layer attribute names (``fc1``, ``fc2``, ``fc3``) are kept stable so
    previously saved ``state_dict`` checkpoints still load.
    """

    def __init__(self, input_dim, hidden_dims=(128, 64), dropout=0.2):
        super().__init__()

        first_width, second_width = hidden_dims

        self.fc1 = nn.Linear(input_dim, first_width)
        self.fc2 = nn.Linear(first_width, second_width)
        self.fc3 = nn.Linear(second_width, 1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch,)`` predictions."""
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        # Drop the trailing singleton dimension so outputs line up with the
        # 1-D label tensor expected by the loss.
        return x.squeeze(1)

# Seed PyTorch's RNG before the model is created so weight initialisation,
# dropout masks and DataLoader shuffling are repeatable across
# Restart-and-Run-All (42 matches the random_state used for the data split).
# Some CUDA kernels may still be non-deterministic.
torch.manual_seed(42)

# Use the GPU when the runtime has one, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleTabularNN(input_dim=len(all_features)).to(device)
print(f"Model initialized on {device}")

Model initialized on cuda


Training the model for up to 200 epochs with early stopping (patience of 15 epochs)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import os

# Checkpoint location on Drive; create the parent folder up front so the
# first torch.save inside the training loop cannot fail on a missing directory.
SAVE_PATH = "/content/drive/MyDrive/MainProject/Models/best_simple_tabular_nn.pth"
os.makedirs(os.path.split(SAVE_PATH)[0], exist_ok=True)

# Adam over all model parameters, optimising mean-squared error on log_price.
optimizer = optim.Adam(params=model.parameters(), lr=0.005)
criterion = nn.MSELoss()

def train_simple_with_early_stopping(
    num_epochs=200,
    patience=15,
    min_delta=1e-4
):
    """Train the global ``model`` with early stopping on validation loss.

    Uses the notebook-level ``model``, ``train_loader``, ``val_loader``,
    ``criterion``, ``optimizer``, ``device`` and ``SAVE_PATH``. Whenever the
    validation loss improves on the best value so far by more than
    ``min_delta``, the model's ``state_dict`` is written to ``SAVE_PATH``.

    Epoch losses are per-sample means: each batch loss is weighted by its
    batch size, so a smaller final batch does not get the same weight as a
    full one. This relies on ``criterion`` averaging over the batch, which is
    the default reduction of ``nn.MSELoss()``.

    Args:
        num_epochs: Maximum number of epochs to run.
        patience: Number of consecutive epochs without sufficient improvement
            after which training stops.
        min_delta: Minimum decrease in validation loss that counts as an
            improvement.

    Raises:
        ZeroDivisionError: If a loader yields no samples.
    """
    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(num_epochs):

        # ---- training pass ----
        model.train()
        train_loss_sum = 0.0
        train_seen = 0

        for tabular, labels in train_loader:
            tabular = tabular.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(tabular)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Undo the batch mean so the epoch figure is a true per-sample mean.
            batch_n = labels.size(0)
            train_loss_sum += loss.item() * batch_n
            train_seen += batch_n

        train_loss = train_loss_sum / train_seen

        # ---- validation pass (no gradients, dropout disabled) ----
        model.eval()
        val_loss_sum = 0.0
        val_seen = 0
        with torch.no_grad():
            for tabular, labels in val_loader:
                tabular = tabular.to(device)
                labels = labels.to(device)
                outputs = model(tabular)

                batch_n = labels.size(0)
                val_loss_sum += criterion(outputs, labels).item() * batch_n
                val_seen += batch_n

        val_loss = val_loss_sum / val_seen

        print(
            f"Epoch {epoch+1:03d} | "
            f"Train Loss: {train_loss:.4f} | "
            f"Val Loss: {val_loss:.4f}"
        )

        # ---- early-stopping bookkeeping ----
        if best_val_loss - val_loss > min_delta:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), SAVE_PATH)
            print("  ✓ Saved new best model")
        else:
            patience_counter += 1
            print(f"  ✗ No improvement ({patience_counter}/{patience})")

            if patience_counter >= patience:
                print("\n🛑 Early stopping triggered")
                break

    print("\nTraining completed.")

# Up to 200 epochs; stop after 15 consecutive epochs without improvement.
train_simple_with_early_stopping(num_epochs=200, patience=15)


Epoch 001 | Train Loss: 0.9188 | Val Loss: 0.2373
  ✓ Saved new best model
Epoch 002 | Train Loss: 0.6175 | Val Loss: 0.1356
  ✓ Saved new best model
Epoch 003 | Train Loss: 0.4195 | Val Loss: 0.0689
  ✓ Saved new best model
Epoch 004 | Train Loss: 0.3384 | Val Loss: 0.0760
  ✗ No improvement (1/15)
Epoch 005 | Train Loss: 0.2280 | Val Loss: 0.0640
  ✓ Saved new best model
Epoch 006 | Train Loss: 0.1786 | Val Loss: 0.1051
  ✗ No improvement (1/15)
Epoch 007 | Train Loss: 0.1139 | Val Loss: 0.0596
  ✓ Saved new best model
Epoch 008 | Train Loss: 0.0916 | Val Loss: 0.0448
  ✓ Saved new best model
Epoch 009 | Train Loss: 0.0706 | Val Loss: 0.0452
  ✗ No improvement (1/15)
Epoch 010 | Train Loss: 0.0687 | Val Loss: 0.0442
  ✓ Saved new best model
Epoch 011 | Train Loss: 0.0634 | Val Loss: 0.0580
  ✗ No improvement (1/15)
Epoch 012 | Train Loss: 0.0649 | Val Loss: 0.0886
  ✗ No improvement (2/15)
Epoch 013 | Train Loss: 0.0662 | Val Loss: 0.0628
  ✗ No improvement 

Calculating RMSE and R² on the validation set, with predictions converted back from the log scale to prices

In [None]:
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

# Restore the best checkpoint written during training. map_location remaps the
# stored tensors onto the current device, so a checkpoint saved on a GPU
# runtime still loads when this cell runs on a CPU-only runtime.
model.load_state_dict(torch.load(SAVE_PATH, map_location=device))
model.eval()

val_preds, val_targets = [], []

# Collect predictions and targets for the whole validation split.
with torch.no_grad():
    for tabular, labels in val_loader:
        tabular, labels = tabular.to(device), labels.to(device)
        out = model(tabular)
        val_preds.extend(out.cpu().numpy().flatten())
        val_targets.extend(labels.cpu().numpy().flatten())

# Map log-space values back to prices before scoring.
# NOTE(review): expm1 assumes log_price was created with log1p(price) -
# confirm against the preprocessing notebook.
y_true = np.expm1(val_targets)
y_pred = np.expm1(val_preds)
r2 = r2_score(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))

print("----------------------------")
print("TABULAR ONLY RESULTS:")
print(f"R2 Score: {r2:.4f}")
print(f"RMSE:     ${rmse:,.2f}")
print("----------------------------")

----------------------------
TABULAR ONLY RESULTS:
R2 Score: 0.8500
RMSE:     $136,789.82
----------------------------
