<a href="https://colab.research.google.com/github/OmarFarukNoman/Into-to-Machine-Learning/blob/main/Homework-5/Problem_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Problem-3(a)**

In [4]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import time

# We're using housing dataset for HW5
url = "https://raw.githubusercontent.com/HamedTabkhi/Intro-to-ML/main/Dataset/Housing.csv"
df = pd.read_csv(url)

# I've separated features and target
features = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']
target = 'price'

X_in = df[features].values
y_out = df[target].values

# Split data using PyTorch (80% training, 20% validation)
torch.manual_seed(42)          # for reproducibility
N = X_in.shape[0]
train_size = int(0.8 * N)

# Convert to tensors first
X_in_tensor = torch.FloatTensor(X_in)
y_out_tensor = torch.FloatTensor(y_out).reshape(-1, 1)

# Create random permutation of indices
indices = torch.randperm(N)
train_idx = indices[:train_size]
val_idx   = indices[train_size:]

# Split the tensors directly
X_in_train = X_in_tensor[train_idx]
X_in_val   = X_in_tensor[val_idx]
y_out_train = y_out_tensor[train_idx]
y_out_val   = y_out_tensor[val_idx]

# Standardization (convert back to numpy for sklearn, then back to tensor)
scaler = StandardScaler()
X_in_train_scaled = scaler.fit_transform(X_in_train.numpy())
X_in_val_scaled = scaler.transform(X_in_val.numpy())

# Convert back to PyTorch tensors
X_in_train_tensor = torch.FloatTensor(X_in_train_scaled)
y_out_train_tensor = y_out_train.clone()
X_in_val_tensor = torch.FloatTensor(X_in_val_scaled)
y_out_val_tensor = y_out_val.clone()

# Neural Network with one hidden layer (8 nodes)
class HousingNN(nn.Module):
    def __init__(self, input_dim):
        super(HousingNN, self).__init__()
        self.hidden = nn.Linear(input_dim, 8)  # One hidden layer with 8 nodes
        self.output = nn.Linear(8, 1)          # Output layer

    def forward(self, x):
        x = torch.relu(self.hidden(x))  # ReLU activation for hidden layer
        x = self.output(x)              # Linear output for regression
        return x

# Initialize model
input_dim = len(features)
model = HousingNN(input_dim)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 200
train_losses = []
val_losses = []

start_time = time.time()

for epoch in range(num_epochs):
    # Training phase
    model.train()
    optimizer.zero_grad()

    # Forward pass
    train_outputs = model(X_in_train_tensor)
    train_loss = criterion(train_outputs, y_out_train_tensor)

    # Backward pass and optimization
    train_loss.backward()
    optimizer.step()

    # Validation phase
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_in_val_tensor)
        val_loss = criterion(val_outputs, y_out_val_tensor)

    # Store losses
    train_losses.append(train_loss.item())
    val_losses.append(val_loss.item())

training_time = time.time() - start_time

# Final evaluation
model.eval()
with torch.no_grad():
    # Final predictions
    final_train_outputs = model(X_in_train_tensor)
    final_val_outputs = model(X_in_val_tensor)

    final_train_loss = criterion(final_train_outputs, y_out_train_tensor).item()
    final_val_loss = criterion(final_val_outputs, y_out_val_tensor).item()

    # Calculate RMSE for accuracy metric
    train_rmse = torch.sqrt(criterion(final_train_outputs, y_out_train_tensor)).item()
    val_rmse = torch.sqrt(criterion(final_val_outputs, y_out_val_tensor)).item()

    # Calculate R-squared for accuracy
    train_mean = y_out_train_tensor.mean()
    train_ss_total = ((y_out_train_tensor - train_mean) ** 2).sum()
    train_ss_residual = ((y_out_train_tensor - final_train_outputs) ** 2).sum()
    train_r2 = 1 - (train_ss_residual / train_ss_total).item()

    val_mean = y_out_val_tensor.mean()
    val_ss_total = ((y_out_val_tensor - val_mean) ** 2).sum()
    val_ss_residual = ((y_out_val_tensor - final_val_outputs) ** 2).sum()
    val_r2 = 1 - (val_ss_residual / val_ss_total).item()

# Results
print("Neural Network Result after 200 iterations:")
print(f"\nTraining Time: {training_time:.2f} seconds")
print(f"Final Training Loss (MSE): {final_train_loss:.2f}")
print(f"Final Validation Loss (MSE): {final_val_loss:.2f}")
print(f"Training RMSE: {train_rmse:.2f}")
print(f"Validation RMSE: {val_rmse:.2f}")
print(f"Training R² Score: {train_r2:.4f}")
print(f"Validation R² Score: {val_r2:.4f}")

# Model architecture info
print(f"\nModel Architecture:")
print(f"Input Features: {input_dim} features")
print(f"Input Layer: {input_dim+1} nodes") # 1 is added for bias term
print(f"Hidden Layer: 8 nodes with ReLU activation")
print(f"Output Layer: 1 node (linear)")
print(f"Total Parameters: {sum(p.numel() for p in model.parameters())}")

Neural Network Result after 200 iterations:

Training Time: 0.73 seconds
Final Training Loss (MSE): 26305951170560.00
Final Validation Loss (MSE): 25845322219520.00
Training RMSE: 5128933.00
Validation RMSE: 5083829.50
Training R² Score: -6.1883
Validation R² Score: -8.1609

Model Architecture:
Input Features: 5 features
Input Layer: 6 nodes
Hidden Layer: 8 nodes with ReLU activation
Output Layer: 1 node (linear)
Total Parameters: 57


**Problem-3(b)**

In [9]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import time
import matplotlib.pyplot as plt

# We're using housing dataset for HW5
url = "https://raw.githubusercontent.com/HamedTabkhi/Intro-to-ML/main/Dataset/Housing.csv"
df = pd.read_csv(url)

# I've separated features and target
features = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']
target = 'price'

X_in = df[features].values
y_out = df[target].values

# Split data using PyTorch (80% training, 20% validation)
torch.manual_seed(42)          # for reproducibility
N = X_in.shape[0]
train_size = int(0.8 * N)

# Convert to tensors first
X_in_tensor = torch.FloatTensor(X_in)
y_out_tensor = torch.FloatTensor(y_out).reshape(-1, 1)

# Create random permutation of indices
indices = torch.randperm(N)
train_idx = indices[:train_size]
val_idx   = indices[train_size:]

# Split the tensors directly
X_in_train = X_in_tensor[train_idx]
X_in_val   = X_in_tensor[val_idx]
y_out_train = y_out_tensor[train_idx]
y_out_val   = y_out_tensor[val_idx]

# Standardization (convert back to numpy for sklearn, then back to tensor)
scaler = StandardScaler()
X_in_train_scaled = scaler.fit_transform(X_in_train.numpy())
X_in_val_scaled = scaler.transform(X_in_val.numpy())

# Convert back to PyTorch tensors
X_in_train_tensor = torch.FloatTensor(X_in_train_scaled)
y_out_train_tensor = y_out_train.clone()
X_in_val_tensor = torch.FloatTensor(X_in_val_scaled)
y_out_val_tensor = y_out_val.clone()

# Neural Network with 3 hidden layers
class HousingNN(nn.Module):
    def __init__(self, input_dim):
        super(HousingNN, self).__init__()
        self.hidden1 = nn.Linear(input_dim, 16)  # First hidden layer with 16 nodes
        self.hidden2 = nn.Linear(16, 8)          # Second hidden layer with 8 nodes
        self.hidden3 = nn.Linear(8, 4)           # Third hidden layer with 4 nodes
        self.output = nn.Linear(4, 1)            # Output layer

    def forward(self, x):
        x = torch.relu(self.hidden1(x))  # ReLU activation for first hidden layer
        x = torch.relu(self.hidden2(x))  # ReLU activation for second hidden layer
        x = torch.relu(self.hidden3(x))  # ReLU activation for third hidden layer
        x = self.output(x)               # Linear output for regression
        return x

# Initialize model
input_dim = len(features)
model = HousingNN(input_dim)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 200
train_losses = []
val_losses = []

start_time = time.time()

for epoch in range(num_epochs):
    # Training phase
    model.train()
    optimizer.zero_grad()

    # Forward pass
    train_outputs = model(X_in_train_tensor)
    train_loss = criterion(train_outputs, y_out_train_tensor)

    # Backward pass and optimization
    train_loss.backward()
    optimizer.step()

    # Validation phase
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_in_val_tensor)
        val_loss = criterion(val_outputs, y_out_val_tensor)

    # Store losses
    train_losses.append(train_loss.item())
    val_losses.append(val_loss.item())

training_time = time.time() - start_time

# Final evaluation
model.eval()
with torch.no_grad():
    # Final predictions
    final_train_outputs = model(X_in_train_tensor)
    final_val_outputs = model(X_in_val_tensor)

    final_train_loss = criterion(final_train_outputs, y_out_train_tensor).item()
    final_val_loss = criterion(final_val_outputs, y_out_val_tensor).item()

    # Calculate RMSE for accuracy metric
    train_rmse = torch.sqrt(criterion(final_train_outputs, y_out_train_tensor)).item()
    val_rmse = torch.sqrt(criterion(final_val_outputs, y_out_val_tensor)).item()

    # Calculate R-squared for accuracy
    train_mean = y_out_train_tensor.mean()
    train_ss_total = ((y_out_train_tensor - train_mean) ** 2).sum()
    train_ss_residual = ((y_out_train_tensor - final_train_outputs) ** 2).sum()
    train_r2 = 1 - (train_ss_residual / train_ss_total).item()

    val_mean = y_out_val_tensor.mean()
    val_ss_total = ((y_out_val_tensor - val_mean) ** 2).sum()
    val_ss_residual = ((y_out_val_tensor - final_val_outputs) ** 2).sum()
    val_r2 = 1 - (val_ss_residual / val_ss_total).item()

# Overfitting Analysis
overfitting_gap = final_train_loss - final_val_loss
overfitting_percentage = (abs(overfitting_gap) / min(final_train_loss, final_val_loss)) * 100

# Performance gap analysis
train_val_gap = abs(train_rmse - val_rmse)
r2_gap = abs(train_r2 - val_r2)

# Results
print("Neural Network Result after 200 iterations:")
print(f"\nTraining Time: {training_time:.2f} seconds")
print(f"Final Training Loss (MSE): {final_train_loss:.2f}")
print(f"Final Validation Loss (MSE): {final_val_loss:.2f}")
print(f"Training RMSE: {train_rmse:.2f}")
print(f"Validation RMSE: {val_rmse:.2f}")
print(f"Training R² Score: {train_r2:.4f}")
print(f"Validation R² Score: {val_r2:.4f}")

# Overfitting Analysis Results
print(f"\nOverfitting Analysis:")
print(f"Training-Validation Loss Gap: {overfitting_gap:.4f}")
print(f"Performance Gap Percentage: {overfitting_percentage:.2f}%")
print(f"RMSE Gap: {train_val_gap:.4f}")
print(f"R² Score Gap: {r2_gap:.4f}")

# Model architecture info
print(f"\nModel Architecture:")
print(f"Input Features: {input_dim} features")
print(f"Input Layer: {input_dim+1} nodes")
print(f"Hidden Layer 1: 16 nodes with ReLU activation")
print(f"Hidden Layer 2: 8 nodes with ReLU activation")
print(f"Hidden Layer 3: 4 nodes with ReLU activation")
print(f"Output Layer: 1 node (linear)")
print(f"Total Parameters: {sum(p.numel() for p in model.parameters())}")


Neural Network Result after 200 iterations:

Training Time: 0.99 seconds
Final Training Loss (MSE): 26305831632896.00
Final Validation Loss (MSE): 25845208973312.00
Training RMSE: 5128921.00
Validation RMSE: 5083818.50
Training R² Score: -6.1883
Validation R² Score: -8.1608

Overfitting Analysis:
Training-Validation Loss Gap: 460622659584.0000
Performance Gap Percentage: 1.78%
RMSE Gap: 45102.5000
R² Score Gap: 1.9725

Model Architecture:
Input Features: 5 features
Input Layer: 6 nodes
Hidden Layer 1: 16 nodes with ReLU activation
Hidden Layer 2: 8 nodes with ReLU activation
Hidden Layer 3: 4 nodes with ReLU activation
Output Layer: 1 node (linear)
Total Parameters: 273
