In [14]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import r2_score
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import CosineAnnealingLR
import tkinter as tk
from tkinter import messagebox

# Seed PyTorch's global RNG so repeated runs are reproducible
seed = 100
torch.manual_seed(seed)

# Load the California housing data and hold out 20% of it for validation
dataset = fetch_california_housing()
x, y = dataset.data, dataset.target
x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Feature engineering: add pairwise interaction terms (degree 2, no bias
# column), then standardize. Both transformers are fitted on the training
# split only and merely applied to the validation split.
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
scaler = StandardScaler()
x_train_poly = scaler.fit_transform(poly.fit_transform(x_train))
x_val_poly = scaler.transform(poly.transform(x_val))

# Wrap the arrays as float32 tensors; targets are reshaped to columns (N, 1)
x_train_tensor = torch.tensor(x_train_poly, dtype=torch.float32)
x_val_tensor = torch.tensor(x_val_poly, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).reshape(-1, 1)

# Mini-batch loaders; only the training loader shuffles its samples
batch_size = 32
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
val_dataset = TensorDataset(x_val_tensor, y_val_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

# Define the improved model with LayerNorm and adjusted dropout rates
class ImprovedRegressionNet(nn.Module):
    """Fully connected regressor: four hidden blocks followed by a linear head.

    Each hidden block is ``Linear -> LayerNorm -> ReLU -> Dropout``. Layer
    widths shrink 512 -> 256 -> 128 -> 64 while the dropout rate falls
    0.2 -> 0.15 -> 0.1 -> 0.05. The head maps the 64 features to one output.

    Args:
        input_dim: Number of input features. When omitted, the width is taken
            from the module-level ``x_train_poly`` array (its column count),
            which is what the zero-argument constructor has always used.
    """

    def __init__(self, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible default: size the first layer from the
            # engineered training matrix defined at module level.
            input_dim = x_train_poly.shape[1]

        # Attribute names and creation order are deliberately unchanged so
        # that existing state dicts still load and seeded initialization
        # produces the same weights as before.
        self.fc1 = nn.Linear(input_dim, 512)
        self.ln1 = nn.LayerNorm(512)
        self.dropout1 = nn.Dropout(0.2)

        self.fc2 = nn.Linear(512, 256)
        self.ln2 = nn.LayerNorm(256)
        self.dropout2 = nn.Dropout(0.15)

        self.fc3 = nn.Linear(256, 128)
        self.ln3 = nn.LayerNorm(128)
        self.dropout3 = nn.Dropout(0.1)

        self.fc4 = nn.Linear(128, 64)
        self.ln4 = nn.LayerNorm(64)
        self.dropout4 = nn.Dropout(0.05)

        self.fc5 = nn.Linear(64, 1)

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, 1)`` predictions."""
        x = torch.relu(self.ln1(self.fc1(x)))
        x = self.dropout1(x)
        x = torch.relu(self.ln2(self.fc2(x)))
        x = self.dropout2(x)
        x = torch.relu(self.ln3(self.fc3(x)))
        x = self.dropout3(x)
        x = torch.relu(self.ln4(self.fc4(x)))
        x = self.dropout4(x)
        # No activation on the head: the output is an unbounded regression value
        x = self.fc5(x)
        return x

# Checkpoint location: the training loop writes the model with the lowest
# validation loss seen so far to checkpoints/best_model.pth
checkpoint_dir = "checkpoints"
checkpoint_path = os.path.join(checkpoint_dir, "best_model.pth")
os.makedirs(checkpoint_dir, exist_ok=True)
best_val_loss = float('inf')  # any finite first-epoch loss counts as an improvement

# Network and mean-squared-error objective
model = ImprovedRegressionNet()
criterion = nn.MSELoss()

# AdamW optimizer (decoupled weight decay)
optimizer = optim.AdamW(
    model.parameters(),
    lr=0.001,
    weight_decay=1e-3,
)

# Cosine-annealed learning rate, stepped once per epoch by the training loop.
# NOTE(review): T_max=50 while the loop runs up to 61 epochs - confirm the
# schedule length is intended.
scheduler = CosineAnnealingLR(optimizer, T_max=50)

# Function to save a checkpoint
def save_checkpoint(model, optimizer, epoch, train_loss, val_loss, path=None):
    """Write the current training state to disk with ``torch.save``.

    The saved dictionary holds the model and optimizer state dicts together
    with the epoch index and the two loss values, under the keys
    ``model_state_dict``, ``optimizer_state_dict``, ``epoch``, ``train_loss``
    and ``val_loss``.

    Args:
        model: Module whose ``state_dict()`` is stored.
        optimizer: Optimizer whose ``state_dict()`` is stored.
        epoch: Zero-based epoch index (printed one-based in the message).
        train_loss: Training loss recorded for this epoch.
        val_loss: Validation loss recorded for this epoch.
        path: Destination file. Defaults to the module-level
            ``checkpoint_path`` so existing five-argument calls behave as
            before.
    """
    if path is None:
        path = checkpoint_path

    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'epoch': epoch,
        'train_loss': train_loss,
        'val_loss': val_loss
    }
    torch.save(checkpoint, path)
    print(f"Checkpoint saved at epoch {epoch+1} with validation loss: {val_loss:.4f}")

# Function to calculate R-squared
def r2_score_torch(y_true, y_pred):
    """Return the R-squared score of ``y_pred`` against ``y_true``.

    Both tensors are moved to the CPU, detached from autograd and converted
    to NumPy arrays before being passed to scikit-learn's ``r2_score``.
    """
    targets, predictions = (
        tensor.cpu().detach().numpy() for tensor in (y_true, y_pred)
    )
    return r2_score(targets, predictions)

# Training and validation loop with checkpointing
num_epochs = 61  # Upper bound on epochs; early stopping may end the run sooner
patience = 30  # Epochs without a new best validation loss before stopping
no_improvement_count = 0
writer = SummaryWriter()

# try/finally guarantees the TensorBoard writer is flushed and closed even if
# an epoch raises part-way through.
try:
    for epoch in range(num_epochs):
        # Training step
        model.train()
        train_loss = 0
        for x_batch, y_batch in train_loader:
            pred_train = model(x_batch)
            loss = criterion(pred_train, y_batch)
            train_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()

            # Clip the total gradient norm to 2.0 before the parameter update
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)
            optimizer.step()
        train_loss /= len(train_loader)  # mean of the per-batch losses

        # Validation step (eval mode disables dropout)
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for x_batch, y_batch in val_loader:
                pred_val = model(x_batch)
                loss = criterion(pred_val, y_batch)
                val_loss += loss.item()

            # R-squared over the whole validation split, used as "accuracy".
            # Computed inside no_grad so this full-size forward pass does not
            # build an autograd graph that is immediately thrown away.
            val_r2 = r2_score_torch(y_val_tensor, model(x_val_tensor))
        val_loss /= len(val_loader)
        val_accuracy_percent = val_r2 * 100  # Convert to percentage

        # Check if validation loss improved and save checkpoint
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            save_checkpoint(model, optimizer, epoch, train_loss, val_loss)
            no_improvement_count = 0
        else:
            no_improvement_count += 1

        # Log progress (done before the early-stopping check so the final
        # epoch's metrics are always reported)
        print(f"Epoch [{epoch+1}/{num_epochs}] - Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}, R² (Accuracy): {val_accuracy_percent:.2f}%")
        writer.add_scalar('Loss/train', train_loss, epoch)
        writer.add_scalar('Loss/validation', val_loss, epoch)
        writer.add_scalar('R²/validation', val_accuracy_percent, epoch)

        # Early stopping if no improvement
        if no_improvement_count >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        # Step scheduler
        scheduler.step()
finally:
    # Close the TensorBoard writer
    writer.close()

# Restore the best weights so later inference uses the checkpointed model
# rather than whatever the final epoch produced. The guard skips the load
# when no checkpoint was written during this run.
if best_val_loss < float('inf'):
    best_checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(best_checkpoint['model_state_dict'])
model.eval()

# Tkinter GUI for prediction
def predict_price():
    """Read the feature fields, run the model and show the predicted price.

    Button callback for the GUI. Each entry in ``feature_entries`` is parsed
    as a float, the values go through the fitted ``poly`` and ``scaler``
    transformers, and the model's output is shown in an info dialog.

    Invalid or missing input is reported in an "Input Error" dialog; any
    other failure is reported in an "Error" dialog with the exception text.
    Nothing is returned and no exception propagates to Tkinter.
    """
    try:
        # Validate field by field so the user is told exactly what is wrong
        # instead of seeing a raw float() conversion message.
        features = []
        for name, entry in zip(feature_names, feature_entries):
            raw = entry.get().strip()
            if not raw:
                raise ValueError("Please enter all features.")
            try:
                features.append(float(raw))
            except ValueError:
                raise ValueError(f"Invalid number for {name}: {raw!r}") from None

        # Apply the same preprocessing as during training
        features_poly = poly.transform([features])
        features_poly_scaled = scaler.transform(features_poly)

        # Convert to tensor
        features_tensor = torch.tensor(features_poly_scaled, dtype=torch.float32)

        # Predict with dropout disabled and without autograd tracking
        model.eval()
        with torch.no_grad():
            price = model(features_tensor).item()

        # The California housing target is the median house value expressed
        # in units of $100,000, so scale it to dollars before display.
        price_usd = price * 100_000

        # Display the result
        messagebox.showinfo("Predicted Price", f"Predicted Housing Price: ${price_usd:,.2f}")

    except ValueError as ve:
        messagebox.showerror("Input Error", str(ve))
    except Exception as e:
        # Top-level GUI boundary: surface the cause rather than hiding it
        messagebox.showerror("Error", f"An error occurred during prediction: {e}")

# Main application window with a dark gray background
window = tk.Tk()
window.title("California Housing Price Predictor")
window.configure(bg='darkgray')

# One input field per dataset feature; the entries are collected so that
# predict_price can read them back in feature order
feature_names = dataset.feature_names
feature_entries = []

# Container frame that keeps the widgets stacked in the middle of the window
frame = tk.Frame(window, bg='darkgray')
frame.pack(pady=20)

# Stack a white-on-gray caption and a 30-character entry for every feature
for feature in feature_names:
    tk.Label(frame, text=feature, bg='darkgray', fg='white').pack(pady=5)
    entry = tk.Entry(frame, width=30)
    entry.pack(pady=5)
    feature_entries.append(entry)

# Button that triggers the prediction callback
predict_button = tk.Button(frame, text="Predict Price", command=predict_price)
predict_button.pack(pady=10)

# Hand control to Tkinter; this call blocks until the window is closed
window.mainloop()



Checkpoint saved at epoch 1 with validation loss: 0.3972
Epoch [1/61] - Training Loss: 0.5049, Validation Loss: 0.3972, R² (Accuracy): 69.69%
Checkpoint saved at epoch 2 with validation loss: 0.3491
Epoch [2/61] - Training Loss: 0.3928, Validation Loss: 0.3491, R² (Accuracy): 73.36%
Checkpoint saved at epoch 3 with validation loss: 0.3345
Epoch [3/61] - Training Loss: 0.3660, Validation Loss: 0.3345, R² (Accuracy): 74.47%
Checkpoint saved at epoch 4 with validation loss: 0.3233
Epoch [4/61] - Training Loss: 0.3535, Validation Loss: 0.3233, R² (Accuracy): 75.33%
Checkpoint saved at epoch 5 with validation loss: 0.3226
Epoch [5/61] - Training Loss: 0.3444, Validation Loss: 0.3226, R² (Accuracy): 75.38%
Epoch [6/61] - Training Loss: 0.3380, Validation Loss: 0.3254, R² (Accuracy): 75.17%
Checkpoint saved at epoch 7 with validation loss: 0.3062
Epoch [7/61] - Training Loss: 0.3317, Validation Loss: 0.3062, R² (Accuracy): 76.63%
Epoch [8/61] - Training Loss: 0.3251, Validation Loss: 0.3083, 