In [22]:
# Step 1: Imports
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader


In [23]:
# Step 2: Load and Save California Housing dataset
california = fetch_california_housing()
X = california.data
y = california.target

# Save to CSV for reference (optional)
df = pd.DataFrame(X, columns=california.feature_names)
df["MedHouseVal"] = y
df.to_csv("california_housing.csv", index=False)
print(" Saved dataset to california_housing.csv")


 Saved dataset to california_housing.csv


In [24]:
# Step 3: Preprocess
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert to PyTorch tensors
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y.reshape(-1, 1), dtype=torch.float32)


In [25]:
# Step 4: Define ANN Model
class HousingANN(nn.Module):
    def __init__(self, input_dim):
        super(HousingANN, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, x):
        return self.model(x)


In [26]:
# Step 5: 5-Fold Cross-Validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
mse_scores, r2_scores = [], []

for fold, (train_idx, val_idx) in enumerate(kf.split(X_tensor)):
    print(f"\nFold {fold+1}")

    X_train, X_val = X_tensor[train_idx], X_tensor[val_idx]
    y_train, y_val = y_tensor[train_idx], y_tensor[val_idx]

    train_dataset = TensorDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

    model = HousingANN(X_tensor.shape[1])
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training
    for epoch in range(150):
        model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            predictions = model(batch_X)
            loss = criterion(predictions, batch_y)
            loss.backward()
            optimizer.step()

    # Evaluation
    model.eval()
    with torch.no_grad():
        y_pred = model(X_val).numpy()
        y_true = y_val.numpy()

        mse = mean_squared_error(y_true, y_pred)
        r2 = r2_score(y_true, y_pred)

        mse_scores.append(mse)
        r2_scores.append(r2)

        print(f"Fold {fold+1} - MSE: {mse:.4f}, R²: {r2:.4f}")



Fold 1
Fold 1 - MSE: 0.2634, R²: 0.7990

Fold 2
Fold 2 - MSE: 0.2651, R²: 0.8060

Fold 3
Fold 3 - MSE: 0.2744, R²: 0.7891

Fold 4
Fold 4 - MSE: 0.2421, R²: 0.8184

Fold 5
Fold 5 - MSE: 0.2740, R²: 0.7964


In [27]:
# Step 6: Summary
print("\n===== Cross-Validation Results =====")
print(f"Average MSE: {np.mean(mse_scores):.4f}")
print(f"Average R² Score: {np.mean(r2_scores):.4f}")



===== Cross-Validation Results =====
Average MSE: 0.2638
Average R² Score: 0.8018
