## PyTorch K-Fold CV for Regression

In [None]:
# -*- coding: utf-8 -*-
"""
PyTorch K-Fold Cross-Validation Example for Regression

This script demonstrates how to implement K-Fold Cross-Validation
with PyTorch for a simple regression problem.
"""

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_regression # Using make_regression for simplicity
import matplotlib.pyplot as plt

# --- 1. Configuration ---
# Cross-validation / optimisation settings
N_SPLITS = 5           # how many folds K-Fold CV uses
N_EPOCHS = 50          # training epochs run inside every fold
BATCH_SIZE = 16        # mini-batch size handed to the DataLoaders
LEARNING_RATE = 0.001  # step size handed to the optimizer
# Synthetic dataset settings
N_SAMPLES = 500        # rows requested from make_regression
N_FEATURES = 10        # columns requested from make_regression
RANDOM_SEED = 42       # single seed shared by NumPy, PyTorch and scikit-learn

# Seed every random number generator in play so reruns are repeatable
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(RANDOM_SEED)

# --- 2. Generate Synthetic Regression Data ---
print("Generating synthetic data...")
# make_regression gives a controlled toy problem with additive noise
X, y = make_regression(
    n_samples=N_SAMPLES,
    n_features=N_FEATURES,
    noise=15.0,
    random_state=RANDOM_SEED,
)

# Targets become a single column, [n_samples, 1], so they line up with the
# one-unit output of the network when MSELoss compares them
y = y.reshape(len(y), 1)

# --- 3. Data Preprocessing ---
# Standardise the features; neural networks train poorly on unscaled inputs.
# NOTE: for simplicity the scaler here sees the WHOLE dataset. In a real
#       scenario it should be fitted ONLY on the training rows of each fold.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# Wrap the NumPy arrays as float32 PyTorch tensors
# (.float() copies, because the NumPy arrays are float64)
X_tensor = torch.from_numpy(X_scaled).float()
y_tensor = torch.from_numpy(y).float()

# Pair features and targets in one dataset object
full_dataset = TensorDataset(X_tensor, y_tensor)

print(f"Data generated: {X_tensor.shape[0]} samples, {X_tensor.shape[1]} features.")

# --- 4. Define the Neural Network Model ---
class RegressionNet(nn.Module):
    """A simple feed-forward neural network for regression.

    Architecture::

        Linear(input_features -> hidden_size_1) -> ReLU
        Linear(hidden_size_1  -> hidden_size_2) -> ReLU
        Linear(hidden_size_2  -> 1)

    Args:
        input_features: Number of input features per sample.
        hidden_size_1: Width of the first hidden layer (default 64).
        hidden_size_2: Width of the second hidden layer. When ``None`` it is
            derived as ``hidden_size_1 // 2``, i.e. 32 with the defaults.

    Raises:
        ValueError: If a hidden layer width resolves to less than 1, which
            would create a zero-width layer that cannot carry any signal.
    """

    def __init__(self, input_features, hidden_size_1=64, hidden_size_2=None):
        super().__init__()
        if hidden_size_2 is None:
            hidden_size_2 = hidden_size_1 // 2
        for label, width in (("hidden_size_1", hidden_size_1),
                             ("hidden_size_2", hidden_size_2)):
            if width < 1:
                raise ValueError(f"{label} must be >= 1, got {width}")

        # Layers are created in input-to-output order; keeping this order keeps
        # the seeded weight initialisation reproducible.
        self.layer_1 = nn.Linear(input_features, hidden_size_1)
        self.relu1 = nn.ReLU()
        self.layer_2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.relu2 = nn.ReLU()
        self.output_layer = nn.Linear(hidden_size_2, 1)  # one output unit for regression

    def forward(self, x):
        """Return the network output for ``x``; the last dimension has size 1."""
        x = self.relu1(self.layer_1(x))
        x = self.relu2(self.layer_2(x))
        return self.output_layer(x)

# --- 5. K-Fold Cross-Validation Setup ---
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_SEED)
fold_results = []  # Validation MSE of each fold, in fold order

print(f"\nStarting {N_SPLITS}-Fold Cross-Validation...")

# --- 6. K-Fold Cross-Validation Loop ---
# The split is taken over the raw (unscaled) feature matrix X so that the
# scaler can be fitted separately inside every fold. Standardising with
# statistics computed on all samples would let information from the
# validation rows leak into training and bias the CV estimate.
for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
    print(f"\n--- Fold {fold+1}/{N_SPLITS} ---")

    # --- Leakage-free scaling for the Current Fold ---
    # Mean/variance come from the training rows only; the validation rows are
    # transformed with those same statistics.
    fold_scaler = StandardScaler()
    X_train_scaled = fold_scaler.fit_transform(X[train_idx])
    X_val_scaled = fold_scaler.transform(X[val_idx])

    # --- Data Splitting for the Current Fold ---
    train_subset = TensorDataset(
        torch.tensor(X_train_scaled, dtype=torch.float32),
        torch.tensor(y[train_idx], dtype=torch.float32),
    )
    val_subset = TensorDataset(
        torch.tensor(X_val_scaled, dtype=torch.float32),
        torch.tensor(y[val_idx], dtype=torch.float32),
    )

    # Create DataLoaders for the current fold
    train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False)  # order is irrelevant for evaluation

    print(f"Train samples: {len(train_subset)}, Validation samples: {len(val_subset)}")

    # --- Model, Loss, Optimizer Initialization (re-initialize for each fold) ---
    # A fresh model and optimizer per fold keeps the folds independent.
    model = RegressionNet(input_features=N_FEATURES)
    criterion = nn.MSELoss()  # Mean Squared Error loss for regression
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # --- Training Loop for the Current Fold ---
    print("Training...")
    for epoch in range(N_EPOCHS):
        model.train()  # Set model to training mode
        epoch_train_loss = 0.0
        for batch_X, batch_y in train_loader:
            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # MSELoss returns the batch mean; weighting by batch size makes the
            # epoch average exact even when the last batch is smaller.
            epoch_train_loss += loss.item() * batch_X.size(0)

        avg_epoch_train_loss = epoch_train_loss / len(train_subset)
        # Report progress every 10th epoch only
        if (epoch + 1) % 10 == 0:
            print(f'  Epoch [{epoch+1}/{N_EPOCHS}], Train Loss: {avg_epoch_train_loss:.4f}')

    # --- Validation Loop for the Current Fold ---
    print("Validating...")
    model.eval()  # Set model to evaluation mode
    fold_val_loss = 0.0
    with torch.no_grad():  # Disable gradient calculation during validation
        for batch_X, batch_y in val_loader:
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            fold_val_loss += loss.item() * batch_X.size(0)  # sample-weighted sum

    avg_fold_val_loss = fold_val_loss / len(val_subset)
    fold_results.append(avg_fold_val_loss)
    print(f"--- Fold {fold+1} Validation MSE: {avg_fold_val_loss:.4f} ---")

# --- 7. Results ---
average_val_loss = np.mean(fold_results)
std_dev_val_loss = np.std(fold_results)  # population std (NumPy default, ddof=0)

print("\n--- Cross-Validation Results ---")
print(f"Validation MSE for each fold: {[f'{loss:.4f}' for loss in fold_results]}")
print(f"Average Validation MSE across {N_SPLITS} folds: {average_val_loss:.4f}")
print(f"Standard Deviation of Validation MSE: {std_dev_val_loss:.4f}")

print("\nK-Fold Cross-Validation finished.")

# --- Optional: Train Final Model on Full Data (if needed) ---
# After finding good hyperparameters using CV, you might train one final
# model on the entire dataset for deployment.
# print("\nTraining final model on full dataset (optional)...")
# final_model = RegressionNet(input_features=N_FEATURES)
# final_criterion = nn.MSELoss()
# final_optimizer = optim.Adam(final_model.parameters(), lr=LEARNING_RATE)
# full_loader = DataLoader(full_dataset, batch_size=BATCH_SIZE, shuffle=True)
#
# for epoch in range(N_EPOCHS): # Use the same number of epochs or adjust
#     final_model.train()
#     for batch_X, batch_y in full_loader:
#         final_optimizer.zero_grad()
#         outputs = final_model(batch_X)
#         loss = final_criterion(outputs, batch_y)
#         loss.backward()
#         final_optimizer.step()
#     if (epoch + 1) % 10 == 0:
#          print(f'  Final Model - Epoch [{epoch+1}/{N_EPOCHS}], Loss: {loss.item():.4f}') # Loss of last batch
# print("Final model training complete.")
# # You can now save and use 'final_model'


Generating synthetic data...
Data generated: 500 samples, 10 features.

Starting 5-Fold Cross-Validation...

--- Fold 1/5 ---
Train samples: 400, Validation samples: 100
Training...
  Epoch [10/50], Train Loss: 8302.0220
  Epoch [20/50], Train Loss: 391.8277
  Epoch [30/50], Train Loss: 310.9277
  Epoch [40/50], Train Loss: 274.5235
  Epoch [50/50], Train Loss: 249.5684
Validating...
--- Fold 1 Validation MSE: 317.9261 ---

--- Fold 2/5 ---
Train samples: 400, Validation samples: 100
Training...
  Epoch [10/50], Train Loss: 7245.0312
  Epoch [20/50], Train Loss: 361.5749
  Epoch [30/50], Train Loss: 280.0867
  Epoch [40/50], Train Loss: 251.2109
  Epoch [50/50], Train Loss: 226.2608
Validating...
--- Fold 2 Validation MSE: 279.0495 ---

--- Fold 3/5 ---
Train samples: 400, Validation samples: 100
Training...
  Epoch [10/50], Train Loss: 7414.5510
  Epoch [20/50], Train Loss: 354.0877
  Epoch [30/50], Train Loss: 293.9479
  Epoch [40/50], Train Loss: 261.6438
  Epoch [50/50], Train Loss

In [None]:
# -*- coding: utf-8 -*-
"""
PyTorch K-Fold Cross-Validation with Hyperparameter Tuning for Regression

This script demonstrates how to implement K-Fold Cross-Validation
with PyTorch for a simple regression problem and includes a basic
grid search for hyperparameter tuning (learning rate and hidden layer size).
"""

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, Subset
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_regression
import itertools # To iterate over hyperparameter combinations
import time

# --- 1. Configuration ---
N_SPLITS = 5       # how many folds K-Fold CV uses
N_EPOCHS = 30      # epochs per fold (kept low so the demo runs quickly)
BATCH_SIZE = 32    # mini-batch size handed to the DataLoaders
N_SAMPLES = 500    # rows requested from make_regression
N_FEATURES = 10    # columns requested from make_regression
RANDOM_SEED = 42   # single seed shared by NumPy, PyTorch and scikit-learn

# --- Hyperparameter Search Space ---
# Every key is a tunable hyperparameter; every value lists its candidates
param_grid = {
    'learning_rate': [0.01, 0.001],  # candidate optimizer step sizes
    'hidden_size_1': [32, 64]        # candidate widths of the first hidden layer
    # Further hyperparameters can be added here (e.g., 'hidden_size_2', 'batch_size')
}

# Cartesian product of all candidate lists, one dict per combination
# (iterating a dict yields its keys, in the same order as .values())
hyperparameter_combinations = [
    dict(zip(param_grid, candidate_values))
    for candidate_values in itertools.product(*param_grid.values())
]

print(f"Starting Hyperparameter Search with {len(hyperparameter_combinations)} combinations.")
print("Hyperparameter combinations to test:")
for combo in hyperparameter_combinations:
    print(f"  - {combo}")


# Seed every random number generator in play so reruns are repeatable
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(RANDOM_SEED)
    # Ask cuDNN for deterministic kernels and switch off its auto-tuner
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# --- 2. Generate Synthetic Regression Data ---
print("\nGenerating synthetic data...")
X, y = make_regression(
    n_samples=N_SAMPLES,
    n_features=N_FEATURES,
    noise=20.0,
    random_state=RANDOM_SEED,
)
y = y.reshape(len(y), 1)  # single target column, [n_samples, 1], for MSELoss

# --- 3. Data Preprocessing (Outside the loop for this example) ---
# NOTE: Ideally the scaler is fitted *inside* the K-Fold loop, using ONLY the
#       training rows of each fold. Fitting it once on everything is simpler
#       for a demonstration but introduces slight data leakage.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# Wrap the NumPy arrays as float32 PyTorch tensors
# (.float() copies, because the NumPy arrays are float64)
X_tensor = torch.from_numpy(X_scaled).float()
y_tensor = torch.from_numpy(y).float()
full_dataset = TensorDataset(X_tensor, y_tensor)

print(f"Data generated: {X_tensor.shape[0]} samples, {X_tensor.shape[1]} features.")

# --- 4. Define the Neural Network Model ---
class RegressionNet(nn.Module):
    """A simple feed-forward neural network for regression.

    Architecture::

        Linear(input_features -> hidden_size_1) -> ReLU
        Linear(hidden_size_1  -> hidden_size_2) -> ReLU
        Linear(hidden_size_2  -> 1)

    Args:
        input_features: Number of input features per sample.
        hidden_size_1: Width of the first hidden layer (default 64).
        hidden_size_2: Width of the second hidden layer. When ``None`` it is
            tied to the first layer as ``hidden_size_1 // 2``.

    Raises:
        ValueError: If a hidden layer width resolves to less than 1 (for
            example ``hidden_size_1=1`` halves to 0), which would create a
            zero-width layer that cannot carry any signal.
    """

    def __init__(self, input_features, hidden_size_1=64, hidden_size_2=None):
        super().__init__()
        # Resolve the derived width once instead of recomputing it per layer
        if hidden_size_2 is None:
            hidden_size_2 = hidden_size_1 // 2
        for label, width in (("hidden_size_1", hidden_size_1),
                             ("hidden_size_2", hidden_size_2)):
            if width < 1:
                raise ValueError(f"{label} must be >= 1, got {width}")

        # Layers are created in input-to-output order; keeping this order keeps
        # the seeded weight initialisation reproducible.
        self.layer_1 = nn.Linear(input_features, hidden_size_1)
        self.relu1 = nn.ReLU()
        self.layer_2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.relu2 = nn.ReLU()
        self.output_layer = nn.Linear(hidden_size_2, 1)  # one output unit for regression

    def forward(self, x):
        """Return the network output for ``x``; the last dimension has size 1."""
        x = self.relu1(self.layer_1(x))
        x = self.relu2(self.layer_2(x))
        return self.output_layer(x)

# --- 5. Hyperparameter Tuning Loop ---
# Plain grid search: every combination is scored by its mean validation MSE
# over K folds, and the combination with the lowest mean is kept.
best_hyperparams = None
best_avg_val_loss = float('inf')  # any finite average MSE beats this
results_log = []  # one summary dict per hyperparameter combination

start_time_tuning = time.time()

for params in hyperparameter_combinations:
    current_lr = params['learning_rate']
    current_hidden_size = params['hidden_size_1']
    print(f"\n--- Testing Hyperparameters: LR={current_lr}, HiddenSize1={current_hidden_size} ---")

    # --- 6. K-Fold Cross-Validation Setup (Inside Hyperparameter Loop) ---
    # The fixed random_state gives every combination the same fold assignment,
    # so the combinations are compared on identical splits.
    kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_SEED)
    fold_val_losses = []  # validation MSE per fold *for this set of hyperparameters*
    fold_start_time = time.time()

    # --- 7. K-Fold Cross-Validation Loop ---
    # The split is taken over the raw (unscaled) feature matrix X so that the
    # scaler can be fitted separately inside every fold. Standardising with
    # statistics computed on all samples would leak validation information
    # into training and bias the score used to pick hyperparameters.
    for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
        # print(f"  Fold {fold+1}/{N_SPLITS}...") # Less verbose output

        # --- Leakage-free scaling for the Current Fold ---
        # Mean/variance come from the training rows only; the validation rows
        # are transformed with those same statistics.
        fold_scaler = StandardScaler()
        X_train_scaled = fold_scaler.fit_transform(X[train_idx])
        X_val_scaled = fold_scaler.transform(X[val_idx])

        # --- Data Splitting for the Current Fold ---
        train_subset = TensorDataset(
            torch.tensor(X_train_scaled, dtype=torch.float32),
            torch.tensor(y[train_idx], dtype=torch.float32),
        )
        val_subset = TensorDataset(
            torch.tensor(X_val_scaled, dtype=torch.float32),
            torch.tensor(y[val_idx], dtype=torch.float32),
        )
        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False)

        # --- Model, Loss, Optimizer Initialization (Using current hyperparameters) ---
        # A fresh model and optimizer per fold keeps the folds independent.
        model = RegressionNet(input_features=N_FEATURES, hidden_size_1=current_hidden_size)
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=current_lr)

        # --- Training Loop for the Current Fold ---
        for epoch in range(N_EPOCHS):
            model.train()
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
            # No epoch printing during hyperparameter search to reduce clutter

        # --- Validation Loop for the Current Fold ---
        model.eval()
        fold_val_loss = 0.0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                # MSELoss returns the batch mean; weighting by batch size makes
                # the fold average exact even when the last batch is smaller.
                fold_val_loss += loss.item() * batch_X.size(0)

        avg_fold_val_loss = fold_val_loss / len(val_subset)
        fold_val_losses.append(avg_fold_val_loss)
        # print(f"    Fold {fold+1} Val MSE: {avg_fold_val_loss:.4f}") # Less verbose

    # --- Calculate Average Performance for this Hyperparameter Set ---
    average_loss_for_params = np.mean(fold_val_losses)
    std_dev_for_params = np.std(fold_val_losses)  # population std (NumPy default, ddof=0)
    fold_end_time = time.time()

    print(f"  Average Validation MSE for {params}: {average_loss_for_params:.4f} +/- {std_dev_for_params:.4f}")
    print(f"  Time taken for this set: {fold_end_time - fold_start_time:.2f} seconds")

    results_log.append({
        'params': params,
        'avg_val_loss': average_loss_for_params,
        'std_dev_val_loss': std_dev_for_params
    })

    # --- Update Best Hyperparameters ---
    # A NaN average (diverged training) never compares lower, so such a
    # combination can never be selected as best.
    if average_loss_for_params < best_avg_val_loss:
        best_avg_val_loss = average_loss_for_params
        best_hyperparams = params
        print("  ** New best hyperparameters found! **")

end_time_tuning = time.time()
print(f"\nTotal Hyperparameter Tuning Time: {end_time_tuning - start_time_tuning:.2f} seconds")

# --- 8. Final Results ---
print("\n--- Hyperparameter Tuning Results ---")
# Compare against None explicitly: "nothing was selected" is the condition of
# interest, not the truthiness of the selected dict.
if best_hyperparams is not None:
    print("Best Hyperparameters Found:")
    for key, value in best_hyperparams.items():
        print(f"  - {key}: {value}")
    print(f"Best Average Validation MSE: {best_avg_val_loss:.4f}")
else:
    print("Hyperparameter tuning did not complete successfully or no parameters were tested.")

# --- Optional: Train Final Model with Best Hyperparameters ---
# if best_hyperparams:
#     print("\nTraining final model on full dataset with best hyperparameters...")
#     final_model = RegressionNet(input_features=N_FEATURES, hidden_size_1=best_hyperparams['hidden_size_1'])
#     final_criterion = nn.MSELoss()
#     final_optimizer = optim.Adam(final_model.parameters(), lr=best_hyperparams['learning_rate'])
#     full_loader = DataLoader(full_dataset, batch_size=BATCH_SIZE, shuffle=True)
#
#     for epoch in range(N_EPOCHS): # Use the same number of epochs or adjust
#         final_model.train()
#         epoch_loss = 0.0
#         for batch_X, batch_y in full_loader:
#             final_optimizer.zero_grad()
#             outputs = final_model(batch_X)
#             loss = final_criterion(outputs, batch_y)
#             loss.backward()
#             final_optimizer.step()
#             epoch_loss += loss.item() * batch_X.size(0)
#
#         avg_epoch_loss = epoch_loss / len(full_dataset)
#         if (epoch + 1) % 10 == 0:
#              print(f'  Final Model - Epoch [{epoch+1}/{N_EPOCHS}], Train Loss: {avg_epoch_loss:.4f}')
#     print("Final model training complete.")
#     # You can now save and use 'final_model'

print("\nScript finished.")


Starting Hyperparameter Search with 4 combinations.
Hyperparameter combinations to test:
  - {'learning_rate': 0.01, 'hidden_size_1': 32}
  - {'learning_rate': 0.01, 'hidden_size_1': 64}
  - {'learning_rate': 0.001, 'hidden_size_1': 32}
  - {'learning_rate': 0.001, 'hidden_size_1': 64}

Generating synthetic data...
Data generated: 500 samples, 10 features.

--- Testing Hyperparameters: LR=0.01, HiddenSize1=32 ---
  Average Validation MSE for {'learning_rate': 0.01, 'hidden_size_1': 32}: 436.6098 +/- 43.8240
  Time taken for this set: 3.35 seconds
  ** New best hyperparameters found! **

--- Testing Hyperparameters: LR=0.01, HiddenSize1=64 ---
  Average Validation MSE for {'learning_rate': 0.01, 'hidden_size_1': 64}: 445.4537 +/- 33.8663
  Time taken for this set: 3.96 seconds

--- Testing Hyperparameters: LR=0.001, HiddenSize1=32 ---
  Average Validation MSE for {'learning_rate': 0.001, 'hidden_size_1': 32}: 4661.9173 +/- 642.2490
  Time taken for this set: 3.27 seconds

--- Testing Hy