<h5>This code is part of XLR. It trains an MLP regressor using an exponentially decaying sine-wave learning-rate schedule. Date: 01/09/2025</h5>

<h5>Contact: rakibul.haque@utsa.edu</h5>

<h5>Cite as: R. U. Haque and P. Markopoulos, "XLR: A Universal Framework for Learning Rate Adaptation via Exponential Range Exploration", 2025</h5>

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, TensorDataset

# Step 1: Load and preprocess the dataset
# Load the California Housing dataset
data = fetch_california_housing()
X = data.data
y = data.target

# Split the data into training and testing sets BEFORE scaling, so that
# test-set statistics never influence the scaler (avoids data leakage).
# The row assignment depends only on the sample count and random_state,
# so the same rows end up in each split as when splitting scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Normalize the features using mean/std estimated on the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix under the training-fitted scaler; kept so any cell that
# still refers to X_scaled continues to work.
X_scaled = scaler.transform(X)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)  # Regression target should be a column vector
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# Step 2: Define the MLP model
class MLP(nn.Module):
    """Multilayer perceptron: two ReLU-activated hidden layers of equal width
    followed by a linear output layer.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of both hidden layers.
        output_dim: Number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Layers are created in forward order: input -> hidden -> hidden -> output
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.fc3 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Map a batch of inputs to the network's (un-activated) outputs."""
        activations = x
        for hidden_layer in (self.fc1, self.fc2):
            activations = torch.relu(hidden_layer(activations))
        # No activation on the final layer
        return self.fc3(activations)

# Initialize the model
# Seed PyTorch's global RNG first so the random weight initialization (and
# later draws from the global generator) repeat on a fresh
# "Restart Kernel -> Run All".
torch.manual_seed(42)

input_dim = X_train.shape[1]  # one input unit per feature column
hidden_dim = 64
output_dim = 1  # single regression target
model = MLP(input_dim, hidden_dim, output_dim)

In [5]:
# Define the exponential_decay_sine_wave_lr function
def exponential_decay_sine_wave_lr(t, lr0, alpha, T, beta, b):
    """Learning rate at iteration ``t`` for the exponential-decay sine-wave schedule.

    Computes::

        d(t)  = exp(-alpha * t / T)
        lr(t) = lr0 * d(t) * (sin(beta * t / (2 * pi)) + d(t) + 0.5)

    At ``t = 0`` this evaluates to ``1.5 * lr0``.

    Args:
        t: Current iteration index; a Python number or a tensor.
        lr0: Base learning rate that scales the whole schedule.
        alpha: Coefficient of the exponential decay.
        T: Divisor of ``t`` inside the exponent.
        beta: Multiplier of ``t`` inside the sine term.
        b: Not used by the computation; kept so existing calls keep working.
            NOTE(review): possibly meant to scale the sine period - confirm
            against the reference formula.

    Returns:
        A float32 tensor holding the learning rate (0-dim for scalar ``t``).
    """
    # as_tensor accepts numbers and tensors alike; torch.tensor(tensor) would
    # emit a copy-construct UserWarning.
    t = torch.as_tensor(t, dtype=torch.float32)
    decay = torch.exp(-alpha * t / T)  # used twice below, so compute it once
    return lr0 * decay * (torch.sin(beta * t / (2 * torch.pi)) + decay + 0.5)

In [6]:

# Base learning rate: the single value feeding both the optimizer and the schedule
le_rate = 0.01

# Step 3: Define the loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error Loss for regression
# Use le_rate rather than a duplicated literal so the optimizer's initial
# learning rate cannot drift from lr0 below when le_rate is changed.
optimizer = optim.SGD(model.parameters(), lr=le_rate)

# Step 4: Create DataLoader for batching
batch_size = 128  # You can adjust the batch size as needed
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Shuffle only the training data; keep test order fixed
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Step 5: Training the model
epochs = 50

# Initialize lists to store per-epoch metrics
train_losses = []
test_losses = []
train_r2_scores = []
test_r2_scores = []
learning_rates = []

# Learning-rate schedule hyperparameters
T = len(train_loader) * epochs  # Total iterations (batches * epochs)
alpha = 0.1  # Decay rate
beta = 1.0   # Oscillation frequency
lr0 = le_rate   # Initial learning rate



# Number of batches per epoch; constant for the whole run, so compute it once
steps_per_epoch = len(train_loader)

for epoch in range(epochs):
    # ---- Training phase ----
    model.train()

    # Accumulators for this epoch's loss and predictions
    total_train_loss = 0.0
    train_samples_seen = 0
    total_train_preds = []
    total_train_labels = []

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        # Global iteration index across all epochs drives the schedule
        t = epoch * steps_per_epoch + batch_idx
        lr = exponential_decay_sine_wave_lr(t, lr0, alpha, T, beta, steps_per_epoch)
        lr_value = lr.item()

        # Push the scheduled learning rate into every parameter group
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr_value

        # Standard step: clear gradients, forward, backward, update
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # The loss is a per-batch mean (nn.MSELoss default reduction), so weight
        # it by the batch size; otherwise a smaller final batch is over-weighted
        # in the epoch average.
        n_batch = inputs.size(0)
        total_train_loss += loss.item() * n_batch
        train_samples_seen += n_batch

        # .cpu() is a no-op for CPU tensors and keeps .numpy() valid on other devices
        total_train_preds.append(outputs.detach().cpu().numpy())
        total_train_labels.append(targets.detach().cpu().numpy())

    # Per-sample average training loss for the epoch
    avg_train_loss = total_train_loss / train_samples_seen
    train_losses.append(avg_train_loss)

    # ---- Evaluation phase ----
    model.eval()
    total_test_loss = 0.0
    test_samples_seen = 0
    total_test_preds = []
    total_test_labels = []

    with torch.no_grad():
        for inputs, targets in test_loader:
            test_outputs = model(inputs)
            test_loss = criterion(test_outputs, targets)

            n_batch = inputs.size(0)
            total_test_loss += test_loss.item() * n_batch
            test_samples_seen += n_batch

            total_test_preds.append(test_outputs.cpu().numpy())
            total_test_labels.append(targets.cpu().numpy())

    # Per-sample average test loss for the epoch
    avg_test_loss = total_test_loss / test_samples_seen
    test_losses.append(avg_test_loss)

    # Calculate R² score for train and test data.
    # Training predictions were collected while the weights were still being
    # updated, so train R² describes the epoch in progress, not the final weights.
    train_preds = np.concatenate(total_train_preds)
    train_labels = np.concatenate(total_train_labels)
    test_preds = np.concatenate(total_test_preds)
    test_labels = np.concatenate(total_test_labels)

    train_r2 = r2_score(train_labels, train_preds)
    test_r2 = r2_score(test_labels, test_preds)

    train_r2_scores.append(train_r2)
    test_r2_scores.append(test_r2)

    # Track the learning rate (the value used for the epoch's last batch)
    learning_rates.append(optimizer.param_groups[0]['lr'])

    # Print progress every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {avg_train_loss:.4f}, Test Loss: {avg_test_loss:.4f}, "
              f"Train R²: {train_r2:.4f}, Test R²: {test_r2:.4f}, Learning Rate: {optimizer.param_groups[0]['lr']}")

# Step 6: Save the results to a CSV file
# One row per epoch; every list below was appended to once per epoch.
results = {
    'Epoch': np.arange(1, epochs + 1),
    'Train Loss': train_losses,
    'Test Loss': test_losses,
    'Train R²': train_r2_scores,
    'Test R²': test_r2_scores,
    'Learning Rate': learning_rates
}

results_df = pd.DataFrame(results)

# Build the path once so the file written and the file reported cannot disagree;
# with le_rate = 0.01 this is 'related_work_sine_wave_SGD_0.01.csv'.
results_path = f'related_work_sine_wave_SGD_{le_rate}.csv'
results_df.to_csv(results_path, index=False)

print(f"Training complete. Results saved to '{results_path}'.")


Epoch [10/50], Train Loss: 0.4068, Test Loss: 0.4103, Train R²: 0.6957, Test R²: 0.6831, Learning Rate: 0.006552111357450485
Epoch [20/50], Train Loss: 0.3642, Test Loss: 0.3695, Train R²: 0.7276, Test R²: 0.7149, Learning Rate: 0.022545477375388145
Epoch [30/50], Train Loss: 0.3395, Test Loss: 0.3447, Train R²: 0.7460, Test R²: 0.7338, Learning Rate: 0.013750584796071053
Epoch [40/50], Train Loss: 0.3253, Test Loss: 0.3292, Train R²: 0.7566, Test R²: 0.7463, Learning Rate: 0.0048096803948283195
Epoch [50/50], Train Loss: 0.3124, Test Loss: 0.3281, Train R²: 0.7663, Test R²: 0.7467, Learning Rate: 0.019858647137880325
Training complete. Results saved to 'training_results_with_batch_size.csv'.
