<a href="https://colab.research.google.com/github/TheS1n233/Distributed-Learning-Project5/blob/test/YUJIE-Hyperparameters%20Choosing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# install torch and torchvision

In [1]:
!pip install torch torchvision matplotlib




# install dataset CIFAR-100

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np

# Data preprocessing
class Cutout(object):
    def __init__(self, size):
        self.size = size

    def __call__(self, img):
        h, w = img.size(1), img.size(2)
        mask = np.ones((h, w), np.float32)
        y = np.random.randint(h)
        x = np.random.randint(w)
        y1 = np.clip(y - self.size // 2, 0, h)
        y2 = np.clip(y + self.size // 2, 0, h)
        x1 = np.clip(x - self.size // 2, 0, w)
        x2 = np.clip(x + self.size // 2, 0, w)
        mask[y1: y2, x1: x2] = 0
        img = img * torch.from_numpy(mask)
        return img

transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    Cutout(size=8),  # Add Cutout
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])


# Download CIFAR-100 dataset
train_dataset = torchvision.datasets.CIFAR100(
    root='./data',
    train=True,
    download=True,
    transform=transform_train
)
test_dataset = torchvision.datasets.CIFAR100(
    root='./data',
    train=False,
    download=True,
    transform=transform_test
)
# Split training and validation sets
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size])

# Create data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)



Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169M/169M [00:03<00:00, 42.8MB/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified


# Hyperparameters choosing

In [3]:
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
import torchvision
import torchvision.transforms as transforms
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# Data preprocessing
class Cutout(object):
    def __init__(self, size):
        self.size = size

    def __call__(self, img):
        # 如果 img 是 PIL.Image，则先转换为 NumPy 数组
        if isinstance(img, Image.Image):
            img = np.array(img)

        h, w = img.shape[:2]
        mask = np.ones((h, w), np.float32)
        y = np.random.randint(h)
        x = np.random.randint(w)
        y1 = np.clip(y - self.size // 2, 0, h)
        y2 = np.clip(y + self.size // 2, 0, h)
        x1 = np.clip(x - self.size // 2, 0, w)
        x2 = np.clip(x + self.size // 2, 0, w)
        mask[y1: y2, x1: x2] = 0
        img = img * mask[:, :, np.newaxis]  # 针对 RGB 通道应用遮罩

        return Image.fromarray(np.uint8(img))

# Data transformations
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    Cutout(size=8),  # Add Cutout
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Download CIFAR-100 dataset
train_dataset = torchvision.datasets.CIFAR100(
    root='./data',
    train=True,
    download=True,
    transform=transform_train
)
test_dataset = torchvision.datasets.CIFAR100(
    root='./data',
    train=False,
    download=True,
    transform=transform_test
)

# Split training and validation sets
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size])

# Create data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define LeNet-5 model
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.dropout1 = nn.Dropout(0.1)
        self.fc2 = nn.Linear(120, 84)
        self.dropout2 = nn.Dropout(0.1)
        self.fc3 = nn.Linear(84, 100)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

# Define random search function
def random_search(train_loader, val_loader, model_class, device, num_trials=10):
    param_space = {
        'lr': [1e-4, 1e-3, 1e-2, 1e-1],
        'weight_decay': [1e-5, 1e-4, 1e-3, 1e-2],
        'momentum': [0.9, 0.95]
    }

    results = []

    for trial in range(num_trials):
        print(f"Trial {trial + 1}/{num_trials}")
        lr = random.choice(param_space['lr'])
        weight_decay = random.choice(param_space['weight_decay'])
        momentum = random.choice(param_space['momentum'])

        model = model_class().to(device)
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
        scheduler = CosineAnnealingLR(optimizer, T_max=5)
        criterion = nn.CrossEntropyLoss()

        train_loss, val_loss, val_acc = train_and_evaluate(
            model, optimizer, scheduler, criterion, train_loader, val_loader, num_epochs=5, device=device
        )

        results.append({
            'lr': lr,
            'weight_decay': weight_decay,
            'momentum': momentum,
            'val_loss': val_loss,
            'val_acc': val_acc
        })

    results.sort(key=lambda x: x['val_acc'], reverse=True)
    print("\nBest Hyperparameters:")
    print(results[0])

    return results[0]  # Return the best result

# Training and evaluation function
def train_and_evaluate(model, optimizer, scheduler, criterion, train_loader, val_loader, num_epochs, device):
    model.train()
    for epoch in range(num_epochs):
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        scheduler.step()

    model.eval()
    val_loss, correct, total = 0, 0, 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    val_acc = 100. * correct / total
    val_loss /= len(val_loader)

    return loss.item(), val_loss, val_acc

# Main function
if __name__ == "__main__":
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    best_hyperparams = random_search(
        train_loader=train_loader,
        val_loader=val_loader,
        model_class=LeNet5,
        device=device,
        num_trials=10
    )

    print("\nUsing Best Hyperparameters:")
    print(best_hyperparams)


Files already downloaded and verified
Files already downloaded and verified
Trial 1/10
Trial 2/10
Trial 3/10
Trial 4/10
Trial 5/10
Trial 6/10
Trial 7/10
Trial 8/10
Trial 9/10
Trial 10/10

Best Hyperparameters:
{'lr': 0.01, 'weight_decay': 1e-05, 'momentum': 0.95, 'val_loss': 3.7585234657214706, 'val_acc': 11.8}

Using Best Hyperparameters:
{'lr': 0.01, 'weight_decay': 1e-05, 'momentum': 0.95, 'val_loss': 3.7585234657214706, 'val_acc': 11.8}


# Centralized baseline

In [None]:

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt



# Initialize device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Training function
def train_model(model, optimizer, criterion, scheduler, train_loader, val_loader, num_epochs):
    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    for epoch in range(num_epochs):
        model.train()
        train_loss, correct, total = 0, 0, 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        train_losses.append(train_loss / len(train_loader))
        train_accuracies.append(100. * correct / total)

        # Validation phase
        model.eval()
        val_loss, correct, total = 0, 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

        val_losses.append(val_loss / len(val_loader))
        val_accuracies.append(100. * correct / total)

        scheduler.step()

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, Train Acc: {train_accuracies[-1]:.2f}%, Val Loss: {val_losses[-1]:.4f}, Val Acc: {val_accuracies[-1]:.2f}%")

    return train_losses, val_losses, train_accuracies, val_accuracies

# Test function
def test_model(model, test_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    print(f"Test Accuracy: {100. * correct / total:.2f}%")

# # Training configurations
# num_epochs = 150
# criterion = nn.CrossEntropyLoss()

# # Train with SGDM
# model = LeNet5().to(device)
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.005)
# scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
# print("Training with SGDM optimizer...")
# train_losses_sgdm, val_losses_sgdm, train_acc_sgdm, val_acc_sgdm = train_model(model, optimizer, criterion, scheduler, train_loader, val_loader, num_epochs)
# test_model(model, test_loader)

# # Train with AdamW
# model = LeNet5().to(device)
# optimizer = optim.AdamW(model.parameters(), lr=0.0015, weight_decay=0.005)
# scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
# print("Training with AdamW optimizer...")
# train_losses_adamw, val_losses_adamw, train_acc_adamw, val_acc_adamw = train_model(model, optimizer, criterion, scheduler, train_loader, val_loader, num_epochs)
# test_model(model, test_loader)

# Plot training and validation results
def plot_results(train_losses, val_losses, train_accuracies, val_accuracies, title):
    epochs = range(1, num_epochs + 1)
    plt.figure(figsize=(12, 5))

    # Plot losses
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_losses, label='Train Loss')
    plt.plot(epochs, val_losses, label='Val Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title(f'{title} Loss')
    plt.legend()

    # Plot accuracies
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_accuracies, label='Train Accuracy')
    plt.plot(epochs, val_accuracies, label='Val Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')
    plt.title(f'{title} Accuracy')
    plt.legend()

    plt.tight_layout()
    plt.show()

# 从 Hyperparameters choosing 模块加载最佳超参数
best_hyperparams = {
    'lr': 0.01,  # 示例值，实际应从 random_search 的结果中提取
    'weight_decay': 0.005,
    'momentum': 0.9
}

# 使用最佳超参数训练模型
model = LeNet5().to(device)
optimizer = optim.SGD(
    model.parameters(),
    lr=best_hyperparams['lr'],
    momentum=best_hyperparams['momentum'],
    weight_decay=best_hyperparams['weight_decay']
)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=150)  # 假设训练150个epoch
criterion = nn.CrossEntropyLoss()

# 开始训练
print("Training with Best Hyperparameters...")
train_losses, val_losses, train_accuracies, val_accuracies = train_model(
    model, optimizer, criterion, scheduler, train_loader, val_loader, num_epochs=150
)

# 测试模型
test_model(model, test_loader)

# 可视化结果
plot_results(train_losses, val_losses, train_accuracies, val_accuracies, 'Best Hyperparameters')

# # Plot results for SGDM
# plot_results(train_losses_sgdm, val_losses_sgdm, train_acc_sgdm, val_acc_sgdm, 'SGDM')

# # Plot results for AdamW
# plot_results(train_losses_adamw, val_losses_adamw, train_acc_adamw, val_acc_adamw, 'AdamW')


import os
from google.colab import runtime
print("All code has finished running. Disconnecting GPU to save free time...")
runtime.unassign()

Training with Best Hyperparameters...
Epoch 1/150, Train Loss: 4.5692, Train Acc: 1.59%, Val Loss: 4.4131, Val Acc: 2.75%
Epoch 2/150, Train Loss: 4.2829, Train Acc: 4.38%, Val Loss: 4.1755, Val Acc: 5.47%
Epoch 3/150, Train Loss: 4.1347, Train Acc: 6.08%, Val Loss: 4.0520, Val Acc: 7.15%
Epoch 4/150, Train Loss: 4.0498, Train Acc: 7.36%, Val Loss: 4.0052, Val Acc: 7.78%
Epoch 5/150, Train Loss: 4.0131, Train Acc: 8.12%, Val Loss: 3.9771, Val Acc: 8.83%
Epoch 6/150, Train Loss: 3.9802, Train Acc: 8.52%, Val Loss: 3.9172, Val Acc: 8.89%
Epoch 7/150, Train Loss: 3.9371, Train Acc: 9.13%, Val Loss: 3.9121, Val Acc: 9.13%
Epoch 8/150, Train Loss: 3.9091, Train Acc: 9.53%, Val Loss: 3.8591, Val Acc: 10.12%
Epoch 9/150, Train Loss: 3.8821, Train Acc: 10.05%, Val Loss: 3.8219, Val Acc: 10.85%
Epoch 10/150, Train Loss: 3.8480, Train Acc: 10.58%, Val Loss: 3.8137, Val Acc: 11.60%
Epoch 11/150, Train Loss: 3.8261, Train Acc: 11.03%, Val Loss: 3.7444, Val Acc: 12.20%
Epoch 12/150, Train Loss: 3.7

# NEW OPTIMIZERS

In [None]:
# Train with NEW optimizer in this module