In [54]:
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.transforms import autoaugment
from PIL import Image
import time
import wandb

import wandb
# Authenticate this session with Weights & Biases before any runs are created.
wandb.login()
    

True

In [55]:
#  1. Data Loading and Preprocessing with configurable augmentation
def get_data_loaders(batch_size=128, num_workers=0, config=None):
    """Build the CIFAR-10 training and test DataLoaders.

    The training pipeline applies, in order: AutoAugment (CIFAR10 policy),
    ColorJitter, optional RandomHorizontalFlip, optional RandomCrop(32, padding=4),
    ToTensor, Normalize and finally RandomErasing (which operates on tensors and
    therefore has to come after ToTensor). The test pipeline is ToTensor +
    Normalize only.

    Args:
        batch_size: Batch size used by both loaders.
        num_workers: Number of DataLoader worker processes.
        config: Object exposing the augmentation settings as attributes
            (e.g. ``wandb.config``): ``random_crop`` (bool), ``horizontal_flip``
            (bool), ``color_jitter_brightness`` (float) and
            ``random_erasing_prob`` (float). When ``None``, the defaults
            ``True, True, 0.2, 0.2`` are used.

    Returns:
        Tuple ``(train_loader, test_loader)``.

    Raises:
        AttributeError: If a non-None ``config`` lacks one of the settings above.
    """
    if config is None:
        # The signature advertises config=None, so provide usable defaults
        # instead of failing on the first attribute access.
        from types import SimpleNamespace
        config = SimpleNamespace(
            random_crop=True,
            horizontal_flip=True,
            color_jitter_brightness=0.2,
            random_erasing_prob=0.2,
        )

    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2470, 0.2435, 0.2616]
    )

    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    # Augmentations that run on the PIL image, controlled by the config.
    train_transform_list = [
        autoaugment.AutoAugment(policy=autoaugment.AutoAugmentPolicy.CIFAR10),
        transforms.ColorJitter(
            brightness=config.color_jitter_brightness,
            contrast=0.2,
            saturation=0.2,
            hue=0.1
        ),
    ]
    if config.horizontal_flip:
        train_transform_list.append(transforms.RandomHorizontalFlip())
    if config.random_crop:
        train_transform_list.append(transforms.RandomCrop(32, padding=4))

    # Tensor conversion, normalisation and the tensor-only RandomErasing.
    train_transform_list.extend([
        transforms.ToTensor(),
        normalize,
        transforms.RandomErasing(
            p=config.random_erasing_prob,
            scale=(0.02, 0.33),
            ratio=(0.3, 3.3),
            value=0
        ),
    ])

    train_transform = transforms.Compose(train_transform_list)

    train_dataset = torchvision.datasets.CIFAR10(
        root='./data',
        train=True,
        download=True,
        transform=train_transform
    )

    test_dataset = torchvision.datasets.CIFAR10(
        root='./data',
        train=False,
        download=True,
        transform=test_transform
    )

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers
    )

    return train_loader, test_loader

In [56]:
# 2. Model Definition (unchanged)
def count_parameters(model):
    """Print and return the parameter counts of ``model``.

    Args:
        model: Any object exposing ``parameters()`` that yields tensors.

    Returns:
        Tuple ``(total_params, trainable_params)`` where the second entry only
        counts parameters whose ``requires_grad`` flag is set.
    """
    total_params = 0
    trainable_params = 0
    for param in model.parameters():
        size = param.numel()
        total_params += size
        if param.requires_grad:
            trainable_params += size

    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")
    return total_params, trainable_params

def get_resnet18_model(num_classes=10, pretrained=False):
    """Build a slimmed-down ResNet-18 for 32x32 inputs.

    Starting from torchvision's ResNet-18 skeleton, every convolution,
    batch-norm and the classifier are replaced so that the stage widths become
    32/64/128/256 (instead of 64/128/256/512), the stem is a 3x3 stride-1
    convolution and the stem max-pool is removed.

    Args:
        num_classes: Number of output classes of the final linear layer.
        pretrained: Kept for backward compatibility. Because all parameterised
            layers are rebuilt with new shapes, pretrained weights could never
            survive, so this flag only triggers a warning.

    Returns:
        The modified ``torchvision.models.ResNet`` instance with freshly
        initialised weights. Attribute names (and therefore state_dict keys)
        are the same as in the stock model.
    """
    if pretrained:
        import warnings
        warnings.warn(
            "pretrained=True has no effect: every layer of the ResNet-18 "
            "skeleton is replaced with a freshly initialised, narrower layer.",
            stacklevel=2,
        )

    # Only the module structure is needed; the weights are all replaced below.
    model = models.resnet18(weights=None)

    stem_channels = 32
    model.conv1 = nn.Conv2d(3, stem_channels, kernel_size=3, stride=1, padding=1, bias=False)
    model.bn1 = nn.BatchNorm2d(stem_channels)
    model.maxpool = nn.Identity()

    # (stage, output width, stride of the stage's first block)
    stages = [
        (model.layer1, 32, 1),
        (model.layer2, 64, 2),
        (model.layer3, 128, 2),
        (model.layer4, 256, 2),
    ]

    in_channels = stem_channels
    for stage, out_channels, first_stride in stages:
        for block_idx, block in enumerate(stage):
            # Only the first block of a stage changes resolution / width.
            stride = first_stride if block_idx == 0 else 1
            block.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
            block.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
            block.bn1 = nn.BatchNorm2d(out_channels)
            block.bn2 = nn.BatchNorm2d(out_channels)
            if block.downsample is not None:
                # Projection shortcut: must match the main path's width and stride.
                block.downsample[0] = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)
                block.downsample[1] = nn.BatchNorm2d(out_channels)
            in_channels = out_channels

    model.fc = nn.Linear(in_channels, num_classes)
    return model

In [57]:
# 3. Mixup Function (unchanged)
def mixup_data(batch, targets, alpha=1.0, device='cpu'):
    """Blend each sample of ``batch`` with a randomly chosen partner (mixup).

    Args:
        batch: Input tensor whose first dimension indexes the samples.
        targets: Label tensor indexed the same way as ``batch``.
        alpha: Parameter of the Beta(alpha, alpha) distribution the mixing
            coefficient is drawn from; values <= 0 disable mixing (lam = 1).
        device: Device the random permutation is moved to.

    Returns:
        Tuple ``(mixed_x, y_a, y_b, lam)``: the blended inputs, the original
        targets, the partners' targets and the mixing coefficient.
    """
    # One coefficient for the whole batch, drawn from NumPy's global RNG.
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1

    partner = torch.randperm(batch.size(0)).to(device)
    shuffled_batch = batch[partner]

    mixed_x = lam * batch + (1 - lam) * shuffled_batch
    return mixed_x, targets, targets[partner], lam

In [58]:
# 4. Training and Evaluation Functions with wandb logging
def train_one_epoch(model, train_loader, criterion, optimizer, device, epoch, use_mixup=False, mixup_alpha=1.0):
    """Run one training pass over ``train_loader`` and log the result to wandb.

    Args:
        model: Network to train; switched to training mode here.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: Optimizer stepping the model parameters.
        device: Device the batches are moved to.
        epoch: Epoch number, used only for logging.
        use_mixup: When True, inputs are blended with ``mixup_data`` and the
            loss is the lam-weighted sum of the losses against both target sets.
        mixup_alpha: Beta-distribution parameter forwarded to ``mixup_data``.

    Returns:
        ``(epoch_loss, epoch_acc)`` without mixup, ``(epoch_loss, None)`` with
        mixup. ``epoch_loss`` is the mean of the per-batch losses; accuracy (in
        percent) is not computed when mixup is enabled.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        if use_mixup:
            inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, alpha=mixup_alpha, device=device)
            outputs = model(inputs)
            loss = lam * criterion(outputs, targets_a) + (1 - lam) * criterion(outputs, targets_b)
        else:
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if not use_mixup:
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    epoch_loss = running_loss / len(train_loader)

    if use_mixup:
        wandb.log({"train_loss": epoch_loss, "epoch": epoch})
        print(f'Epoch: {epoch}, Train Loss: {epoch_loss:.3f}')
        return epoch_loss, None

    epoch_acc = 100.0 * correct / total
    wandb.log({"train_loss": epoch_loss, "train_accuracy": epoch_acc, "epoch": epoch})
    print(f'Epoch: {epoch}, Train Loss: {epoch_loss:.3f}, Train Acc: {epoch_acc:.2f}%')
    return epoch_loss, epoch_acc
    
def evaluate(model, test_loader, criterion, device):
    """Evaluate ``model`` on ``test_loader`` and log loss/accuracy to wandb.

    Args:
        model: Network to evaluate; switched to eval mode here.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable taking ``(outputs, targets)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(test_loss, test_acc)``: mean per-batch loss and accuracy in
        percent.
    """
    model.eval()
    loss_sum = 0
    num_correct = 0
    num_seen = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            outputs = model(inputs)
            loss_sum += criterion(outputs, targets).item()

            predicted = outputs.max(1)[1]
            num_seen += targets.size(0)
            num_correct += predicted.eq(targets).sum().item()

    test_loss = loss_sum / len(test_loader)
    test_acc = 100.0 * num_correct / num_seen

    wandb.log({"test_loss": test_loss, "test_accuracy": test_acc})
    print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.2f}%')

    return test_loss, test_acc

In [59]:
# # 5. Main Training Function with MPS Support
# def train(config=None):
#     with wandb.init(config=config):
#         config = wandb.config
        
#         # 设备检测：优先 MPS，然后 CUDA，最后 CPU
#         if torch.backends.mps.is_available():
#             device = torch.device("mps")
#             print(f"Using Apple MPS for acceleration")
#         elif torch.cuda.is_available():
#             device = torch.device("cuda")
#             print(f"Using CUDA for acceleration")
#         else:
#             device = torch.device("cpu")
#             print(f"Using CPU (no GPU/MPS available)")
        
#         # 获取数据加载器
#         train_loader, test_loader = get_data_loaders(batch_size=128, config=config)
        
#         # 初始化模型并移动到指定设备
#         model = get_resnet18_model(num_classes=10).to(device)
#         count_parameters(model)
        
#         # 定义损失函数和优化器
#         criterion = nn.CrossEntropyLoss()
#         optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
        
#         # 获取训练的总轮数
#         num_epochs = config.num_epochs if hasattr(config, 'num_epochs') else 20
        
#         # 调整学习率调度器的最大周期为训练轮数
#         scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
        
#         # 训练循环
#         for epoch in range(num_epochs):
#             train_loss, _ = train_one_epoch(
#                 model, train_loader, criterion, optimizer, device, epoch+1,
#                 use_mixup=True, mixup_alpha=config.mixup_alpha
#             )
#             test_loss, test_acc = evaluate(model, test_loader, criterion, device)
#             scheduler.step()

# 5. Main Training Function with MPS Support

def train(config=None):
    """Run one wandb-tracked training job (used as the sweep agent's function).

    Reads the hyperparameters from ``wandb.config``, trains the ResNet with
    mixup for ``num_epochs`` epochs (20 if the config has no such entry),
    evaluates after every epoch, and saves three artefacts named after the run
    id, both locally and to wandb: the best checkpoint so far, the final
    checkpoint and a small text file summarising accuracies and configuration.

    Args:
        config: Optional default configuration forwarded to ``wandb.init``.
    """
    with wandb.init(config=config):
        config = wandb.config

        # The unique run id keeps artefacts from different sweep runs apart.
        run_id = wandb.run.id

        # Device selection: prefer MPS, then CUDA, finally CPU.
        if torch.backends.mps.is_available():
            device = torch.device("mps")
            print("Using Apple MPS for acceleration")
        elif torch.cuda.is_available():
            device = torch.device("cuda")
            print("Using CUDA for acceleration")
        else:
            device = torch.device("cpu")
            print("Using CPU (no GPU/MPS available)")

        # Data loaders, with augmentation controlled by the run config.
        train_loader, test_loader = get_data_loaders(batch_size=128, config=config)

        # Build the model and move it to the selected device.
        model = get_resnet18_model(num_classes=10).to(device)
        count_parameters(model)

        # Loss function and optimizer.
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

        # Total number of epochs (falls back to 20 when not configured).
        num_epochs = getattr(config, 'num_epochs', 20)

        # Cosine schedule spanning exactly the whole training run.
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

        # Best accuracy seen so far; test_acc is pre-initialised so the summary
        # code below still works if the loop body never runs (num_epochs == 0).
        best_acc = 0.0
        test_acc = 0.0

        best_model_path = f'best_model_{run_id}.pth'
        final_model_path = f'final_model_{run_id}.pth'
        model_info_path = f'model_info_{run_id}.txt'

        # Training loop.
        for epoch in range(num_epochs):
            train_one_epoch(
                model, train_loader, criterion, optimizer, device, epoch+1,
                use_mixup=True, mixup_alpha=config.mixup_alpha
            )
            _, test_acc = evaluate(model, test_loader, criterion, device)
            scheduler.step()

            # Checkpoint whenever the test accuracy improves.
            if test_acc > best_acc:
                best_acc = test_acc
                torch.save(model.state_dict(), best_model_path)
                wandb.save(best_model_path)

                # Stored in the run summary so the best run can be looked up later.
                wandb.run.summary['best_accuracy'] = best_acc

        # Save the model as it is after the last epoch.
        torch.save(model.state_dict(), final_model_path)
        wandb.save(final_model_path)

        wandb.run.summary['final_accuracy'] = test_acc

        # Human-readable record of the checkpoints and the configuration.
        with open(model_info_path, 'w') as f:
            f.write(f"Best model: {best_model_path}, Accuracy: {best_acc:.2f}%\n")
            f.write(f"Final model: {final_model_path}, Accuracy: {test_acc:.2f}%\n")
            f.write("\nRun Configuration:\n")
            # Public accessor instead of the private ``_items`` mapping.
            for key, value in config.items():
                f.write(f"{key}: {value}\n")
        wandb.save(model_info_path)

In [None]:
# 6. Sweep Configuration and Execution
if __name__ == '__main__':
    # Seed every RNG the pipeline draws from. mixup_data samples its mixing
    # coefficient from NumPy's global RNG, so seeding torch alone is not enough.
    # NOTE(review): seeding happens once, before the sweep starts, so individual
    # runs are not re-seeded — confirm whether per-run seeding is wanted.
    torch.manual_seed(42)
    np.random.seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)

    wandb.login()

    # Grid search over the augmentation hyperparameters.
    sweep_config = {
        "method": "grid",
        "metric": {"name": "test_accuracy", "goal": "maximize"},
        "parameters": {
            "mixup_alpha": {"values": [0.2,0.4]},
            "random_crop": {"values": [True]},
            "horizontal_flip": {"values": [True]},  # horizontal flip was found to help
            "color_jitter_brightness": {"values": [0.2,0.4]},
            "random_erasing_prob": {"values": [0.2,0.4]},
            "num_epochs": {"values": [150]}  # epochs per run
        }
    }

    sweep_id = wandb.sweep(sweep_config, project="cifar10-augmentation-sweep")
    # No ``count`` argument: with a grid sweep the agent runs every combination.
    # wandb.agent(sweep_id, train, count=10)
    wandb.agent(sweep_id, train)

Create sweep with ID: knffhnpf
Sweep URL: https://wandb.ai/jl10897-new-york-university/cifar10-augmentation-sweep/sweeps/knffhnpf


[34m[1mwandb[0m: Agent Starting Run: f3szj3jc with config:
[34m[1mwandb[0m: 	color_jitter_brightness: 0.2
[34m[1mwandb[0m: 	horizontal_flip: True
[34m[1mwandb[0m: 	mixup_alpha: 0.2
[34m[1mwandb[0m: 	num_epochs: 150
[34m[1mwandb[0m: 	random_crop: True
[34m[1mwandb[0m: 	random_erasing_prob: 0.2


[1;34mwandb[0m: 
[1;34mwandb[0m: 🚀 View run [33mprime-sweep-2[0m at: [34mhttps://wandb.ai/jl10897-new-york-university/cifar10-augmentation-sweep/runs/riktn1e3[0m
[1;34mwandb[0m: Find logs at: [1;35mwandb/run-20250306_201245-riktn1e3/logs[0m


Using Apple MPS for acceleration




Total parameters: 2,797,610
Trainable parameters: 2,797,610
Epoch: 1, Train Loss: 2.086
Test Loss: 1.546, Test Acc: 42.53%
Epoch: 2, Train Loss: 1.768
Test Loss: 1.702, Test Acc: 44.99%
Epoch: 3, Train Loss: 1.588
Test Loss: 1.052, Test Acc: 61.57%
Epoch: 4, Train Loss: 1.420
Test Loss: 1.135, Test Acc: 59.24%
Epoch: 5, Train Loss: 1.366
Test Loss: 0.975, Test Acc: 66.83%
Epoch: 6, Train Loss: 1.316
Test Loss: 0.848, Test Acc: 71.42%
Epoch: 7, Train Loss: 1.245
Test Loss: 0.763, Test Acc: 73.78%
Epoch: 8, Train Loss: 1.213
Test Loss: 0.854, Test Acc: 70.75%
Epoch: 9, Train Loss: 1.232
Test Loss: 0.733, Test Acc: 76.60%
Epoch: 10, Train Loss: 1.185
Test Loss: 0.770, Test Acc: 74.01%
Epoch: 11, Train Loss: 1.143
Test Loss: 0.733, Test Acc: 75.39%
Epoch: 12, Train Loss: 1.132
Test Loss: 0.841, Test Acc: 72.04%
Epoch: 13, Train Loss: 1.157
Test Loss: 0.854, Test Acc: 71.22%
Epoch: 14, Train Loss: 1.155
Test Loss: 0.660, Test Acc: 78.34%
Epoch: 15, Train Loss: 1.122
Test Loss: 0.722, Test A

0,1
epoch,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
test_accuracy,▂▁▃▃▃▃▄▃▄▄▃▄▃▃▅▂▅▅▅▅▅▆▆▆▇▇▆▇▇▇██████████
test_loss,█▅▄▄▃▃▃▃▃▃▃▃▃▂▂▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁
train_loss,█▄▄▄▄▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁

0,1
best_accuracy,94.74
epoch,150.0
final_accuracy,94.52
test_accuracy,94.52
test_loss,0.21232
train_loss,0.63485


[34m[1mwandb[0m: Agent Starting Run: i8krnwwp with config:
[34m[1mwandb[0m: 	color_jitter_brightness: 0.2
[34m[1mwandb[0m: 	horizontal_flip: True
[34m[1mwandb[0m: 	mixup_alpha: 0.2
[34m[1mwandb[0m: 	num_epochs: 150
[34m[1mwandb[0m: 	random_crop: True
[34m[1mwandb[0m: 	random_erasing_prob: 0.4


Using Apple MPS for acceleration




Total parameters: 2,797,610
Trainable parameters: 2,797,610
Epoch: 1, Train Loss: 2.116
Test Loss: 1.726, Test Acc: 36.49%
Epoch: 2, Train Loss: 1.898
Test Loss: 1.382, Test Acc: 49.09%
Epoch: 3, Train Loss: 1.735
Test Loss: 1.245, Test Acc: 55.50%
Epoch: 4, Train Loss: 1.592
Test Loss: 1.125, Test Acc: 60.18%
Epoch: 5, Train Loss: 1.488
Test Loss: 1.057, Test Acc: 63.19%
Epoch: 6, Train Loss: 1.392
Test Loss: 0.915, Test Acc: 68.63%
Epoch: 7, Train Loss: 1.352
Test Loss: 1.033, Test Acc: 63.98%
Epoch: 8, Train Loss: 1.313
Test Loss: 0.949, Test Acc: 68.06%
Epoch: 9, Train Loss: 1.262
Test Loss: 0.733, Test Acc: 75.75%
Epoch: 10, Train Loss: 1.272
Test Loss: 0.922, Test Acc: 69.09%
Epoch: 11, Train Loss: 1.262
Test Loss: 0.832, Test Acc: 73.04%
Epoch: 12, Train Loss: 1.242
Test Loss: 0.655, Test Acc: 78.91%
Epoch: 13, Train Loss: 1.193
Test Loss: 0.730, Test Acc: 75.20%
Epoch: 14, Train Loss: 1.209
Test Loss: 0.739, Test Acc: 75.63%
Epoch: 15, Train Loss: 1.209
Test Loss: 0.908, Test A

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇█████
test_accuracy,▁▄▆▅▅▄▆▆▆▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇███████████
test_loss,█▅▅▅▇▅▅▄▇▄▆▄▄▅▅▃▃▃▃▃▃▃▂▃▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁
train_loss,█▅▅▅▄▅▄▄▄▄▄▄▄▄▄▄▄▃▄▃▃▄▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁

0,1
best_accuracy,94.88
epoch,150.0
final_accuracy,94.8
test_accuracy,94.8
test_loss,0.2444
train_loss,0.66297


[34m[1mwandb[0m: Agent Starting Run: accyczi9 with config:
[34m[1mwandb[0m: 	color_jitter_brightness: 0.2
[34m[1mwandb[0m: 	horizontal_flip: True
[34m[1mwandb[0m: 	mixup_alpha: 0.4
[34m[1mwandb[0m: 	num_epochs: 150
[34m[1mwandb[0m: 	random_crop: True
[34m[1mwandb[0m: 	random_erasing_prob: 0.2


Using Apple MPS for acceleration




Total parameters: 2,797,610
Trainable parameters: 2,797,610
Epoch: 1, Train Loss: 2.146
Test Loss: 1.674, Test Acc: 37.95%
Epoch: 2, Train Loss: 1.939
Test Loss: 1.484, Test Acc: 45.31%
Epoch: 3, Train Loss: 1.757
Test Loss: 1.408, Test Acc: 50.95%
Epoch: 4, Train Loss: 1.618
Test Loss: 1.204, Test Acc: 57.97%
Epoch: 5, Train Loss: 1.544
Test Loss: 1.020, Test Acc: 66.49%
Epoch: 6, Train Loss: 1.469
Test Loss: 0.972, Test Acc: 68.28%
Epoch: 7, Train Loss: 1.410
Test Loss: 1.084, Test Acc: 62.23%
Epoch: 8, Train Loss: 1.410
Test Loss: 0.910, Test Acc: 70.18%
Epoch: 9, Train Loss: 1.385
Test Loss: 0.886, Test Acc: 70.98%
Epoch: 10, Train Loss: 1.382
Test Loss: 0.889, Test Acc: 69.65%
Epoch: 11, Train Loss: 1.319
Test Loss: 1.062, Test Acc: 64.85%
Epoch: 12, Train Loss: 1.376
Test Loss: 0.749, Test Acc: 75.75%
Epoch: 13, Train Loss: 1.321
Test Loss: 0.741, Test Acc: 75.67%
Epoch: 14, Train Loss: 1.310
Test Loss: 0.793, Test Acc: 74.97%
Epoch: 15, Train Loss: 1.277
Test Loss: 0.931, Test A

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇█
test_accuracy,▁▃▃▅▄▅▆▆▆▆▆▆▆▇▆▆▆▇▇▇▇▇▇▆▇▇▇▇▇▇▇▇████████
test_loss,█▇▇▆▅▄▃▃▃▃▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁
train_loss,█▇▅▅▄▄▄▄▃▃▃▃▃▃▃▃▃▃▃▃▂▃▂▃▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁

0,1
best_accuracy,94.83
epoch,150.0
final_accuracy,94.7
test_accuracy,94.7
test_loss,0.22685
train_loss,0.80899


[34m[1mwandb[0m: Agent Starting Run: exoic84s with config:
[34m[1mwandb[0m: 	color_jitter_brightness: 0.2
[34m[1mwandb[0m: 	horizontal_flip: True
[34m[1mwandb[0m: 	mixup_alpha: 0.4
[34m[1mwandb[0m: 	num_epochs: 150
[34m[1mwandb[0m: 	random_crop: True
[34m[1mwandb[0m: 	random_erasing_prob: 0.4


Using Apple MPS for acceleration




Total parameters: 2,797,610
Trainable parameters: 2,797,610
Epoch: 1, Train Loss: 2.171
Test Loss: 1.725, Test Acc: 35.50%
Epoch: 2, Train Loss: 1.948
Test Loss: 1.867, Test Acc: 31.38%
Epoch: 3, Train Loss: 1.807
Test Loss: 1.673, Test Acc: 38.37%
Epoch: 4, Train Loss: 1.694
Test Loss: 1.639, Test Acc: 39.28%
Epoch: 5, Train Loss: 1.630
Test Loss: 1.464, Test Acc: 49.88%
Epoch: 6, Train Loss: 1.558
Test Loss: 1.310, Test Acc: 55.87%
Epoch: 7, Train Loss: 1.494
Test Loss: 0.960, Test Acc: 67.81%
Epoch: 8, Train Loss: 1.489
Test Loss: 0.976, Test Acc: 66.42%
Epoch: 9, Train Loss: 1.429
Test Loss: 1.314, Test Acc: 57.32%
Epoch: 10, Train Loss: 1.408
Test Loss: 0.960, Test Acc: 68.10%
Epoch: 11, Train Loss: 1.392
Test Loss: 0.804, Test Acc: 74.36%
Epoch: 12, Train Loss: 1.373
Test Loss: 0.758, Test Acc: 75.55%
Epoch: 13, Train Loss: 1.366
Test Loss: 0.859, Test Acc: 72.12%
Epoch: 14, Train Loss: 1.362
Test Loss: 0.935, Test Acc: 69.43%
Epoch: 15, Train Loss: 1.342
Test Loss: 0.914, Test A

0,1
epoch,▁▁▁▁▁▂▂▂▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇██
test_accuracy,▁▅▆▅▅▆▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▆▇▇▇▇▇▇▇▇█▇████████
test_loss,█▇▅▇▅▄▄▄▄▄▅▄▃▃▄▃▃▄▃▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁
train_loss,█▆▅▄▄▄▄▄▄▄▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁

0,1
best_accuracy,95.0
epoch,150.0
final_accuracy,94.9
test_accuracy,94.9
test_loss,0.28779
train_loss,0.85325


[34m[1mwandb[0m: Agent Starting Run: mv0tuoii with config:
[34m[1mwandb[0m: 	color_jitter_brightness: 0.4
[34m[1mwandb[0m: 	horizontal_flip: True
[34m[1mwandb[0m: 	mixup_alpha: 0.2
[34m[1mwandb[0m: 	num_epochs: 150
[34m[1mwandb[0m: 	random_crop: True
[34m[1mwandb[0m: 	random_erasing_prob: 0.2


Using Apple MPS for acceleration




Total parameters: 2,797,610
Trainable parameters: 2,797,610
Epoch: 1, Train Loss: 2.154
Test Loss: 1.725, Test Acc: 35.74%
Epoch: 2, Train Loss: 1.927
Test Loss: 1.430, Test Acc: 47.98%
Epoch: 3, Train Loss: 1.732
Test Loss: 1.302, Test Acc: 53.50%
Epoch: 4, Train Loss: 1.586
Test Loss: 1.021, Test Acc: 64.56%
Epoch: 5, Train Loss: 1.450
Test Loss: 1.076, Test Acc: 62.70%
Epoch: 6, Train Loss: 1.393
Test Loss: 0.965, Test Acc: 67.68%
Epoch: 7, Train Loss: 1.312
Test Loss: 0.805, Test Acc: 72.27%
Epoch: 8, Train Loss: 1.277
Test Loss: 1.126, Test Acc: 62.29%
Epoch: 9, Train Loss: 1.221
Test Loss: 0.721, Test Acc: 77.04%
Epoch: 10, Train Loss: 1.212
Test Loss: 0.738, Test Acc: 76.51%
Epoch: 11, Train Loss: 1.206
Test Loss: 0.702, Test Acc: 77.14%
Epoch: 12, Train Loss: 1.178
Test Loss: 0.779, Test Acc: 73.47%
Epoch: 13, Train Loss: 1.182
Test Loss: 0.811, Test Acc: 73.52%
Epoch: 14, Train Loss: 1.141
Test Loss: 0.807, Test Acc: 72.56%
Epoch: 15, Train Loss: 1.124
Test Loss: 0.877, Test A

In [None]:
# 7. Prediction on Test Dataset
import pickle
import pandas as pd
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import wandb
import os
import glob

class CustomTestDataset(Dataset):
    """Dataset over unlabeled test images.

    Accepts either

    * a dict holding the images under ``b'data'`` (or ``'data'``) and,
      optionally, their identifiers under ``b'ids'`` (or ``'ids'``), or
    * any indexable sequence of images (legacy behaviour).

    Each item is ``(image, sample_id)``. ``sample_id`` is the stored identifier
    when ids are available and the positional index otherwise.
    """

    def __init__(self, data, transform=None):
        """
        Args:
            data: Dict or sequence as described in the class docstring.
            transform: Optional callable applied to every image.

        Raises:
            KeyError: If ``data`` is a dict without a ``data`` entry.
            ValueError: If the number of ids differs from the number of images.
        """
        if isinstance(data, dict):
            # Indexing the dict itself by position would fail and len() would
            # count its keys, so pull out the arrays explicitly.
            images = self._lookup(data, 'data')
            ids = self._lookup(data, 'ids', required=False)
        else:
            images, ids = data, None

        if ids is not None and len(ids) != len(images):
            raise ValueError(
                f"Got {len(ids)} ids for {len(images)} images"
            )

        self.data = images
        self.ids = ids
        self.transform = transform

    @staticmethod
    def _lookup(mapping, name, required=True):
        """Return ``mapping[name]``, trying the bytes key before the str key."""
        for key in (name.encode(), name):
            if key in mapping:
                return mapping[key]
        if required:
            raise KeyError(f"Test data dict has no {name!r} entry")
        return None

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data[idx]
        if self.transform:
            image = self.transform(image)
        # Plain int so the default collate function can batch the ids.
        sample_id = idx if self.ids is None else int(self.ids[idx])
        return image, sample_id

def _load_latest_local_checkpoint(model, device, pattern='best_model_*.pth'):
    """Load the most recently created local checkpoint matching ``pattern``.

    Returns True when a checkpoint was loaded, False when none exists.
    """
    candidates = glob.glob(pattern)
    if not candidates:
        print("No model files found locally")
        return False
    latest_model = max(candidates, key=os.path.getctime)
    model.load_state_dict(torch.load(latest_model, map_location=device))
    print(f"Loaded most recent local model: {latest_model}")
    return True


def _load_best_wandb_checkpoint(model, device, project):
    """Load the best checkpoint of the finished run with the highest ``best_accuracy``.

    The checkpoint is downloaded from wandb; if that fails, a local file with
    the same name is tried. Returns True when weights were loaded.
    """
    api = wandb.Api()
    best_run = None
    best_accuracy = 0
    for run in api.runs(project):
        accuracy = run.summary.get("best_accuracy", 0)
        if run.state == "finished" and accuracy > best_accuracy:
            best_accuracy = accuracy
            best_run = run

    if best_run is None:
        print(f"No finished run with a recorded best_accuracy found in project {project}")
        return False

    best_model_path = f'best_model_{best_run.id}.pth'
    print(f"Using best model from run {best_run.name} with accuracy {best_accuracy:.2f}%")

    try:
        # download() returns an open file object; close it once the path is known.
        with best_run.file(best_model_path).download(replace=True) as handle:
            downloaded_path = handle.name
        model.load_state_dict(torch.load(downloaded_path, map_location=device))
        print("Successfully loaded model from wandb")
        return True
    except Exception as e:
        print(f"Error downloading model from wandb: {e}")

    if os.path.exists(best_model_path):
        model.load_state_dict(torch.load(best_model_path, map_location=device))
        print(f"Loaded model from local file: {best_model_path}")
        return True

    print(f"Could not find model file: {best_model_path}")
    return False


def predict_test_dataset(test_data_path='/Users/JL/Desktop/DL_Proj/cifar_test_nolabel.pkl',
                         model_path=None,
                         output_path='submission.csv',
                         project="cifar10-augmentation-sweep"):
    """Predict labels for the unlabeled test set and write a submission CSV.

    Weights are looked up in this order: ``model_path`` (if given and present),
    the best checkpoint of the best finished wandb run in ``project``, the most
    recently created local ``best_model_*.pth``. If none can be loaded a
    warning is printed and the untrained model is used (best effort).

    Args:
        test_data_path: Pickle file with the test data, either a dict with
            ``b'data'`` / ``b'ids'`` entries or a plain sequence of images.
        model_path: Optional explicit checkpoint to load.
        output_path: Destination of the CSV with columns ``ID`` and ``Labels``.
        project: wandb project searched for the best run.
    """
    # SECURITY: pickle.load can execute arbitrary code; only use trusted files.
    with open(test_data_path, 'rb') as f:
        test_data = pickle.load(f)

    # Split the payload into images and (optional) ids here so the dataset
    # only ever sees an indexable collection of images.
    if isinstance(test_data, dict):
        images = test_data[b'data'] if b'data' in test_data else test_data['data']
        ids = test_data.get(b'ids', test_data.get('ids'))
    else:
        images, ids = test_data, None

    # Same preprocessing as the test split during training.
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.4914, 0.4822, 0.4465],
            std=[0.2470, 0.2435, 0.2616]
        )
    ])

    test_dataset = CustomTestDataset(images, transform=test_transform)
    test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)

    if torch.backends.mps.is_available():
        device = torch.device("mps")
        print("Using Apple MPS for acceleration")
    elif torch.cuda.is_available():
        device = torch.device("cuda")
        print("Using CUDA for acceleration")
    else:
        device = torch.device("cpu")
        print("Using CPU (no GPU/MPS available)")

    # Same architecture as in training, so the checkpoints fit.
    model = get_resnet18_model(num_classes=10).to(device)

    loaded = False
    if model_path is not None:
        if os.path.exists(model_path):
            model.load_state_dict(torch.load(model_path, map_location=device))
            print(f"Successfully loaded model from: {model_path}")
            loaded = True
        else:
            print(f"Specified model {model_path} not found.")

    if not loaded:
        try:
            loaded = _load_best_wandb_checkpoint(model, device, project)
        except Exception as e:
            print(f"Error accessing wandb: {e}")

    if not loaded:
        loaded = _load_latest_local_checkpoint(model, device)

    if not loaded:
        # Every lookup failed: say so explicitly instead of silently
        # producing predictions from random weights.
        print("WARNING: Could not load any model. Using untrained model for predictions.")

    model.eval()

    predictions = []
    with torch.no_grad():
        for batch_images, indices in test_loader:
            outputs = model(batch_images.to(device))
            _, preds = torch.max(outputs, 1)
            predictions.extend(zip(indices.tolist(), preds.tolist()))

    # Keep dataset order, then report the real sample ids when the file has them.
    predictions.sort(key=lambda x: x[0])
    if ids is not None:
        predictions = [(int(ids[idx]), pred) for idx, pred in predictions]

    submission_df = pd.DataFrame(predictions, columns=['ID', 'Labels'])
    submission_df.to_csv(output_path, index=False)
    print(f"Predictions saved to {output_path}")

# Run inference on the unlabeled test set; the function writes submission.csv.
predict_test_dataset()

In [53]:
# import pickle
# import pandas as pd
# import torch
# from torchvision import transforms
# from torch.utils.data import DataLoader, Dataset
# import os
# import glob
# from PIL import Image

# def predict_test_dataset(model_path=None):
#     print("Starting prediction function...")
    
#     # Load the test dataset
#     print("Loading test data...")
#     with open('/Users/JL/Desktop/DL_Proj/cifar_test_nolabel.pkl', 'rb') as f:
#         test_data = pickle.load(f)
#     print("Test data loaded successfully!")
    
#     # Define the same transformations used for testing
#     test_transform = transforms.Compose([
#         transforms.ToTensor(),
#         transforms.Normalize(
#             mean=[0.4914, 0.4822, 0.4465],
#             std=[0.2470, 0.2435, 0.2616]
#         )
#     ])
    
#     # Create a proper dataset based on the actual structure
#     class CIFARTestDataset(Dataset):
#         def __init__(self, data_dict, transform=None):
#             self.images = data_dict[b'data']  # Already in shape (10000, 32, 32, 3)
#             self.ids = data_dict[b'ids']
#             self.transform = transform
            
#         def __len__(self):
#             return len(self.images)
        
#         def __getitem__(self, idx):
#             image = self.images[idx]
#             image_id = int(self.ids[idx])  # Convert to int for DataFrame
            
#             # Convert to PIL Image for transforms
#             image = Image.fromarray(image)
            
#             if self.transform:
#                 image = self.transform(image)
                
#             return image, image_id
    
#     # Create test dataset and dataloader
#     print("Creating dataset and dataloader...")
#     test_dataset = CIFARTestDataset(test_data, transform=test_transform)
#     test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)
#     print(f"Dataset created with {len(test_dataset)} samples")
    
#     # Set device
#     print("Setting up device...")
#     if torch.backends.mps.is_available():
#         device = torch.device("mps")
#         print(f"Using Apple MPS for acceleration")
#     elif torch.cuda.is_available():
#         device = torch.device("cuda")
#         print(f"Using CUDA for acceleration")
#     else:
#         device = torch.device("cpu")
#         print(f"Using CPU (no GPU/MPS available)")
    
#     # Initialize model using the same architecture as in training
#     print("Initializing model...")
#     model = get_resnet18_model(num_classes=10).to(device)
#     print("Model initialized")
    
#     # Load the specified model if provided
#     print("Loading model weights...")
#     if model_path and os.path.exists(model_path):
#         model.load_state_dict(torch.load(model_path, map_location=device))
#         print(f"Successfully loaded model from: {model_path}")
#     else:
#         # If no specific model is provided, try to find the best model
#         if model_path:
#             print(f"Specified model {model_path} not found.")
        
#         # Try to find any best model file locally
#         best_models = glob.glob('best_model_*.pth')
#         if best_models:
#             latest_model = max(best_models, key=os.path.getctime)
#             model.load_state_dict(torch.load(latest_model, map_location=device))
#             print(f"Loaded most recent local model: {latest_model}")
#         else:
#             print("WARNING: Could not load any model. Using untrained model for predictions.")
    
#     # Set model to evaluation mode
#     model.eval()
#     print("Model set to evaluation mode")
    
#     # Make predictions
#     print("Making predictions...")
#     predictions = []
#     with torch.no_grad():
#         for batch_idx, (images, ids) in enumerate(test_loader):
#             print(f"Processing batch {batch_idx+1}/{len(test_loader)}")
#             images = images.to(device)
#             outputs = model(images)
#             _, preds = torch.max(outputs, 1)
            
#             for id_val, pred in zip(ids.numpy(), preds.cpu().numpy()):
#                 predictions.append((id_val, pred))
    
#     # Create submission DataFrame
#     print("Creating submission file...")
#     submission_df = pd.DataFrame(predictions, columns=['ID', 'Labels'])
    
#     # Save to CSV
#     submission_df.to_csv('submission.csv', index=False)
#     print(f"Predictions saved to submission.csv")
#     print("Prediction function completed successfully!")

# # Call the function with your model path
# print("About to call prediction function...")
# predict_test_dataset("/Users/JL/Desktop/DL_Proj/final_model_ku8uu15z.pth")
# print("Prediction function call completed.")

About to call prediction function...
Starting prediction function...
Loading test data...
Test data loaded successfully!
Creating dataset and dataloader...
Dataset created with 10000 samples
Setting up device...
Using Apple MPS for acceleration
Initializing model...




Model initialized
Loading model weights...
Successfully loaded model from: /Users/JL/Desktop/DL_Proj/final_model_ku8uu15z.pth
Model set to evaluation mode
Making predictions...
Processing batch 1/100
Processing batch 2/100
Processing batch 3/100
Processing batch 4/100
Processing batch 5/100
Processing batch 6/100
Processing batch 7/100
Processing batch 8/100
Processing batch 9/100
Processing batch 10/100
Processing batch 11/100
Processing batch 12/100
Processing batch 13/100
Processing batch 14/100
Processing batch 15/100
Processing batch 16/100
Processing batch 17/100
Processing batch 18/100
Processing batch 19/100
Processing batch 20/100
Processing batch 21/100
Processing batch 22/100
Processing batch 23/100
Processing batch 24/100
Processing batch 25/100
Processing batch 26/100
Processing batch 27/100
Processing batch 28/100
Processing batch 29/100
Processing batch 30/100
Processing batch 31/100
Processing batch 32/100
Processing batch 33/100
Processing batch 34/100
Processing batch

In [52]:
# import pickle
# import pandas as pd
# import torch
# from torchvision import transforms
# from torch.utils.data import DataLoader, Dataset
# import os
# import glob
# from PIL import Image

# def debug_test_dataset():
#     print("Starting debug function...")
    
#     try:
#         # Load the test dataset
#         print("Attempting to load test data...")
#         with open('/Users/JL/Desktop/DL_Proj/cifar_test_nolabel.pkl', 'rb') as f:
#             test_data = pickle.load(f)
#         print("Test data loaded successfully!")
        
#         # Print test data structure for debugging
#         print(f"Test data type: {type(test_data)}")
        
#         if isinstance(test_data, dict):
#             print(f"Test data keys: {list(test_data.keys())}")
#         elif isinstance(test_data, list):
#             print(f"Test data is a list with {len(test_data)} items")
#         else:
#             print(f"Test data is of type {type(test_data)}")
            
#         print("Debug function completed successfully!")
#         return test_data
#     except Exception as e:
#         print(f"Error in debug function: {e}")
#         import traceback
#         traceback.print_exc()
#         return None

# # Call the debug function
# test_data = debug_test_dataset()

# # If we got data, try to examine it further
# if test_data is not None:
#     print("\nExamining test data further...")
#     if isinstance(test_data, dict):
#         for key in test_data.keys():
#             print(f"Key: {key}, Type: {type(test_data[key])}")
#             if isinstance(test_data[key], (list, tuple, np.ndarray)):
#                 print(f"Length/Shape: {len(test_data[key]) if isinstance(test_data[key], (list, tuple)) else test_data[key].shape}")

Starting debug function...
Attempting to load test data...
Test data loaded successfully!
Test data type: <class 'dict'>
Test data keys: [b'data', b'ids']
Debug function completed successfully!

Examining test data further...
Key: b'data', Type: <class 'numpy.ndarray'>
Length/Shape: (10000, 32, 32, 3)
Key: b'ids', Type: <class 'numpy.ndarray'>
Length/Shape: (10000,)
