In [1]:
import os
import urllib.request
import zipfile

url = 'http://cs231n.stanford.edu/tiny-imagenet-200.zip'
dataset_dir = 'tiny-imagenet-200'

if not os.path.exists(dataset_dir):
    os.makedirs(dataset_dir)
    print("Downloading Tiny ImageNet dataset...")
    urllib.request.urlretrieve(url, 'tiny-imagenet-200.zip')
    print("Download completed. Extracting files...")
    
    # Extract the dataset
    with zipfile.ZipFile('tiny-imagenet-200.zip', 'r') as zip_ref:
        zip_ref.extractall('.')
    print("Extraction completed.")
else:
    print("Tiny ImageNet dataset already exists.")


Tiny ImageNet dataset already exists.


* Computing Mean and SD

In [2]:
import torch
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader
import numpy as np
from tqdm import tqdm

simple_transform = transforms.Compose([
    transforms.ToTensor()
])

train_dataset = ImageFolder(root='tiny-imagenet-200/train', transform=simple_transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False, num_workers=4)

def compute_mean_std(loader):
    mean = 0.
    std = 0.
    total_images_count = 0
    
    for images, _ in tqdm(loader, desc="Computing mean and std"):
        images = images.view(images.size(0), images.size(1), -1)
        mean += images.mean(2).sum(0)
        std += images.std(2).sum(0)
        total_images_count += images.size(0)
    
    mean /= total_images_count
    std /= total_images_count
    return mean, std

mean, std = compute_mean_std(train_loader)
print(f'Mean: {mean}')
print(f'Std: {std}')


Computing mean and std: 100%|██████████| 1563/1563 [00:43<00:00, 35.89it/s] 

Mean: tensor([0.4802, 0.4481, 0.3975])
Std: tensor([0.2296, 0.2263, 0.2255])





In [3]:
mean = mean.tolist()
std = std.tolist()

train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

train_dataset = ImageFolder(root='tiny-imagenet-200/train', transform=train_transforms)
val_dataset = ImageFolder(root='tiny-imagenet-200/val', transform=val_transforms)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)


In [4]:
import torchvision.models as models

# Load ResNet18 model
model = models.resnet18(pretrained=False, num_classes=200)



In [5]:
import torch.optim as optim
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
device = torch.device('mps') if torch.backends.mps.is_available() else torch.device('cpu')

In [6]:
def train(model, train_loader, criterion, optimizer, epoch):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in tqdm(train_loader, desc=f"Training Epoch {epoch+1}"):
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    print(f'Epoch [{epoch+1}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {100.*correct/total:.2f}%')
    
    

# Evaluate function with tqdm
def evaluate(model, val_loader, criterion):
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc="Evaluating"):
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    print(f'Validation Loss: {val_loss/len(val_loader):.4f}, Accuracy: {100.*correct/total:.2f}%')

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    train(model, train_loader, criterion, optimizer, epoch)
    evaluate(model, val_loader, criterion)


Training Epoch 1: 100%|██████████| 1563/1563 [37:08<00:00,  1.43s/it]


Epoch [1], Loss: 4.7583, Accuracy: 5.38%


Evaluating: 100%|██████████| 157/157 [02:10<00:00,  1.20it/s]


Validation Loss: 7.8237, Accuracy: 0.46%


Training Epoch 2: 100%|██████████| 1563/1563 [36:22<00:00,  1.40s/it]


Epoch [2], Loss: 4.2255, Accuracy: 11.12%


Evaluating: 100%|██████████| 157/157 [02:07<00:00,  1.23it/s]


Validation Loss: 8.9501, Accuracy: 0.21%


Training Epoch 3: 100%|██████████| 1563/1563 [38:44<00:00,  1.49s/it]


Epoch [3], Loss: 3.9638, Accuracy: 14.83%


Evaluating: 100%|██████████| 157/157 [01:43<00:00,  1.52it/s]


Validation Loss: 9.3596, Accuracy: 0.22%


Training Epoch 4: 100%|██████████| 1563/1563 [28:06<00:00,  1.08s/it]


Epoch [4], Loss: 3.7865, Accuracy: 17.60%


Evaluating: 100%|██████████| 157/157 [01:43<00:00,  1.51it/s]


Validation Loss: 9.4512, Accuracy: 0.36%


Training Epoch 5: 100%|██████████| 1563/1563 [28:37<00:00,  1.10s/it]


Epoch [5], Loss: 3.6403, Accuracy: 19.82%


Evaluating: 100%|██████████| 157/157 [02:06<00:00,  1.24it/s]


Validation Loss: 9.7992, Accuracy: 0.71%


Training Epoch 6: 100%|██████████| 1563/1563 [45:28<00:00,  1.75s/it]


Epoch [6], Loss: 3.5198, Accuracy: 22.05%


Evaluating: 100%|██████████| 157/157 [02:52<00:00,  1.10s/it]


Validation Loss: 9.1085, Accuracy: 1.08%


Training Epoch 7:  62%|██████▏   | 965/1563 [39:50<16:59,  1.71s/it]  

In [None]:
import matplotlib.pyplot as plt
def condition_number(layer):
    weights = layer.weight.data.cpu().numpy()
    u, s, vh = np.linalg.svd(weights, full_matrices=False)
    cond_number = s[0] / s[-1]
    return cond_number

# Calculate layerwise condition numbers with tqdm
layerwise_condition_numbers = {}
for name, layer in tqdm(model.named_modules(), desc="Calculating condition numbers"):
    if hasattr(layer, 'weight'):
        layerwise_condition_numbers[name] = condition_number(layer)

print(layerwise_condition_numbers)

# Main experiment loop
learning_rates = [0.001, 0.01, 0.1]
condition_numbers = []
accuracies = []

for lr in learning_rates:
    print(f"\nTraining with learning rate: {lr}")
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
    for epoch in range(num_epochs):
        train(model, train_loader, criterion, optimizer, epoch)
    accuracy = evaluate(model, val_loader, criterion)
    accuracies.append(accuracy)
    
    cond_numbers = {}
    for name, layer in tqdm(model.named_modules(), desc="Calculating condition numbers"):
        if hasattr(layer, 'weight'):
            cond_numbers[name] = condition_number(layer)
    condition_numbers.append(cond_numbers)

# Plotting condition numbers
plt.figure(figsize=(12, 6))
for idx, lr in enumerate(learning_rates):
    plt.plot(list(condition_numbers[idx].values()), label=f'LR={lr}')

plt.xlabel('Layer Index')
plt.ylabel('Condition Number')
plt.legend()
plt.show()

# Plotting learning rates vs accuracies
plt.figure(figsize=(8, 6))
plt.plot(learning_rates, accuracies, marker='o')
plt.xlabel('Learning Rate')
plt.ylabel('Accuracy')
plt.xscale('log')
plt.show()
