<a href="https://colab.research.google.com/github/Erik-Hansen96/CSCI167/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch torchvision datasets --quiet

In [2]:
from datasets import load_dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import torch
import torch.nn as nn
import torch.nn.functional as F

# Seed PyTorch's global RNG so that weight initialisation, dropout masks and
# DataLoader shuffling repeat across "Restart & Run All". GPU kernels may
# still introduce small run-to-run differences.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when one is visible; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
class HFImageDataset(torch.utils.data.Dataset):
    """Map-style PyTorch dataset backed by an indexable collection of records.

    Every record is expected to be a mapping that holds the image under
    ``image_key`` and the target under ``"label"``.

    Args:
        hf_split: Indexable, sized collection of records (here: a Hugging
            Face dataset split).
        transform: Callable applied to the image before it is returned.
            A falsy value (e.g. ``None``) returns the image untouched.
        image_key: Name of the record field that stores the image.
    """

    def __init__(self, hf_split, transform, image_key="img"):
        self.data, self.transform, self.image_key = hf_split, transform, image_key

    def __len__(self):
        """Number of records in the wrapped split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for record ``idx``, transforming the image if configured."""
        record = self.data[idx]
        # Read both fields up front so a missing key fails before any transform work.
        image, target = record[self.image_key], record["label"]
        return (self.transform(image) if self.transform else image), target


In [4]:
# --- CIFAR-10: download, split, wrap, batch --------------------------------
ds_cifar = load_dataset("cifar10")

# Hold out 10% of the official training split for validation (fixed seed so
# the partition is the same on every run).
cifar_train_val = ds_cifar["train"].train_test_split(test_size=0.1, seed=42)

# PIL image -> tensor, then per-channel standardisation with fixed constants.
cifar_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]),
    ]
)

# The same transform is used for all three splits.
cifar_train_ds, cifar_val_ds, cifar_test_ds = (
    HFImageDataset(split, cifar_transform, image_key="img")
    for split in (cifar_train_val["train"], cifar_train_val["test"], ds_cifar["test"])
)

# Only the training loader shuffles; the evaluation loaders use a larger batch.
cifar_train_loader = DataLoader(cifar_train_ds, batch_size=128, shuffle=True, num_workers=2)
cifar_val_loader, cifar_test_loader = (
    DataLoader(eval_ds, batch_size=256, shuffle=False, num_workers=2)
    for eval_ds in (cifar_val_ds, cifar_test_ds)
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

plain_text/train-00000-of-00001.parquet:   0%|          | 0.00/120M [00:00<?, ?B/s]

plain_text/test-00000-of-00001.parquet:   0%|          | 0.00/23.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/50000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [5]:
# --- Fashion-MNIST: download, split, wrap, batch ---------------------------
ds_fmnist = load_dataset("zalando-datasets/fashion_mnist")

# Hold out 10% of the official training split for validation (fixed seed).
fmnist_train_val = ds_fmnist["train"].train_test_split(test_size=0.1, seed=42)

# Bring the images into the format the models in this notebook consume:
# resize so the shorter side is 32 px, replicate the grey channel three times
# (SimpleCNN's first conv takes 3 input channels), convert to a tensor, then
# apply (x - 0.5) / 0.5 per channel.
fmnist_transform = transforms.Compose(
    [
        transforms.Resize(32),
        transforms.Grayscale(num_output_channels=3),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

# The same transform is used for all three splits.
fmnist_train_ds, fmnist_val_ds, fmnist_test_ds = (
    HFImageDataset(split, fmnist_transform, image_key="image")
    for split in (fmnist_train_val["train"], fmnist_train_val["test"], ds_fmnist["test"])
)

# Only the training loader shuffles; the evaluation loaders use a larger batch.
fmnist_train_loader = DataLoader(fmnist_train_ds, batch_size=128, shuffle=True, num_workers=2)
fmnist_val_loader, fmnist_test_loader = (
    DataLoader(eval_ds, batch_size=256, shuffle=False, num_workers=2)
    for eval_ds in (fmnist_val_ds, fmnist_test_ds)
)


README.md: 0.00B [00:00, ?B/s]

fashion_mnist/train-00000-of-00001.parqu(…):   0%|          | 0.00/30.9M [00:00<?, ?B/s]

fashion_mnist/test-00000-of-00001.parque(…):   0%|          | 0.00/5.18M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/60000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [6]:
class SimpleCNN(nn.Module):
    """Three-stage convolutional classifier for 3-channel images.

    Each stage is a 3x3 convolution (padding 1) followed by ReLU and 2x2
    max-pooling, so the spatial size halves per stage while the channel count
    grows 3 -> 32 -> 64 -> 128. The flattened feature map feeds a 256-unit
    hidden layer with dropout (p=0.5) and a final linear layer that emits one
    raw logit per class.

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)

        # After 3 pools: 32 -> 16 -> 8 -> 4, so fc1 is sized for a 4x4 map,
        # i.e. for 32x32 inputs.
        self.fc1 = nn.Linear(128 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        x = self.pool(F.relu(self.conv1(x)))  # 32x32 -> 16x16
        x = self.pool(F.relu(self.conv2(x)))  # 16x16 -> 8x8
        x = self.pool(F.relu(self.conv3(x)))  # 8x8 -> 4x4
        # flatten() copies when it has to, so non-contiguous activations
        # (e.g. from channels_last inputs) work; .view() would raise on them.
        x = torch.flatten(x, 1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x


In [7]:
from torchvision.models import resnet18

def make_resnet18(num_classes=10):
    """Build a randomly initialised ResNet-18 with a ``num_classes``-way head.

    Args:
        num_classes: Output size of the replacement final linear layer.

    Returns:
        The torchvision ResNet-18 module with its ``fc`` layer swapped out.
    """
    net = resnet18(weights=None)  # train from scratch, no pretrained weights
    feature_dim = net.fc.in_features
    net.fc = nn.Linear(feature_dim, num_classes)
    return net


In [8]:
def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Module to train; switched to training mode.
        dataloader: Iterable yielding ``(images, labels)`` tensor batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)`` over all samples seen, where each batch's
        loss is weighted by its batch size.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_images, batch_labels in dataloader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        batch_size = batch_labels.size(0)

        # Standard step: clear stale gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the mean.
        loss_sum += batch_loss.item() * batch_size
        predicted = logits.max(1).indices
        n_seen += batch_size
        n_correct += (predicted == batch_labels).sum().item()

    return loss_sum / n_seen, n_correct / n_seen


@torch.no_grad()
def evaluate(model, dataloader, criterion, device):
    """Score ``model`` on ``dataloader`` without tracking gradients.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        dataloader: Iterable yielding ``(images, labels)`` tensor batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)`` over all samples seen, where each batch's
        loss is weighted by its batch size.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_images, batch_labels in dataloader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        batch_size = batch_labels.size(0)

        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)

        # Weight by batch size so a smaller final batch does not skew the mean.
        loss_sum += batch_loss.item() * batch_size
        predicted = logits.max(1).indices
        n_seen += batch_size
        n_correct += (predicted == batch_labels).sum().item()

    return loss_sum / n_seen, n_correct / n_seen


def train_model(model, train_loader, val_loader, num_epochs, lr, weight_decay=5e-4):
    """Train ``model`` with SGD and report per-epoch train/validation metrics.

    The model is moved to the module-level ``device``. Optimisation uses
    cross-entropy loss and SGD with momentum 0.9; the learning rate stays
    constant for the whole run.

    Args:
        model: Module to train.
        train_loader: Batches used for the optimisation pass of each epoch.
        val_loader: Batches used for the evaluation pass of each epoch.
        num_epochs: Number of passes over ``train_loader``.
        lr: SGD learning rate.
        weight_decay: SGD weight-decay coefficient.

    Returns:
        ``(model, history)`` where ``history`` maps ``"train_loss"``,
        ``"train_acc"``, ``"val_loss"`` and ``"val_acc"`` to lists holding
        one value per epoch.
    """
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=lr,
        momentum=0.9,
        weight_decay=weight_decay,
    )

    metric_names = ("train_loss", "train_acc", "val_loss", "val_acc")
    history = {name: [] for name in metric_names}

    for epoch_number in range(1, num_epochs + 1):
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = evaluate(model, val_loader, criterion, device)

        # Record this epoch's numbers under their matching history keys.
        epoch_metrics = (train_loss, train_acc, val_loss, val_acc)
        for name, value in zip(metric_names, epoch_metrics):
            history[name].append(value)

        print(
            f"Epoch {epoch_number}/{num_epochs} | "
            f"train_loss={train_loss:.4f} acc={train_acc:.4f} | "
            f"val_loss={val_loss:.4f} acc={val_acc:.4f}"
        )

    return model, history


In [9]:
# Experiment 1: SimpleCNN on CIFAR-10 (20 epochs, lr 0.01, weight decay 5e-4).
simple_cnn = SimpleCNN(num_classes=10)
model_cifar_simple, hist_cifar_simple = train_model(
    model=simple_cnn,
    train_loader=cifar_train_loader,
    val_loader=cifar_val_loader,
    num_epochs=20,
    lr=0.01,
    weight_decay=5e-4,
)


Epoch 1/20 | train_loss=1.8098 acc=0.3359 | val_loss=1.4580 acc=0.4660
Epoch 2/20 | train_loss=1.3628 acc=0.5059 | val_loss=1.1797 acc=0.5862
Epoch 3/20 | train_loss=1.1456 acc=0.5932 | val_loss=0.9923 acc=0.6524
Epoch 4/20 | train_loss=0.9899 acc=0.6519 | val_loss=0.9088 acc=0.6760
Epoch 5/20 | train_loss=0.8905 acc=0.6881 | val_loss=0.8474 acc=0.7052
Epoch 6/20 | train_loss=0.8026 acc=0.7196 | val_loss=0.8414 acc=0.7148
Epoch 7/20 | train_loss=0.7298 acc=0.7443 | val_loss=0.7785 acc=0.7364
Epoch 8/20 | train_loss=0.6733 acc=0.7650 | val_loss=0.7745 acc=0.7272
Epoch 9/20 | train_loss=0.6192 acc=0.7843 | val_loss=0.7256 acc=0.7602
Epoch 10/20 | train_loss=0.5703 acc=0.7995 | val_loss=0.7285 acc=0.7568
Epoch 11/20 | train_loss=0.5162 acc=0.8184 | val_loss=0.7190 acc=0.7578
Epoch 12/20 | train_loss=0.4808 acc=0.8311 | val_loss=0.6900 acc=0.7680
Epoch 13/20 | train_loss=0.4404 acc=0.8444 | val_loss=0.7284 acc=0.7674
Epoch 14/20 | train_loss=0.3985 acc=0.8594 | val_loss=0.7406 acc=0.7654
E

In [10]:
# Score the trained SimpleCNN on the CIFAR-10 test split.
criterion = nn.CrossEntropyLoss()
test_loss, test_acc = evaluate(
    model=model_cifar_simple,
    dataloader=cifar_test_loader,
    criterion=criterion,
    device=device,
)
print(f"SimpleCNN CIFAR-10 test: loss={test_loss:.4f}, acc={test_acc:.4f}")


SimpleCNN CIFAR-10 test: loss=0.8040, acc=0.7711


In [11]:
# Experiment 2: ResNet-18 (no pretraining) on CIFAR-10, then test-split score.
resnet = make_resnet18(num_classes=10)
model_cifar_resnet, hist_cifar_resnet = train_model(
    model=resnet,
    train_loader=cifar_train_loader,
    val_loader=cifar_val_loader,
    num_epochs=20,
    lr=0.1,  # 10x the SimpleCNN run's 0.01: ResNets often like a higher LR with SGD
    weight_decay=5e-4,
)

criterion = nn.CrossEntropyLoss()
test_loss, test_acc = evaluate(
    model=model_cifar_resnet,
    dataloader=cifar_test_loader,
    criterion=criterion,
    device=device,
)
print(f"ResNet-18 CIFAR-10 test: loss={test_loss:.4f}, acc={test_acc:.4f}")


Epoch 1/20 | train_loss=2.0870 acc=0.3447 | val_loss=1.5029 acc=0.4396
Epoch 2/20 | train_loss=1.3533 acc=0.5119 | val_loss=1.3085 acc=0.5306
Epoch 3/20 | train_loss=1.1051 acc=0.6090 | val_loss=1.0852 acc=0.6168
Epoch 4/20 | train_loss=0.9439 acc=0.6679 | val_loss=0.9803 acc=0.6556
Epoch 5/20 | train_loss=0.8420 acc=0.7060 | val_loss=0.9749 acc=0.6564
Epoch 6/20 | train_loss=0.7584 acc=0.7352 | val_loss=0.9939 acc=0.6644
Epoch 7/20 | train_loss=0.7012 acc=0.7556 | val_loss=0.9296 acc=0.6786
Epoch 8/20 | train_loss=0.6492 acc=0.7734 | val_loss=0.8761 acc=0.6996
Epoch 9/20 | train_loss=0.6046 acc=0.7913 | val_loss=0.8572 acc=0.7114
Epoch 10/20 | train_loss=0.5660 acc=0.8060 | val_loss=0.8518 acc=0.7130
Epoch 11/20 | train_loss=0.5401 acc=0.8129 | val_loss=0.9696 acc=0.6844
Epoch 12/20 | train_loss=0.5108 acc=0.8213 | val_loss=0.8489 acc=0.7192
Epoch 13/20 | train_loss=0.4880 acc=0.8305 | val_loss=0.9306 acc=0.7046
Epoch 14/20 | train_loss=0.4657 acc=0.8355 | val_loss=0.8982 acc=0.7084
E

In [12]:
# Experiment 3: SimpleCNN on Fashion-MNIST, reusing the hyperparameters of the
# CIFAR-10 SimpleCNN run (20 epochs, lr 0.01, weight decay 5e-4).
simple_cnn_f = SimpleCNN(num_classes=10)
model_fmnist_simple, hist_fmnist_simple = train_model(
    model=simple_cnn_f,
    train_loader=fmnist_train_loader,
    val_loader=fmnist_val_loader,
    num_epochs=20,
    lr=0.01,
    weight_decay=5e-4,
)

criterion = nn.CrossEntropyLoss()
test_loss, test_acc = evaluate(
    model=model_fmnist_simple,
    dataloader=fmnist_test_loader,
    criterion=criterion,
    device=device,
)
print(f"SimpleCNN Fashion-MNIST test: loss={test_loss:.4f}, acc={test_acc:.4f}")


Epoch 1/20 | train_loss=0.8802 acc=0.6776 | val_loss=0.4772 acc=0.8178
Epoch 2/20 | train_loss=0.4730 acc=0.8268 | val_loss=0.3843 acc=0.8522
Epoch 3/20 | train_loss=0.3875 acc=0.8612 | val_loss=0.3135 acc=0.8857
Epoch 4/20 | train_loss=0.3489 acc=0.8734 | val_loss=0.2893 acc=0.8927
Epoch 5/20 | train_loss=0.3185 acc=0.8857 | val_loss=0.2751 acc=0.8955
Epoch 6/20 | train_loss=0.2990 acc=0.8931 | val_loss=0.2675 acc=0.9003
Epoch 7/20 | train_loss=0.2797 acc=0.8982 | val_loss=0.2579 acc=0.9062
Epoch 8/20 | train_loss=0.2665 acc=0.9046 | val_loss=0.2451 acc=0.9085
Epoch 9/20 | train_loss=0.2544 acc=0.9076 | val_loss=0.2543 acc=0.9003
Epoch 10/20 | train_loss=0.2415 acc=0.9112 | val_loss=0.2353 acc=0.9143
Epoch 11/20 | train_loss=0.2310 acc=0.9146 | val_loss=0.2347 acc=0.9082
Epoch 12/20 | train_loss=0.2238 acc=0.9181 | val_loss=0.2409 acc=0.9103
Epoch 13/20 | train_loss=0.2153 acc=0.9210 | val_loss=0.2363 acc=0.9107
Epoch 14/20 | train_loss=0.2085 acc=0.9241 | val_loss=0.2288 acc=0.9133
E

In [13]:
# Experiment 4: ResNet-18 on Fashion-MNIST, reusing the hyperparameters of the
# CIFAR-10 ResNet-18 run (20 epochs, lr 0.1, weight decay 5e-4).
resnet_f = make_resnet18(num_classes=10)
model_fmnist_resnet, hist_fmnist_resnet = train_model(
    model=resnet_f,
    train_loader=fmnist_train_loader,
    val_loader=fmnist_val_loader,
    num_epochs=20,
    lr=0.1,
    weight_decay=5e-4,
)

criterion = nn.CrossEntropyLoss()
test_loss, test_acc = evaluate(
    model=model_fmnist_resnet,
    dataloader=fmnist_test_loader,
    criterion=criterion,
    device=device,
)
print(f"ResNet-18 Fashion-MNIST test: loss={test_loss:.4f}, acc={test_acc:.4f}")


Epoch 1/20 | train_loss=0.9249 acc=0.7264 | val_loss=0.4749 acc=0.8180
Epoch 2/20 | train_loss=0.4238 acc=0.8434 | val_loss=0.4301 acc=0.8398
Epoch 3/20 | train_loss=0.3561 acc=0.8695 | val_loss=0.3115 acc=0.8822
Epoch 4/20 | train_loss=0.3208 acc=0.8803 | val_loss=0.3134 acc=0.8817
Epoch 5/20 | train_loss=0.2992 acc=0.8896 | val_loss=0.3363 acc=0.8698
Epoch 6/20 | train_loss=0.2845 acc=0.8947 | val_loss=0.3377 acc=0.8685
Epoch 7/20 | train_loss=0.2754 acc=0.8975 | val_loss=0.2876 acc=0.8887
Epoch 8/20 | train_loss=0.2605 acc=0.9031 | val_loss=0.2978 acc=0.8902
Epoch 9/20 | train_loss=0.2582 acc=0.9056 | val_loss=0.2719 acc=0.8982
Epoch 10/20 | train_loss=0.2532 acc=0.9061 | val_loss=0.2776 acc=0.8930
Epoch 11/20 | train_loss=0.2477 acc=0.9073 | val_loss=0.2662 acc=0.9003
Epoch 12/20 | train_loss=0.2444 acc=0.9105 | val_loss=0.3072 acc=0.8793
Epoch 13/20 | train_loss=0.2390 acc=0.9123 | val_loss=0.3009 acc=0.8878
Epoch 14/20 | train_loss=0.2381 acc=0.9125 | val_loss=0.2811 acc=0.8928
E