In [1]:
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, Subset
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim
from torch.amp import GradScaler, autocast
import os
import random
from torch.utils.data import DataLoader, SubsetRandomSampler
from collections import Counter

In [2]:
import torch
import numpy as np
from torch.utils.data import DataLoader, Subset
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import random

# Define dataset root directory
mnist_root = '/home/j597s263/scratch/j597s263/Datasets/MNIST'

random.seed(42)
torch.manual_seed(42)
np.random.seed(42)

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.ToTensor()
])

train_dataset = datasets.MNIST(root=mnist_root, transform=transform, train=True, download=False)
test_dataset = datasets.MNIST(root=mnist_root, transform=transform, train=False, download=False)

train_indices = list(range(len(train_dataset)))
random.shuffle(train_indices)  

split_idx = int(0.9 * len(train_indices))  
train_indices, attack_indices = train_indices[:split_idx], train_indices[split_idx:]

train_data = Subset(train_dataset, train_indices)
attack_data = Subset(train_dataset, attack_indices)

train_loader = DataLoader(train_data, batch_size=256, shuffle=True)  # Shuffle within batches
attack_loader = DataLoader(attack_data, batch_size=256, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

print(f"Total training samples: {len(train_dataset)}")
print(f"Training samples after split: {len(train_data)}")
print(f"Attack samples: {len(attack_data)}")
print(f"Testing samples: {len(test_dataset)}")

Total training samples: 60000
Training samples after split: 54000
Attack samples: 6000
Testing samples: 10000


In [3]:
label_counts = Counter()

for idx in attack_indices:  # Use attack_indices to get labels from train_dataset.targets
    label = train_dataset.targets[idx].item()  # Extract label
    label_counts[label] += 1

# Print label distribution
print("\nLabel Distribution in Attack Loader Before Modification:")
for label, count in sorted(label_counts.items()):
    print(f"Label {label}: {count} samples")


Label Distribution in Attack Loader Before Modification:
Label 0: 592 samples
Label 1: 677 samples
Label 2: 618 samples
Label 3: 617 samples
Label 4: 560 samples
Label 5: 563 samples
Label 6: 557 samples
Label 7: 646 samples
Label 8: 579 samples
Label 9: 591 samples


In [4]:
import torch.nn as nn

# Residual block
class Residual(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x):
        return self.fn(x) + x

# ConvMixer model with hard-coded parameters
def ConvMixer():
    dim = 256          # Embedding dimension
    depth = 8          # Number of ConvMixer blocks
    kernel_size = 5    # Kernel size for depthwise convolution
    patch_size = 4     # Patch size for initial convolution
    n_classes = 10    # CIFAR-10 has 10 classes

    return nn.Sequential(
        nn.Conv2d(1, dim, kernel_size=patch_size, stride=patch_size),
        nn.GELU(),
        nn.BatchNorm2d(dim),
        *[nn.Sequential(
                Residual(nn.Sequential(
                    nn.Conv2d(dim, dim, kernel_size, groups=dim, padding="same"),
                    nn.GELU(),
                    nn.BatchNorm2d(dim)
                )),
                nn.Conv2d(dim, dim, kernel_size=1),
                nn.GELU(),
                nn.BatchNorm2d(dim)
        ) for _ in range(depth)],
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
        nn.Linear(dim, n_classes)
    )

In [5]:
model = ConvMixer().to('cuda')

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.amp import autocast, GradScaler

# Hyperparameters
epochs = 10  
learning_rate = 1e-3
opt_eps = 1e-3
clip_grad = 0.5
weight_decay = 1e-4  
device = 'cuda'

optimizer = optim.AdamW(model.parameters(), lr=learning_rate, eps=opt_eps, weight_decay=weight_decay)

onecycle_scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=5e-3,  
    pct_start=0.3,
    anneal_strategy='cos',
    div_factor=10,
    final_div_factor=100,
    steps_per_epoch=len(train_loader),
    epochs=epochs,
    total_steps=None
)

cosine_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs, eta_min=learning_rate / 50)

criterion = nn.CrossEntropyLoss()

scaler = GradScaler()

for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        with autocast(device_type='cuda'):
            outputs = model(images)
            loss = criterion(outputs, labels)

        optimizer.zero_grad()
        scaler.scale(loss).backward()

        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad)

        scaler.step(optimizer)
        scaler.update()

        onecycle_scheduler.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Training Loss: {running_loss/len(train_loader):.4f}")

    if epoch >= epochs // 2:
        cosine_scheduler.step()

    model.eval()
    correct = 0
    total = 0
    test_loss = 0.0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}], Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {test_accuracy:.2f}%")

Epoch [1/10], Training Loss: 0.6944
Epoch [1/10], Test Loss: 0.2980, Test Accuracy: 86.89%
Epoch [2/10], Training Loss: 0.0753
Epoch [2/10], Test Loss: 0.0563, Test Accuracy: 98.34%
Epoch [3/10], Training Loss: 0.0410
Epoch [3/10], Test Loss: 0.0332, Test Accuracy: 98.90%
Epoch [4/10], Training Loss: 0.0278
Epoch [4/10], Test Loss: 0.0428, Test Accuracy: 98.69%
Epoch [5/10], Training Loss: 0.0204
Epoch [5/10], Test Loss: 0.0252, Test Accuracy: 99.19%
Epoch [6/10], Training Loss: 0.0126
Epoch [6/10], Test Loss: 0.0139, Test Accuracy: 99.49%
Epoch [7/10], Training Loss: 0.0064
Epoch [7/10], Test Loss: 0.0142, Test Accuracy: 99.53%
Epoch [8/10], Training Loss: 0.0032
Epoch [8/10], Test Loss: 0.0127, Test Accuracy: 99.53%
Epoch [9/10], Training Loss: 0.0021
Epoch [9/10], Test Loss: 0.0126, Test Accuracy: 99.51%
Epoch [10/10], Training Loss: 0.0017
Epoch [10/10], Test Loss: 0.0124, Test Accuracy: 99.55%


In [7]:
'''# Hyperparameters
epochs = 150
learning_rate = 3e-4
opt_eps = 1e-3
clip_grad = 1.0
device = 'cuda'  

optimizer = optim.AdamW(model.parameters(), lr=learning_rate, eps=opt_eps)
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=learning_rate*10,
    pct_start=0.3,
    anneal_strategy='cos',
    div_factor=10,
    final_div_factor=100,
    steps_per_epoch=len(train_loader),
    epochs=epochs
)

criterion = nn.CrossEntropyLoss()

scaler = GradScaler()

# Training and Testing Loop
for epoch in range(epochs):
    # Training phase
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        # Move data to GPU
        images, labels = images.to(device), labels.to(device)

        # Forward and backward pass with AMP
        with autocast(device_type='cuda'):
            outputs = model(images)
            loss = criterion(outputs, labels)

        optimizer.zero_grad()
        scaler.scale(loss).backward()

        # Gradient clipping
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad)

        # Optimizer step
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        running_loss += loss.item()

    # Log training loss for the epoch
    print(f"Epoch [{epoch+1}/{epochs}], Training Loss: {running_loss/len(train_loader):.4f}")

    # Testing phase after each epoch
    model.eval()
    correct = 0
    total = 0
    test_loss = 0.0

    with torch.no_grad():
        for images, labels in test_loader:
            # Move data to GPU
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Log test accuracy and loss
    test_accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}], Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {test_accuracy:.2f}%")'''

'# Hyperparameters\nepochs = 150\nlearning_rate = 3e-4\nopt_eps = 1e-3\nclip_grad = 1.0\ndevice = \'cuda\'  \n\noptimizer = optim.AdamW(model.parameters(), lr=learning_rate, eps=opt_eps)\nscheduler = optim.lr_scheduler.OneCycleLR(\n    optimizer,\n    max_lr=learning_rate*10,\n    pct_start=0.3,\n    anneal_strategy=\'cos\',\n    div_factor=10,\n    final_div_factor=100,\n    steps_per_epoch=len(train_loader),\n    epochs=epochs\n)\n\ncriterion = nn.CrossEntropyLoss()\n\nscaler = GradScaler()\n\n# Training and Testing Loop\nfor epoch in range(epochs):\n    # Training phase\n    model.train()\n    running_loss = 0.0\n\n    for images, labels in train_loader:\n        # Move data to GPU\n        images, labels = images.to(device), labels.to(device)\n\n        # Forward and backward pass with AMP\n        with autocast(device_type=\'cuda\'):\n            outputs = model(images)\n            loss = criterion(outputs, labels)\n\n        optimizer.zero_grad()\n        scaler.scale(loss).back

In [8]:
torch.save(model, '/home/j597s263/scratch/j597s263/Models/ConvModels/Base/ConvMNIBase.mod')