In [3]:
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import f1_score, accuracy_score
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split, Subset
import numpy as np
import random
from torch.utils.data import Dataset
from PIL import Image

In [4]:
# Build an untrained ViT-B/16. `weights=None` is the current torchvision spelling of
# "no pretrained weights"; the older `pretrained=False` keyword is deprecated and
# emits a warning on recent releases.
# NOTE(review): this cell executes before the seeding cell that follows, so the random
# weight initialisation is not covered by the seed on a fresh Restart & Run All.
model = torchvision.models.vit_b_16(weights=None)



In [5]:
# Set a seed for reproducibility
seed = 42
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
# Optional: force deterministic cuDNN kernels (left disabled, as in the original run).
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark = False

# Training pipeline: upscale to the 224x224 input used here and apply light augmentation.
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor()
])

# Validation/test pipeline: deterministic, no augmentation.
test_val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Load the full CIFAR-10 dataset.
# The training split is instantiated twice, once per transform. The subsets returned by
# `random_split` share ONE underlying dataset object, so assigning
# `val_dataset.dataset.transform = ...` would also strip the augmentation from the
# training subset. Two instances keep the pipelines independent (at the cost of holding
# the training images in memory twice).
data_root = '../data'
train_full_dataset = torchvision.datasets.CIFAR10(root=data_root, train=True, download=True, transform=train_transform)
val_full_dataset = torchvision.datasets.CIFAR10(root=data_root, train=True, download=False, transform=test_val_transform)
test_dataset = torchvision.datasets.CIFAR10(root=data_root, train=False, download=True, transform=test_val_transform)

# Split the training dataset into training and validation sets (80/20, seeded).
train_size = int(0.8 * len(train_full_dataset))
val_size = len(train_full_dataset) - train_size
train_dataset, val_split = random_split(
    train_full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(seed),
)

# Reuse the validation indices on the un-augmented copy so the two subsets stay disjoint
# while each keeps its own transform.
val_dataset = Subset(val_full_dataset, val_split.indices)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Check the sizes
print(f"Train size: {len(train_loader.dataset)}")
print(f"Validation size: {len(val_loader.dataset)}")
print(f"Test size: {len(test_loader.dataset)}")

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [03:02<00:00, 935145.00it/s] 


Extracting ../data/cifar-10-python.tar.gz to ../data
Files already downloaded and verified
Train size: 40000
Validation size: 10000
Test size: 10000


In [6]:
# NOTE(review): this only overwrites the `out_features` attribute on the existing head
# layer; it does not reallocate the layer's weight/bias tensors, so the head presumably
# still emits its original number of logits. Resizing for 10 classes would mean
# replacing the layer (e.g. with a new torch.nn.Linear), which also changes the
# state_dict shapes expected by saved checkpoints — confirm before changing.
model.heads.head.out_features = 10

In [7]:
# Display the module tree to inspect the architecture.
model

VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_a

In [None]:
# Sanity check: fetch a single batch from the training loader and show the size of its
# image tensor (element 0 of the batch tuple).
next(iter(train_loader))[0].size()

In [None]:
# AdamW over every model parameter, paired with a cross-entropy objective.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=5e-5)
criterion = torch.nn.CrossEntropyLoss()

In [2]:
from tqdm import tqdm

In [None]:
# Epoch budget and the number of consecutive non-improving epochs tolerated
# before early stopping kicks in.
num_epochs, patience = 100, 2
# Early-stopping bookkeeping: lowest validation loss seen so far, and how many
# epochs in a row have failed to beat it.
best_val_loss, patience_counter = np.inf, 0

In [None]:
# Per-epoch history for the learning curves (one entry appended per epoch).
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

In [None]:
# Prefer the GPU, but fall back to the CPU so this cell does not crash on hosts
# without CUDA. Batches must be moved to the same device before the forward pass.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [None]:
# Define the training step function
def train_step(model, train_loader, optimizer, criterion):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to train; it is switched to training mode.
        train_loader: Sized iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        tuple: ``(avg_loss, accuracy)`` where ``avg_loss`` is the mean of the
        per-batch loss values and ``accuracy`` is the fraction of samples whose
        arg-max prediction equals the label.
    """
    # Follow the model's own device instead of assuming a GPU; the historical
    # "cuda" target is kept only as the fallback for a parameter-less model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in tqdm(train_loader, desc="Training"):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # detach() replaces the legacy `.data` access; the predictions need no gradient.
        predicted = outputs.detach().argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    avg_loss = running_loss / len(train_loader)
    accuracy = correct / total
    return avg_loss, accuracy

In [None]:
# Define the validation step function
def val_step(model, val_loader, criterion):
    """Score ``model`` on ``val_loader`` without updating any weights.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        val_loader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        tuple: ``(avg_val_loss, val_accuracy)`` where ``avg_val_loss`` is the mean
        of the per-batch loss values and ``val_accuracy`` is the fraction of
        samples whose arg-max prediction equals the label.
    """
    # Follow the model's own device instead of assuming a GPU; the historical
    # "cuda" target is kept only as the fallback for a parameter-less model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    # Gradients are not needed for scoring, so skip building the autograd graph.
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Validation"):
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            val_loss += criterion(outputs, labels).item()

            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    avg_val_loss = val_loss / len(val_loader)
    val_accuracy = correct / total
    return avg_val_loss, val_accuracy

In [None]:
for epoch in range(num_epochs):
    # Optimise on the training split and record this epoch's curve point.
    avg_train_loss, train_accuracy = train_step(model, train_loader, optimizer, criterion)
    train_losses.append(avg_train_loss)
    train_accuracies.append(train_accuracy)
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_train_loss}, Training Accuracy: {train_accuracy * 100:.2f}%")

    # Score the held-out validation split.
    avg_val_loss, val_accuracy = val_step(model, val_loader, criterion)
    val_losses.append(avg_val_loss)
    val_accuracies.append(val_accuracy)
    print(f"Validation Loss: {avg_val_loss}, Validation Accuracy: {val_accuracy * 100:.2f}%")

    # New best validation loss: checkpoint the weights, reset the patience
    # counter, and go straight to the next epoch.
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), "best_model.pth")
        print("Model saved.")
        continue

    # No improvement this epoch: stop once `patience` epochs in a row have
    # failed to beat the best validation loss.
    patience_counter += 1
    if patience_counter >= patience:
        print("Early stopping triggered.")
        break

In [8]:
# Load the best model for evaluation
# map_location="cpu" deserialises the tensors on the host, so a checkpoint saved from a
# GPU run also loads on a machine without CUDA; load_state_dict then copies the values
# into the model's parameters on whatever device they live on.
# NOTE(review): the training loop in this notebook saves "best_model.pth", while this
# cell reads "pixel-VIT-CIFAR10.pth" — confirm which checkpoint is intended.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
model.load_state_dict(torch.load("pixel-VIT-CIFAR10.pth", map_location="cpu"))

<All keys matched successfully>

In [12]:
# Define the evaluation function
def evaluate_model(model, test_loader):
    """Compute accuracy and weighted F1 of ``model`` over ``test_loader``.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        test_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        tuple: ``(accuracy, f1)`` as returned by ``accuracy_score`` and
        ``f1_score(..., average='weighted')`` over all collected predictions.
    """
    # Follow the model's own device instead of assuming a GPU; the historical
    # "cuda" target is kept only as the fallback for a parameter-less model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    model.eval()
    test_preds = []
    test_labels = []

    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc="Evaluating Test Set"):
            outputs = model(images.to(device))
            predicted = outputs.argmax(dim=1)
            test_preds.extend(predicted.cpu().numpy())
            # Labels are only consumed by the host-side metrics below, so they are
            # not sent to the accelerator and back.
            test_labels.extend(labels.cpu().numpy())

    # Calculate F1 Score and Accuracy
    f1 = f1_score(test_labels, test_preds, average='weighted')
    accuracy = accuracy_score(test_labels, test_preds)
    return accuracy, f1

# Score the loaded model on the test split (moving it to the GPU first) and report
# both metrics rounded to two decimals.
test_metrics = evaluate_model(model.to("cuda"), test_loader)
accuracy1, f1 = test_metrics
print("Accuracy 1: {:.2f} and f1: {:.2f}".format(*test_metrics))


Evaluating Test Set: 100%|██████████| 157/157 [03:36<00:00,  1.38s/it]

Accuracy 1: 0.96 and f1: 0.96



