# Train Models
Fine-tune pretrained torchvision models on the CIFAR-10 dataset.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
import random

# Choose the compute device in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = "cpu"
print("device:", device)

device: cuda


## Dataset

In [2]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


# Preprocessing pipeline shared by the train and test splits.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),  # resize every image to 224x224
        transforms.ToTensor(),  # convert images to PyTorch tensors
        # normalize each of the three channels with fixed mean / std constants
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465),
            std=(0.2023, 0.1994, 0.2010),
        ),
    ]
)

# CIFAR-10 train and test splits, stored under ./data with download enabled.
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


## Train image classifier model

### Hyperparameters for training models

In [4]:
from torchvision import models

# --- optimizer settings ---
INITIAL_LR = 0.001  # learning rate the optimizer starts with
MOMENTUM = 0.9      # momentum passed to the optimizer
REG = 1e-3          # L2 regularization strength (used as weight decay)

# --- schedule ---
EPOCHS = 5          # total number of training epochs
# Number of epochs between learning-rate decays. With DECAY_EPOCHS equal to
# EPOCHS, the decay branch of the training loop is never taken in this run.
DECAY_EPOCHS = 5
DECAY = 0.1         # factor the learning rate is multiplied by at each decay

# --- output ---
CHECKPOINT_FOLDER = "./saved_model"  # folder where the trained model is saved

num_classes = 10  # number of classes in CIFAR-10

# ResNet-50 model: start from pretrained weights, then replace the final
# fully connected layer so it produces one output per class.
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(
    in_features=model.fc.in_features,
    out_features=num_classes,
).to(device)
model = model.to(device)

# Alternative architectures: uncomment exactly one block (and comment out the
# ResNet-50 block above) to train a different model.

# DenseNet model
# model = models.densenet121(pretrained=True)
# model.classifier = nn.Linear(model.classifier.in_features, num_classes)
# model = model.to(device)

# VGG16 model
# model = models.vgg16(pretrained = True)
# model.classifier[6] = nn.Linear(model.classifier[6].in_features,num_classes)
# model = model.to(device)

# VGG19 model
# model = models.vgg19(pretrained=True)
# model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)
# model = model.to(device)

# Loss function
criterion = nn.CrossEntropyLoss()

# SGD optimizer over all model parameters; REG is applied as weight decay.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=INITIAL_LR,
    momentum=MOMENTUM,
    weight_decay=REG,
)


### Train model

In [None]:
import os
from tqdm import tqdm

# Train CNN models
train_loader = DataLoader(
    train_dataset,
    batch_size=64, shuffle=True
)

# Start the training process. There is no validation pass in this cell:
# best_acc tracks the best *training* accuracy seen so far.
best_acc = 0
current_learning_rate = INITIAL_LR

print("==> Training starts!")
print("="*50)
for i in range(0, EPOCHS):
    # Step learning-rate schedule: every DECAY_EPOCHS epochs (but never at
    # epoch 0) multiply the rate by DECAY and push it into the optimizer.
    if i % DECAY_EPOCHS == 0 and i != 0:
        current_learning_rate = current_learning_rate * DECAY
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_learning_rate
        print("Current learning rate has decayed to %f" %current_learning_rate)

    # switch to train mode
    model.train()

    print("Epoch %d:" %i)
    # running counters used to compute the training accuracy
    total_examples = 0
    correct_examples = 0

    # sum of the per-batch losses over the epoch
    train_loss = 0

    # Train the model for 1 epoch. The description does not change within an
    # epoch, so it is set once on the progress bar instead of on every batch.
    with tqdm(train_loader, unit="batch", desc=f"Epoch {i}: Training") as t:
        for batch_idx, (inputs, targets) in enumerate(t):
            # copy inputs to device
            inputs = inputs.to(device)
            targets = targets.to(device)

            # compute the output and loss
            outputs = model(inputs)
            loss = criterion(outputs, targets.long())
            train_loss += loss.item()

            # zero the gradient
            optimizer.zero_grad()

            # backpropagation
            loss.backward()

            # apply gradient and update the weights
            optimizer.step()

            # count the number of correctly predicted samples in the current batch
            predicted = outputs.argmax(dim=1)
            correct_examples += (predicted == targets).sum().item()
            total_examples += inputs.shape[0]

            # Show the mean loss over the batches processed so far. Dividing
            # by len(train_loader) here would under-report the loss until the
            # final batch of the epoch.
            t.set_postfix(train_loss=train_loss / (batch_idx + 1))

    # epoch summary: mean of the per-batch losses and overall accuracy
    avg_loss = train_loss / len(train_loader)
    avg_acc = correct_examples / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))

    # Save a checkpoint whenever the training accuracy improves.
    # NOTE: the file name is fixed to 'resnet50.pth' regardless of which
    # architecture was selected when the model was built.
    if avg_acc > best_acc:
        best_acc = avg_acc
        os.makedirs(CHECKPOINT_FOLDER, exist_ok=True)
        print("Saving ...")
        state = {'state_dict': model.state_dict(),
                 'epoch': i,
                 'lr': current_learning_rate}
        torch.save(state, os.path.join(CHECKPOINT_FOLDER, 'resnet50.pth'))

print("="*50)
print(f"==> Optimization finished! Best training accuracy: {best_acc:.4f}")

==> Training starts!
Epoch 0:


Epoch 0: Training: 100%|██████████| 782/782 [06:04<00:00,  2.15batch/s, train_loss=0.433]  


### Evaluate model test accuracy

In [6]:
# Test model accuracy
from torchvision import models
import os
from util import test_model

# Loader over the test split; order is kept fixed (no shuffling).
test_loader = DataLoader(
    test_dataset,
    batch_size=64, shuffle=False
)

num_classes = 10  # Number of classes in CIFAR-10

# NOTE(review): the checkpoint names used below ('resnet50_.pth', 'vgg16_.pth',
# ...) differ from the 'resnet50.pth' file written by the training cell, so
# they are presumably produced or renamed outside this notebook -- confirm
# before relying on Restart & Run All.
#
# Every torch.load call passes map_location=device so that a checkpoint saved
# on one device type (e.g. CUDA) can still be loaded when the notebook runs on
# another (CPU / MPS).

# ResNet-50 model
# model = models.resnet50(pretrained=True)  # Initialize ResNet-50 model
# model.fc = nn.Linear(model.fc.in_features, num_classes).to(device)  # Modify output layer to match number of classes
# model = model.to(device)
# model.load_state_dict(torch.load(os.path.join(CHECKPOINT_FOLDER, 'resnet50_.pth'), map_location=device)['state_dict'])

# DenseNet model
# model = models.densenet121(pretrained=True)
# model.classifier = nn.Linear(model.classifier.in_features, num_classes)
# model = model.to(device)
# model.load_state_dict(torch.load(os.path.join(CHECKPOINT_FOLDER, 'densenet_.pth'), map_location=device)['state_dict'])

# VGG16: rebuild the architecture with a num_classes-way final layer, then
# overwrite its weights with the saved checkpoint.
model = models.vgg16(pretrained = True)
model.classifier[6] = nn.Linear(model.classifier[6].in_features,num_classes)
model = model.to(device)
checkpoint = torch.load(os.path.join(CHECKPOINT_FOLDER, 'vgg16_.pth'), map_location=device)
model.load_state_dict(checkpoint['state_dict'])

# VGG19 model
# model = models.vgg19(pretrained=True)
# model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)
# model = model.to(device)
# model.load_state_dict(torch.load(os.path.join(CHECKPOINT_FOLDER, 'vgg19_.pth'), map_location=device)['state_dict'])

# Switch to evaluation mode (the model is already on `device`).
model.eval()

# test_model comes from the local util module; the displayed result is a
# 2-tuple, presumably (accuracy, loss) -- confirm against util.test_model.
test_model(model, test_loader, device)



(0.931, 0.21431485845879383)