### Basic Imports

In [1]:
import os
import time
import os.path as osp

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

from torchvision import datasets
from torchvision import transforms
import torchvision

import matplotlib.pyplot as plt
from PIL import Image

# random seed
SEED = 1 
NUM_CLASS = 10

# Training
BATCH_SIZE = 128
NUM_EPOCHS = 30
EVAL_INTERVAL=1
SAVE_DIR = './log'

# Optimizer
LEARNING_RATE = 1e-1
MOMENTUM = 0.9
STEP=5
GAMMA=0.5


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

### Model

In [2]:
# CIFAR-100 transform
transform_cifar100_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_cifar100_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

train_set_cifar100 = torchvision.datasets.CIFAR100(root='../data', train=True,
                                                   download=True, transform=transform_cifar100_train)
train_dataloader_cifar100 = torch.utils.data.DataLoader(train_set_cifar100, batch_size=BATCH_SIZE,
                                                        shuffle=True, num_workers=2)

test_set_cifar100 = torchvision.datasets.CIFAR100(root='../data', train=False,
                                                  download=True, transform=transform_cifar100_test)
test_dataloader_cifar100 = torch.utils.data.DataLoader(test_set_cifar100, batch_size=BATCH_SIZE,
                                                       shuffle=False, num_workers=2)

class_names_cifar100 = [str(i) for i in range(100)]  # CIFAR-100 has 100 classes

# Modify the model to accommodate 100 output classes
class ConvNetCIFAR100(nn.Module):
    def __init__(self):
        super(ConvNetCIFAR100, self).__init__()
        self.conv1 = nn.Conv2d(3, 4, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(4, 8, 3)
        self.fc1 = nn.Linear(8 * 6 * 6, 32)
        self.fc2 = nn.Linear(32, 100)  # Change the output to 100 classes for CIFAR-100

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 8 * 6 * 6)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the modified model
model_cifar100 = ConvNetCIFAR100()
model_cifar100.to(device)

optimizer_cifar100 = optim.SGD(model_cifar100.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

scheduler_cifar100 = torch.optim.lr_scheduler.StepLR(optimizer_cifar100, step_size=STEP, gamma=GAMMA)

criterion_cifar100 = nn.CrossEntropyLoss()

def train_batch(model_cifar100, image, target):
    output = model_cifar100(image)
    loss = criterion_cifar100(output, target)
    return output, loss


def test_batch(model_cifar100, image, target):
    output = model_cifar100(image)
    loss = criterion_cifar100(output, target)
    return output, loss

training_loss = []
training_acc = []
testing_loss = []
testing_acc = []

for epoch in range(NUM_EPOCHS):
    model_cifar100.train()
    torch.cuda.empty_cache()

    ##########################
    ### Training
    ##########################

    running_cls_loss = 0.0
    running_cls_corrects = 0

    for batch_idx, (image, target) in enumerate(train_dataloader_cifar100):

        image = image.to(device)
        target = target.to(device)

        # train model
        outputs, loss = train_batch(model_cifar100, image, target)
        _, preds = torch.max(outputs, 1)

        
        loss_data = loss.data.item()
        if np.isnan(loss_data):
            raise ValueError('loss is nan while training')
        running_cls_loss += loss.item()
        running_cls_corrects += torch.sum(preds == target.data)

        loss.backward()
        optimizer_cifar100.step()
        optimizer_cifar100.zero_grad()

    epoch_loss = running_cls_loss / len(train_set_cifar100)
    epoch_acc = running_cls_corrects.double() / len(train_set_cifar100)

    print(f'Epoch: {epoch+1}/{NUM_EPOCHS} Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

    training_loss.append(epoch_loss)
    training_acc.append(epoch_acc.cpu().detach().numpy())

    # change learning rate
    scheduler_cifar100.step()


    ##########################
    ### Testing
    ##########################
    # # eval model during training or in the last epoch
    if (epoch + 1) % EVAL_INTERVAL == 0 or (epoch +1) == NUM_EPOCHS:
        print('Begin test......')
        model_cifar100.eval()
    
        val_loss = 0.0
        val_corrects = 0

        for batch_idx, (image, target) in enumerate(test_dataloader_cifar100):

            image = image.to(device)
            target = target.to(device)

            # test model
            outputs, loss = test_batch(model_cifar100, image, target)
            _, preds = torch.max(outputs, 1)
            
            val_loss += loss.item()
            val_corrects += torch.sum(preds == target.data)

        val_loss = val_loss / len(test_set_cifar100)
        val_acc = val_corrects.double() / len(test_set_cifar100)
        print(f'Test Loss: {val_loss:.4f} Acc: {val_acc:.4f}')
        testing_loss.append(val_loss)
        testing_acc.append(val_acc.cpu().detach().numpy())

        # save the model in last epoch
        if (epoch +1) == NUM_EPOCHS:
            
            state = {
            'state_dict': model_cifar100.state_dict(),
            'acc': epoch_acc,
            'epoch': (epoch+1),
            }

            # check the dir
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)

            # save the state
            torch.save(state, osp.join(SAVE_DIR, 'checkpoint_%s.pth' % (str(epoch+1))))

Files already downloaded and verified
Files already downloaded and verified
Epoch: 1/30 Train Loss: 0.0334 Acc: 0.0467
Begin test......
Test Loss: 0.0326 Acc: 0.0536
Epoch: 2/30 Train Loss: 0.0318 Acc: 0.0705
Begin test......
Test Loss: 0.0309 Acc: 0.0908
Epoch: 3/30 Train Loss: 0.0310 Acc: 0.0822
Begin test......
Test Loss: 0.0311 Acc: 0.0951
Epoch: 4/30 Train Loss: 0.0308 Acc: 0.0892
Begin test......
Test Loss: 0.0305 Acc: 0.1006
Epoch: 5/30 Train Loss: 0.0307 Acc: 0.0901
Begin test......
Test Loss: 0.0305 Acc: 0.1065
Epoch: 6/30 Train Loss: 0.0295 Acc: 0.1159
Begin test......
Test Loss: 0.0289 Acc: 0.1344
Epoch: 7/30 Train Loss: 0.0290 Acc: 0.1248
Begin test......
Test Loss: 0.0285 Acc: 0.1460
Epoch: 8/30 Train Loss: 0.0289 Acc: 0.1288
Begin test......
Test Loss: 0.0287 Acc: 0.1423
Epoch: 9/30 Train Loss: 0.0287 Acc: 0.1312
Begin test......
Test Loss: 0.0280 Acc: 0.1523
Epoch: 10/30 Train Loss: 0.0285 Acc: 0.1395
Begin test......
Test Loss: 0.0280 Acc: 0.1538
Epoch: 11/30 Train Loss

In [3]:
# STL-10 transform
transform_stl10_train = transforms.Compose([
    transforms.RandomCrop(96, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

transform_stl10_test = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

train_set_stl10 = torchvision.datasets.STL10(root='../data', split='train',
                                             download=True, transform=transform_stl10_train)
train_dataloader_stl10 = torch.utils.data.DataLoader(train_set_stl10, batch_size=BATCH_SIZE,
                                                     shuffle=True, num_workers=2)

test_set_stl10 = torchvision.datasets.STL10(root='../data', split='test',
                                            download=True, transform=transform_stl10_test)
test_dataloader_stl10 = torch.utils.data.DataLoader(test_set_stl10, batch_size=BATCH_SIZE,
                                                    shuffle=False, num_workers=2)

class_names_stl10 = ['airplane', 'bird', 'car', 'cat', 'deer', 'dog', 'horse', 'monkey', 'ship', 'truck']

# Modify the model to accommodate 10 output classes
class ConvNetSTL10(nn.Module):
    def __init__(self):
        super(ConvNetSTL10, self).__init__()
        self.conv1 = nn.Conv2d(3, 4, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(4, 8, 3)
        self.fc1 = nn.Linear(8 * 6 * 6, 32)
        self.fc2 = nn.Linear(32, 10)  # Change the output to 10 classes for STL-10

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 8 * 6 * 6)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the modified model
model_stl10 = ConvNetSTL10()
model_stl10.to(device)

optimizer_stl10 = optim.SGD(model_stl10.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

scheduler_stl10 = torch.optim.lr_scheduler.StepLR(optimizer_stl10, step_size=STEP, gamma=GAMMA)

criterion_stl10 = nn.CrossEntropyLoss()

def train_batch(model_stl10, image, target):
    output = model_stl10(image)
    loss = criterion_stl10(output, target)
    return output, loss


def test_batch(model_stl10, image, target):
    output = model_stl10(image)
    loss = criterion_cifar100(output, target)
    return output, loss

training_loss = []
training_acc = []
testing_loss = []
testing_acc = []

for epoch in range(NUM_EPOCHS):
    model_stl10.train()
    torch.cuda.empty_cache()

    ##########################
    ### Training
    ##########################

    running_cls_loss = 0.0
    running_cls_corrects = 0

    for batch_idx, (image, target) in enumerate(train_dataloader_stl10):

        image = image.to(device)
        target = target.to(device)

        # train model
        outputs, loss = train_batch(model_stl10, image, target)
        _, preds = torch.max(outputs, 1)

        
        loss_data = loss.data.item()
        if np.isnan(loss_data):
            raise ValueError('loss is nan while training')
        running_cls_loss += loss.item()
        running_cls_corrects += torch.sum(preds == target.data)

        loss.backward()
        optimizer_stl10.step()
        optimizer_stl10.zero_grad()

    epoch_loss = running_cls_loss / len(train_set_stl10)
    epoch_acc = running_cls_corrects.double() / len(train_set_stl10)

    print(f'Epoch: {epoch+1}/{NUM_EPOCHS} Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

    training_loss.append(epoch_loss)
    training_acc.append(epoch_acc.cpu().detach().numpy())

    # change learning rate
    scheduler_stl10.step()


    ##########################
    ### Testing
    ##########################
    # # eval model during training or in the last epoch
    if (epoch + 1) % EVAL_INTERVAL == 0 or (epoch +1) == NUM_EPOCHS:
        print('Begin test......')
        model_stl10.eval()
    
        val_loss = 0.0
        val_corrects = 0

        for batch_idx, (image, target) in enumerate(test_dataloader_stl10):

            image = image.to(device)
            target = target.to(device)

            # test model
            outputs, loss = test_batch(model_stl10, image, target)
            _, preds = torch.max(outputs, 1)
            
            val_loss += loss.item()
            val_corrects += torch.sum(preds == target.data)

        val_loss = val_loss / len(test_set_stl10)
        val_acc = val_corrects.double() / len(test_set_stl10)
        print(f'Test Loss: {val_loss:.4f} Acc: {val_acc:.4f}')
        testing_loss.append(val_loss)
        testing_acc.append(val_acc.cpu().detach().numpy())

        # save the model in last epoch
        if (epoch +1) == NUM_EPOCHS:
            
            state = {
            'state_dict': model_stl10.state_dict(),
            'acc': epoch_acc,
            'epoch': (epoch+1),
            }

            # check the dir
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)

            # save the state
            torch.save(state, osp.join(SAVE_DIR, 'checkpoint_%s.pth' % (str(epoch+1))))

Files already downloaded and verified
Files already downloaded and verified
Epoch: 1/30 Train Loss: 0.0173 Acc: 0.2024
Begin test......
Test Loss: 0.0175 Acc: 0.1983
Epoch: 2/30 Train Loss: 0.0161 Acc: 0.2396
Begin test......
Test Loss: 0.0229 Acc: 0.1852
Epoch: 3/30 Train Loss: 0.0170 Acc: 0.2192
Begin test......
Test Loss: 0.0165 Acc: 0.2144
Epoch: 4/30 Train Loss: 0.0158 Acc: 0.2594
Begin test......
Test Loss: 0.0150 Acc: 0.2943
Epoch: 5/30 Train Loss: 0.0148 Acc: 0.2942
Begin test......
Test Loss: 0.0145 Acc: 0.2986
Epoch: 6/30 Train Loss: 0.0140 Acc: 0.3364
Begin test......
Test Loss: 0.0138 Acc: 0.3282
Epoch: 7/30 Train Loss: 0.0135 Acc: 0.3636
Begin test......
Test Loss: 0.0133 Acc: 0.3623
Epoch: 8/30 Train Loss: 0.0133 Acc: 0.3648
Begin test......
Test Loss: 0.0132 Acc: 0.3530
Epoch: 9/30 Train Loss: 0.0130 Acc: 0.3762
Begin test......
Test Loss: 0.0131 Acc: 0.3659
Epoch: 10/30 Train Loss: 0.0129 Acc: 0.3798
Begin test......
Test Loss: 0.0132 Acc: 0.3788
Epoch: 11/30 Train Loss