In [1]:
import torch
import torchvision
import time
import os
import numpy as np
from tempfile import TemporaryDirectory
from torch import nn, optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, ConcatDataset, sampler
from torchvision import datasets
from torchvision import transforms as T
import matplotlib.pyplot as plt
from torchvision.models import googlenet, GoogLeNet_Weights
%matplotlib inline

In [2]:
GPU_USE = True
device = None

random_seed = 42
np.random.seed(None)
torch.manual_seed(random_seed)

if GPU_USE and torch.cuda.is_available():
    device = torch.device('cuda')
    torch.cuda.manual_seed(random_seed)
else:
    device = torch.device('cpu')

dtype = torch.float32

print("--------------------------------")
print(f"Device in use: {device}")
print("--------------------------------")

--------------------------------
Device in use: cuda
--------------------------------


In [3]:
TOTAL = 100000
NUM_TRAIN = 98000
NUM_VAL = TOTAL - NUM_TRAIN
NUM_TEST = 20000
batch_size = 64

In [19]:
generator = torch.Generator().manual_seed(random_seed)

train_sampler = sampler.SubsetRandomSampler(range(NUM_TRAIN), generator=generator)
val_sampler = sampler.SubsetRandomSampler(range(NUM_TRAIN, TOTAL), generator=generator)

transform1 = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

transform2 = T.Compose([
    T.Pad(padding=4),
    T.RandomCrop(size=(32, 32)),
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

transforms = [transform1, transform2]

train_datasets = [datasets.CIFAR10(root='./data', transform=transform, train=True) for transform in transforms]
test_datasets = [datasets.CIFAR10(root='./data', transform=transform, train=False) for transform in transforms]

train_dataset = ConcatDataset(train_datasets)
test_dataset = ConcatDataset(test_datasets)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, sampler=train_sampler)
val_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, sampler=val_sampler)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size)

data_loaders = {"train":train_loader, "val":val_loader}
test_loaders = {"val":val_loader, "test":test_loader}
data_sizes = {"train":NUM_TRAIN, "val":NUM_VAL, "test":NUM_TEST}

In [5]:
num_classes = 10
learning_rate = 1e-3
weight_decay = 5e-6
epochs = 20

In [6]:
net = googlenet(weights=GoogLeNet_Weights.DEFAULT)

In [7]:
num_features = net.fc.in_features
net.fc = nn.Linear(num_features, num_classes)
net = net.to(device)

In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)

In [9]:
for name, param in net.named_parameters():
    print(name, param.requires_grad)

conv1.conv.weight True
conv1.bn.weight True
conv1.bn.bias True
conv2.conv.weight True
conv2.bn.weight True
conv2.bn.bias True
conv3.conv.weight True
conv3.bn.weight True
conv3.bn.bias True
inception3a.branch1.conv.weight True
inception3a.branch1.bn.weight True
inception3a.branch1.bn.bias True
inception3a.branch2.0.conv.weight True
inception3a.branch2.0.bn.weight True
inception3a.branch2.0.bn.bias True
inception3a.branch2.1.conv.weight True
inception3a.branch2.1.bn.weight True
inception3a.branch2.1.bn.bias True
inception3a.branch3.0.conv.weight True
inception3a.branch3.0.bn.weight True
inception3a.branch3.0.bn.bias True
inception3a.branch3.1.conv.weight True
inception3a.branch3.1.bn.weight True
inception3a.branch3.1.bn.bias True
inception3a.branch4.1.conv.weight True
inception3a.branch4.1.bn.weight True
inception3a.branch4.1.bn.bias True
inception3b.branch1.conv.weight True
inception3b.branch1.bn.weight True
inception3b.branch1.bn.bias True
inception3b.branch2.0.conv.weight True
incepti

In [10]:
def train_model(model, data_loaders, data_sizes, optimizer, criterion, lr_scheduler, num_epochs=20):
    since = time.time()
    
    with TemporaryDirectory() as temp_dir:
        best_model_params_path = os.path.join(temp_dir, "best_model_params.pt")
        
        torch.save(model.state_dict(), best_model_params_path)
        best_acc = -1
        
        for i in range(num_epochs):
            print(f"Epoch: {i + 1}/{num_epochs}")
            print("-" * 20)
            
            for mode in ["train", "val"]:
                
                if mode == "train":
                    model.train()
                else:
                    model.eval()
                
                running_accuracy = 0
                loss_history = []
                
                for inputs, labels in data_loaders[mode]:
                    inputs = inputs.to(device=device, dtype=dtype)
                    labels = labels.to(device=device, dtype=torch.long)
                    
                    with torch.set_grad_enabled(mode == "train"):
                        optimizer.zero_grad()
                        output = model(inputs)
                        pred_labels = torch.argmax(output, axis=1)
                        loss = criterion(output, labels)

                        if mode == "train":
                            loss.backward()
                            optimizer.step()
                    
                    running_accuracy += (pred_labels == labels).sum()
                    loss_history.append(loss.item())
                
                epoch_accuracy = running_accuracy / data_sizes[mode]
                epoch_loss = torch.tensor([loss_history]).mean()
                
                print(f"{mode} Acc: {epoch_accuracy:.4f}%, Loss: {epoch_loss:.4f}")
                
                if mode == "train":
                    scheduler.step()
                elif mode == "val" and epoch_accuracy > best_acc:
                    best_acc = epoch_accuracy
                    torch.save(model.state_dict(), best_model_params_path)
            print()
        
        time_elapsed = time.time() - since
        print(f"Training completed in {time_elapsed//60:.0f}m {time_elapsed%60:.0f}s")
        print(f"Best accuracy: {best_acc}%")
        
        model.load_state_dict(torch.load(best_model_params_path))
    
    return model

In [11]:
model = train_model(net, data_loaders, data_sizes, optimizer, criterion, scheduler)

Epoch: 1/20
--------------------
train Acc: 0.8572%, Loss: 0.4203
val Acc: 0.8855%, Loss: 0.3000

Epoch: 2/20
--------------------
train Acc: 0.9191%, Loss: 0.2380
val Acc: 0.9065%, Loss: 0.2621

Epoch: 3/20
--------------------
train Acc: 0.9396%, Loss: 0.1756
val Acc: 0.9360%, Loss: 0.1822

Epoch: 4/20
--------------------
train Acc: 0.9537%, Loss: 0.1351
val Acc: 0.9470%, Loss: 0.1522

Epoch: 5/20
--------------------
train Acc: 0.9611%, Loss: 0.1125
val Acc: 0.9600%, Loss: 0.1146

Epoch: 6/20
--------------------
train Acc: 0.9714%, Loss: 0.0823
val Acc: 0.9635%, Loss: 0.1287

Epoch: 7/20
--------------------
train Acc: 0.9746%, Loss: 0.0731
val Acc: 0.9585%, Loss: 0.1131

Epoch: 8/20
--------------------
train Acc: 0.9777%, Loss: 0.0655
val Acc: 0.9685%, Loss: 0.0802

Epoch: 9/20
--------------------
train Acc: 0.9805%, Loss: 0.0563
val Acc: 0.9660%, Loss: 0.0959

Epoch: 10/20
--------------------
train Acc: 0.9804%, Loss: 0.0560
val Acc: 0.9690%, Loss: 0.0973

Epoch: 11/20
------

In [13]:
directory = 'C:\weights'
file_name = "inception_weights.pth"

In [14]:
path = os.path.join(directory, file_name)

In [16]:
torch.save(model.state_dict(), path)

In [30]:
def test_model(model, data_loaders, data_sizes):
    model.eval()
    modes = ['val', 'test']
    print("Accuracy test")
    print("-"*20)
    
    for mode in modes:
        running_accuracy = 0
        for inputs, labels in data_loaders[mode]:
            inputs = inputs.to(device=device, dtype=dtype)
            labels = labels.to(device=device, dtype=torch.long)
            outputs = model(inputs)
            pred_labels = torch.argmax(outputs, axis=1)
            n_correct = (pred_labels == labels).sum()
            running_accuracy += n_correct
        
        accuracy = (running_accuracy / data_sizes[mode]) * 100
        print(f"{mode}: {accuracy:.1f}%")
        

In [31]:
test_model(model, test_loaders, data_sizes)

Accuracy test
--------------------
val: 98.5%
test: 92.9%
