# Testing local GPU vs. Colab GPU performance using a ResNet-18 classifier on the CIFAR-100 dataset with PyTorch




In [None]:
import torch
from torch import nn, optim
from torchvision import transforms, models, datasets
import matplotlib.pyplot as plt
from time import time
from datetime import datetime
import numpy as np
import pandas as pd
import re
import os

In [None]:
# Settings and parameters

# --- Runtime switches ---
gdrive_mounted = False
use_gpu = True
local_runtime = False    # local runtime with CUDA on Win10

# CUDA is used only when it was requested *and* torch reports it as available
gpu_selected_and_available = use_gpu and torch.cuda.is_available()

# --- Experiment hyper-parameters ---
num_epochs = 10
batch_size = 64
learning_rate = 1e-3

# Loss and optimizer are stored as classes here; they are instantiated later
selected_optimizer = optim.SGD
selected_criterion = nn.CrossEntropyLoss

In [None]:
# Print backend info
#
# Shows the hardware this run will use: GPU details through nvidia-smi when
# CUDA was requested and is available, CPU details otherwise.
# The lines starting with `!` are IPython shell escapes, so this cell only
# runs inside a notebook / IPython session, not as a plain Python script.

print('Backend info:')

if local_runtime:
    # Local runtime (Windows, per the settings cell): Windows-specific tools
    if gpu_selected_and_available:
        !"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe"
    else:
        !wmic cpu get caption, deviceid, name, numberofcores, maxclockspeed, status
else:
    # Non-local runtime: nvidia-smi from PATH, or the /proc CPU listing
    if gpu_selected_and_available:
        !nvidia-smi
    else:
        !cat /proc/cpuinfo

In [None]:
# Directory settings - restart runtime if runtime settings changed!
#
# Picks a working directory for the selected runtime, makes it the current
# directory, determines a device name, and creates a per-run results folder.

if not local_runtime:
    if gdrive_mounted:
        work_dir = '/content/drive/My Drive/GPU_tests/Resnet_GPU_tests'
    else:
        # NOTE(review): the other work_dir values end in 'Resnet_GPU_tests';
        # 'GAN_GPU_tests' may be a copy-paste leftover - confirm before renaming.
        work_dir = '/content/GAN_GPU_tests'
else:
    work_dir = 'C://OtherProjects//GPU_tests//Resnet_GPU_tests'
os.makedirs(work_dir, exist_ok=True)
# Everything below relies on os.getcwd(), so the chdir must happen first
os.chdir(work_dir)

# device_name is used below as part of the results folder name
if gpu_selected_and_available:
    # `name = !cmd` (IPython only) captures the command's output lines;
    # index 0 keeps the first line, i.e. the first GPU name reported.
    if local_runtime:
        device_name = !"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" --query-gpu=name --format=csv,noheader
        device_name = device_name[0]
    else:
        device_name = !nvidia-smi --query-gpu=name --format=csv,noheader
        device_name = device_name[0]
else:
    if local_runtime:
        device_name = 'Local_CPU'
    else:
        device_name = 'Colab_CPU'

data_path = os.path.join(os.getcwd(), 'data')
# Results folder: results/<device name with spaces as underscores>_<YYYYMMDDHHMMSS>
# (the timestamp drops fractional seconds and the '-', ' ' and ':' separators)
save_dir = os.path.join(os.getcwd(), 'results',
                        re.sub(' ', '_', device_name) + '_' +\
                        re.sub('-| |:', '', str(datetime.now()).split('.')[0]))
os.makedirs(save_dir, exist_ok=True)

In [None]:
# Load PyTorch implementation of Resnet18
#
# Requests the torchvision model with pretrained weights.
# NOTE(review): recent torchvision releases replace `pretrained=` with a
# `weights=` argument - confirm against the installed version.

model = models.resnet18(pretrained=True)
# Bare expression: shows the model architecture as the notebook cell output
model

In [None]:
# Adapt last layer for 100 classes instead of 1000
#
# The final fully connected layer is replaced by a fresh 100-output layer.
# Its input width is read from the layer being replaced instead of being
# hard-coded, so the cell keeps working if another ResNet variant is loaded.

model.fc = nn.Linear(in_features=model.fc.in_features, out_features=100, bias=True)

# Move the model (including the new layer) to the GPU when one is in use
if gpu_selected_and_available:
    model.cuda()

# Bare expression: shows the adapted architecture as the notebook cell output
model

In [None]:
# Define data transformations
#
# Applied to every image in order: resize, centre crop to 224,
# convert to a tensor, then normalise each channel.

trs = transforms.Compose([
    transforms.Resize(size=260),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

In [None]:
# Load and transform training and validation datasets
#
# Both splits live under data_path, are downloaded when missing, and share
# the same transformation pipeline.

train_dataset = datasets.CIFAR100(
    root=data_path, train=True, download=True, transform=trs)
valid_dataset = datasets.CIFAR100(
    root=data_path, train=False, download=True, transform=trs)

In [None]:
# Create data loaders
#
# One loader per phase, both with the same batch size and a single worker.

train_data_gen = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size,
    num_workers=1, pin_memory=False)
valid_data_gen = torch.utils.data.DataLoader(
    dataset=valid_dataset, batch_size=batch_size,
    num_workers=1, pin_memory=False)

# Lookup tables keyed by phase name
dataloaders = dict(train=train_data_gen, valid=valid_data_gen)

# Number of samples behind each loader
dataset_sizes = {phase: len(loader.dataset)
                 for phase, loader in dataloaders.items()}

In [None]:
# Loss function and optimizer
#
# Instantiate the classes chosen in the settings cell; the optimizer is
# bound to the model's parameters with the configured learning rate.

optimizer = selected_optimizer(params=model.parameters(), lr=learning_rate)
criterion = selected_criterion()

In [None]:
# Training procedure

def train_model(model, criterion, optimizer, num_epochs=1, data_loaders=None):
    """Train and validate ``model``; return it with its best weights loaded.

    Every epoch runs a 'train' phase followed by a 'valid' phase, prints the
    loss and accuracy of each phase, and records them together with the
    epoch's wall-clock time. After the last epoch the weights from the epoch
    with the highest validation accuracy are loaded back into the model.

    Args:
        model: Module to train. Batches are moved to the device of its
            first parameter.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            The reported epoch loss assumes it returns the mean loss over
            the batch (the default reduction of ``nn.CrossEntropyLoss``).
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of train + valid passes to run.
        data_loaders: Optional mapping with 'train' and 'valid' keys whose
            values yield ``(inputs, labels)`` batches. When omitted, the
            notebook-level ``dataloaders`` dict is used.

    Returns:
        Tuple ``(model, stats)``. ``stats`` is a dict of equally long lists
        with keys 'Epoch', 'Epoch_time', 'Epoch_train_loss',
        'Epoch_valid_loss', 'Epoch_train_accuracy' and
        'Epoch_valid_accuracy'.
    """
    # Local import: the notebook's import cell does not bring in `copy`
    import copy

    if data_loaders is None:
        data_loaders = dataloaders

    # Loop-invariant: where the model lives decides where batches must go
    device = next(model.parameters()).device

    # state_dict() hands back references to the live parameter tensors, so a
    # real copy is required to keep a snapshot that later steps do not modify
    best_model_weights = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    stats = {'Epoch': [],
             'Epoch_time': [],
             'Epoch_train_loss': [],
             'Epoch_valid_loss': [],
             'Epoch_train_accuracy': [],
             'Epoch_valid_accuracy': []}

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        t0 = time()

        for phase in ['train', 'valid']:
            is_training = phase == 'train'
            model.train(is_training)

            running_loss = 0.0
            running_corrects = 0
            samples_seen = 0

            # Iterate over data
            for inputs, labels in data_loaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Track gradients only while training; validation then skips
                # building the autograd graph
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    # Backward pass + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                _, preds = torch.max(outputs, 1)

                # The criterion yields a per-batch mean, so weight it by the
                # batch size to obtain a true per-sample average at the end
                batch_count = labels.size(0)
                running_loss += loss.item() * batch_count
                running_corrects += torch.sum(preds == labels).item()
                samples_seen += batch_count

            # Guard against an empty loader to avoid dividing by zero
            denominator = max(samples_seen, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator
            if is_training:
                stats['Epoch_train_loss'].append(epoch_loss)
                stats['Epoch_train_accuracy'].append(epoch_acc)
                print(f'Training loss: {epoch_loss:.4f}\n'
                      f'Training accuracy: {epoch_acc:.4f}')
            else:
                stats['Epoch_valid_loss'].append(epoch_loss)
                stats['Epoch_valid_accuracy'].append(epoch_acc)
                print(f'Validation loss: {epoch_loss:.4f}\n'
                      f'Validation accuracy: {epoch_acc:.4f}')

                # Snapshot the weights whenever validation accuracy improves
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_weights = copy.deepcopy(model.state_dict())

        stats['Epoch'].append(epoch + 1)
        epoch_time = time() - t0
        stats['Epoch_time'].append(epoch_time)
        print(f'Epoch time: {epoch_time:.1f} secs')
        print()

    total_time = sum(stats['Epoch_time'])
    print('Training complete in {:.0f}m {:.0f}s'.format(
           total_time // 60, total_time % 60))
    print(f'Best validation accuracy: {best_acc:.4f}')

    # Load best model weights
    model.load_state_dict(best_model_weights)
    return model, stats

In [None]:
# Run training
#
# Rebinds `model` to the trained model and keeps the per-epoch statistics.

model, stats = train_model(model=model,
                           criterion=criterion,
                           optimizer=optimizer,
                           num_epochs=num_epochs)

In [None]:
# Save stats
# One row per epoch, written without the index column to <save_dir>/stats.csv
stats_df = pd.DataFrame(data=stats)
stats_df.to_csv(os.path.join(save_dir, 'stats.csv'),
                index=False)

In [None]:
# Print and save epoch losses plot
# Training and validation loss per epoch, labelled directly on each curve
plt.plot(stats['Epoch'], stats['Epoch_train_loss'], label='train_loss')
plt.plot(stats['Epoch'], stats['Epoch_valid_loss'], label='valid_loss')
plt.title('Epoch losses')
plt.legend()
plt.savefig(os.path.join(save_dir, 'epoch_losses.png'))

In [None]:
# Print and save epoch accuracy plot
# Training and validation accuracy per epoch, labelled directly on each curve
plt.plot(stats['Epoch'], stats['Epoch_train_accuracy'], label='train_acc')
plt.plot(stats['Epoch'], stats['Epoch_valid_accuracy'], label='valid_acc')
plt.title('Epoch accuracy')
plt.legend()
plt.savefig(os.path.join(save_dir, 'epoch_accuracy.png'))

In [None]:
# Print and save epoch times plot
# Time recorded for each epoch by train_model (train + valid phases), in seconds
plt.plot(stats['Epoch'], stats['Epoch_time'])
plt.title('Epoch times')
# Written into this run's results folder next to stats.csv
plt.savefig(os.path.join(save_dir, 'epoch_times.png'))