In [1]:
import os
import torch
import torch
import torch.nn as nn
import torchvision
import tarfile
import torchvision.datasets as datasets
import torchvision.transforms as tt
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#matplotlib.rcParams['figure.facecolor'] = '#ffffff'

In [2]:
from torchvision.datasets.utils import download_url


# Dowload the dataset
dataset_url = "https://s3.amazonaws.com/fast-ai-imageclas/cifar10.tgz"
download_url(dataset_url, '.')

# Extract from archive
with tarfile.open('./cifar10.tgz', 'r:gz') as tar:
    tar.extractall(path='./data')
    


Using downloaded and verified file: ./cifar10.tgz


In [3]:
# Look into the data directory
data_dir = './data/cifar10'
print(os.listdir(data_dir))
classes = os.listdir(data_dir + "/train")
print(classes)

['train', 'test']
['cat', 'horse', 'ship', 'dog', 'truck', 'automobile', 'frog', 'deer', 'bird', 'airplane']


In [4]:
# Data transforms (normalization & data augmentation)
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
train_tfms = tt.Compose([tt.Resize(224),
                        tt.RandomCrop(224, padding=8, padding_mode='reflect'),
                        tt.RandomHorizontalFlip(),
                        tt.RandomRotation(degrees=5),
                        # tt.RandomResizedCrop(256, scale=(0.5, 0.9), ratio=(1, 1)),
                        # tt.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
                        tt.ToTensor(),
                        tt.Normalize(*stats, inplace=True)])
valid_tfms = tt.Compose([tt.Resize(224), tt.ToTensor(), tt.Normalize(*stats)])

In [5]:
# Pytorch dataset
train_ds = datasets.ImageFolder(data_dir + '/train', train_tfms)
val_ds = datasets.ImageFolder(data_dir + '/test', valid_tfms)

In [6]:
# Pytorch dataset loader
batch_size = 32
train_dl = DataLoader(train_ds, batch_size, shuffle=True, num_workers=3, pin_memory=True)
val_dl = DataLoader(val_ds, batch_size*2, num_workers=3, pin_memory=True)

In [7]:
def denormalize(images, means, stds):
    means = torch.tensor(means).reshape(1, 3, 1, 1)
    stds = torch.tensor(stds).reshape(1, 3, 1, 1)
    return images * stds + means

def show_batch(dl):
    for images, labels in dl:
        fig, ax = plt.subplots(figsize=(12, 12))
        ax.set_xticks([]); ax.set_yticks([])
        denorm_images = denormalize(images, *stats)
        ax.imshow(make_grid(denorm_images[:64], nrow=8).permute(1, 2, 0).clamp(0,1))
        break

In [36]:
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda:3')
    else:
        return torch.device('cpu')
    
def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl: 
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.dl)

In [37]:
# Get default device
device = get_default_device()

# Wrapping train and validation data loaders using DeviceDataLoader for automatically transfering batches of data to the GPU (if available)
train_dl = DeviceDataLoader(train_dl, device)
val_dl = DeviceDataLoader(val_dl, device)

In [38]:
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim = 1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

class ImageClassificationBase(nn.Module):
    def training_step(self, batch):
        images, labels = batch
        out = self(images)  # Generate predictions
        loss = F.cross_entropy(out, labels) # Calculate loss
        return loss

    def validation_step(self, batch):
        images, labels = batch
        out = self(images)
        loss = F.cross_entropy(out, labels) 
        acc = accuracy(out, labels)
        return {'val_loss': loss.detach(), 'val_acc': acc}

    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
            print("Epoch [{}], last_lr: {:.5f}, train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
                epoch, result['lrs'][-1], result['train_loss'], result['val_loss'], result['val_acc']))

In [39]:
#import pretrainedmodels
import efficientnet_pytorch

class EfficientNet_b3(ImageClassificationBase):
    def __init__(self):
        super(EfficientNet_b3, self).__init__()
        self.model = efficientnet_pytorch.EfficientNet.from_pretrained('efficientnet-b3')
        
        self.classifier_layer = nn.Sequential(
            nn.Linear(1536, 512),
            nn.BatchNorm1d(512),
            nn.Dropout(0.3),
            nn.GELU(),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.Dropout(0.3),
            nn.GELU(),
            nn.Linear(256, 10)
        )
        
    def forward(self, inputs):
        x = self.model.extract_features(inputs)

        # Pooling and final linear layer
        x = self.model._avg_pooling(x)
        x = x.flatten(start_dim=1)
        x = self.model._dropout(x)
        x = self.classifier_layer(x)
        return x

In [40]:
model = to_device(EfficientNet_b3(), device)
model

Loaded pretrained weights for efficientnet-b3


EfficientNet_b3(
  (model): EfficientNet(
    (_conv_stem): Conv2dStaticSamePadding(
      3, 40, kernel_size=(3, 3), stride=(2, 2), bias=False
      (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
    )
    (_bn0): BatchNorm2d(40, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (_blocks): ModuleList(
      (0): MBConvBlock(
        (_depthwise_conv): Conv2dStaticSamePadding(
          40, 40, kernel_size=(3, 3), stride=[1, 1], groups=40, bias=False
          (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
        )
        (_bn1): BatchNorm2d(40, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduce): Conv2dStaticSamePadding(
          40, 10, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_se_expand): Conv2dStaticSamePadding(
          10, 40, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
      

In [41]:
@torch.no_grad()
def evaluate(model, val_loader):
    model.eval()
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader, 
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    torch.cuda.empty_cache()
    history = []
    
    # Set up cutom optimizer with weight decay
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay, nesterov=True,
                        momentum=0.9)
    # Set up one-cycle learning rate scheduler
    sched = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.0001, max_lr=0.001)
    
    for epoch in range(epochs):
        # Training Phase 
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            loss = model.training_step(batch)
            train_losses.append(loss)
            loss.backward()
            
            # Gradient clipping
            if grad_clip: 
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)
            
            optimizer.step()
            optimizer.zero_grad()
            
            # Record & update learning rate
            lrs.append(get_lr(optimizer))
            sched.step()
        
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [42]:
history = [evaluate(model, val_dl)]
history

[{'val_loss': 2.3073511123657227, 'val_acc': 0.05772292986512184}]

In [43]:
epochs = 100
max_lr = 0.001
grad_clip = 0.1
weight_decay = 1e-4
opt_func = torch.optim.SGD

In [None]:
%%time
history += fit_one_cycle(epochs, max_lr, model, train_dl, val_dl, 
                             grad_clip=grad_clip, 
                             weight_decay=weight_decay, 
                             opt_func=opt_func)

Epoch [0], last_lr: 0.00080, train_loss: 1.3548, val_loss: 0.4387, val_acc: 0.8854
Epoch [1], last_lr: 0.00049, train_loss: 0.4284, val_loss: 0.2072, val_acc: 0.9405
Epoch [2], last_lr: 0.00041, train_loss: 0.3164, val_loss: 0.1758, val_acc: 0.9484
Epoch [3], last_lr: 0.00089, train_loss: 0.2716, val_loss: 0.1420, val_acc: 0.9557
Epoch [4], last_lr: 0.00018, train_loss: 0.2266, val_loss: 0.1235, val_acc: 0.9623
Epoch [5], last_lr: 0.00072, train_loss: 0.2079, val_loss: 0.1150, val_acc: 0.9641
Epoch [6], last_lr: 0.00058, train_loss: 0.1854, val_loss: 0.1058, val_acc: 0.9666
Epoch [7], last_lr: 0.00033, train_loss: 0.1734, val_loss: 0.1039, val_acc: 0.9666
Epoch [8], last_lr: 0.00097, train_loss: 0.1593, val_loss: 0.0949, val_acc: 0.9682
Epoch [9], last_lr: 0.00027, train_loss: 0.1417, val_loss: 0.0898, val_acc: 0.9703
Epoch [10], last_lr: 0.00064, train_loss: 0.1257, val_loss: 0.0880, val_acc: 0.9713
Epoch [11], last_lr: 0.00066, train_loss: 0.1253, val_loss: 0.0861, val_acc: 0.9721
Ep

In [None]:
def plot_accuracies(history):
    accuracies = [x['val_acc'] for x in history]
    plt.plot(accuracies, '-x')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.title('Accuracy vs. No. of epochs');

In [None]:
plot_accuracies(history)

In [None]:
def plot_losses(history):
    train_losses = [x.get('train_loss') for x in history]
    val_losses = [x['val_loss'] for x in history]
    plt.plot(train_losses, '-bx')
    plt.plot(val_losses, '-rx')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['Training', 'Validation'])
    plt.title('Loss vs. No. of epochs');

In [None]:
plot_losses(history)

In [None]:
def plot_lrs(history):
    lrs = np.concatenate([x.get('lrs', []) for x in history[0:3]])
    plt.plot(lrs)
    plt.xlabel('Batch no.')
    plt.ylabel('Learning rate')
    plt.title('Learning Rate vs. Batch no.');

In [None]:
plot_lrs(history)