In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision.models.vgg import VGG

%matplotlib inline

# Root of the Kaggle competition download. Set the WHALE_DATA_DIR environment
# variable to point at another copy; otherwise the data is looked up under the
# current user's home directory instead of one hard-coded account.
data_root = os.environ.get(
    'WHALE_DATA_DIR',
    os.path.join(os.path.expanduser('~'), '.kaggle', 'competitions',
                 'whale-categorization-playground'))

train_dir = os.path.join(data_root, 'train')      # training images
test_dir = os.path.join(data_root, 'test')        # test images
data_path = os.path.join(data_root, 'train.csv')  # image-name / whale-Id table

#### Make dataset and dataloader classes

In [None]:
class WhaleData(Dataset):
    """Dataset of whale images paired with integer-encoded whale Ids.

    The CSV is expected to have an ``Image`` column (file names relative to
    ``train_dir``) and an ``Id`` column (labels). Each distinct Id is mapped to
    an integer index; the mapping is exposed through ``unique_labels``,
    ``label_indices`` and ``label_counts`` and also stored in the dataframe as
    a ``label_index`` column.

    Every item is ``(image, label)`` where ``image`` has been converted to
    3-channel grayscale, resized to 224x224, turned into a tensor and
    normalized per channel, and ``label`` is a ``torch.long`` scalar tensor.

    Args:
        csv_path: path of the CSV file described above.
        train_dir: directory containing the image files.
        test_dir: accepted for interface compatibility; not used by this class.
        norm_constants: optional dict with ``'means'`` and ``'stds'`` (one
            value per channel). When omitted, the statistics are computed from
            the images, which reads every file once. Constants whose means
            exceed 1 are taken to be on the 0-255 pixel scale and are divided
            by 255, because normalization is applied after ``ToTensor``.
    """

    # Largest 8-bit pixel value; used to bring statistics onto the [0, 1] scale.
    PIXEL_MAX = 255.0

    def __init__(self, csv_path, train_dir, test_dir, norm_constants=None):
        self.dataframe = pd.read_csv(csv_path)
        self.unique_labels, self.label_indices, self.label_counts = np.unique(
            self.dataframe.Id, return_inverse=True, return_counts=True)
        self.dataframe.insert(len(self.dataframe.columns),
                              'label_index',
                              self.label_indices)

        self.img_paths = [os.path.join(train_dir, f) for f in self.dataframe.Image]

        if norm_constants is None:
            norm_constants = self._compute_norm_constants(self.img_paths)
        # Normalize runs after ToTensor, so the constants must be on [0, 1].
        self.norm_constants = self._to_unit_scale(norm_constants)

        self.transforms = torchvision.transforms.Compose([
            torchvision.transforms.Grayscale(num_output_channels=3),
            torchvision.transforms.Resize((224, 224)),
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(self.norm_constants['means'],
                                             self.norm_constants['stds'])
        ])

    @classmethod
    def _compute_norm_constants(cls, img_paths):
        """Measure per-channel mean/std of every image, on the [0, 1] scale."""
        per_image_means = np.zeros((len(img_paths), 3))
        per_image_stds = np.zeros_like(per_image_means)
        for row, path in enumerate(img_paths):
            with Image.open(path) as img:
                # Force three channels so grayscale / RGBA files fit the 3-column rows.
                pixels = np.asarray(img.convert('RGB'), dtype=np.float64) / cls.PIXEL_MAX
            per_image_means[row] = pixels.mean(axis=(0, 1))
            per_image_stds[row] = pixels.std(axis=(0, 1))
        return cls._aggregate_stats(per_image_means, per_image_stds)

    @staticmethod
    def _aggregate_stats(per_image_means, per_image_stds):
        """Average per-image channel statistics over the whole image set.

        Both inputs are ``(num_images, num_channels)`` array-likes. The stds
        are averaged: taking the standard deviation of the per-image stds would
        measure how much they vary, not how large they are.
        """
        return {'means': np.mean(np.asarray(per_image_means, dtype=float), axis=0),
                'stds': np.mean(np.asarray(per_image_stds, dtype=float), axis=0)}

    @classmethod
    def _to_unit_scale(cls, norm_constants):
        """Return the constants as float lists, rescaled to [0, 1] if needed."""
        means = np.asarray(norm_constants['means'], dtype=float)
        stds = np.asarray(norm_constants['stds'], dtype=float)
        # A mean above 1 cannot describe [0, 1] data, so it must be in 0-255 units.
        if means.size and means.max() > 1.0:
            means = means / cls.PIXEL_MAX
            stds = stds / cls.PIXEL_MAX
        return {'means': means.tolist(), 'stds': stds.tolist()}

    def __len__(self):
        """Number of images listed in the CSV."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(image, label_index_tensor)``."""
        with Image.open(self.img_paths[idx]) as raw:
            if self.transforms is not None:
                img = self.transforms(raw)
            else:
                # Detach the pixel data from the file before it is closed.
                img = raw.copy()

        label = torch.tensor(self.label_indices[idx], dtype=torch.long)
        return img, label
    
batch_size = 8
num_workers = 1

# The recorded statistics are on the 0-255 pixel scale (the means exceed 1),
# but the dataset normalizes tensors that ToTensor has already scaled to
# [0, 1], so bring the constants onto that scale before passing them in.
# NOTE(review): the stds look small for natural images; consider re-deriving
# them once with norm_constants=None - TODO confirm.
pixel_max = 255.0
norm_constants = {
    'means': [m / pixel_max for m in (140.28481434, 147.81499958, 156.63975966)],
    'stds': [s / pixel_max for s in (17.15088799, 17.18349816, 17.21105706)],
}
dataset = WhaleData(data_path, train_dir, test_dir, norm_constants=norm_constants)
# Shuffle so every mini-batch is a random draw instead of following CSV order.
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                        num_workers=num_workers)

#### Make model

In [None]:
class VGGNet(VGG):
    """VGG backbone that returns the feature map after every max-pool stage.

    The convolutional stack is built from ``cfg[model]`` and, optionally,
    initialised from the matching torchvision pretrained network. ``forward``
    returns a dict of the five stage outputs rather than class scores.

    The training loop reads ``self.criterion``, ``self.optimizer`` and
    ``self.scheduler``; these are not created here and must be attached to the
    instance before ``train(dataloader, ...)`` is called.

    Args:
        pretrained: load torchvision's pretrained weights for ``model``.
        model: one of the keys of ``cfg`` ('vgg11', 'vgg13', 'vgg16', 'vgg19').
        requires_grad: when False, freeze every parameter.
        remove_fc: delete the fully-connected ``classifier`` to save memory.
        show_params: print the name and size of every remaining parameter.
    """

    # (start, stop) index pairs into ``self.features``, one pair per stage.
    # Each stage ends with its max-pool layer. The indices match the layout
    # produced by make_layers(..., batch_norm=False).
    vgg_ranges = {
        'vgg11': ((0, 3), (3, 6),  (6, 11),  (11, 16), (16, 21)),
        'vgg13': ((0, 5), (5, 10), (10, 15), (15, 20), (20, 25)),
        'vgg16': ((0, 5), (5, 10), (10, 17), (17, 24), (24, 31)),
        'vgg19': ((0, 5), (5, 10), (10, 19), (19, 28), (28, 37))
    }

    # cropped version from https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
    # Integers are conv output channels, 'M' is a 2x2 max-pool.
    cfg = {
        'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
        'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
        'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
        'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
    }

    def __init__(self, pretrained=True, model='vgg16', requires_grad=True, remove_fc=True, show_params=False):
        super().__init__(self.make_layers(self.cfg[model]))
        self.ranges = self.vgg_ranges[model]

        if pretrained:
            # Look the constructor up by name rather than exec()-ing a formatted string.
            pretrained_net = getattr(torchvision.models, model)(pretrained=True)
            self.load_state_dict(pretrained_net.state_dict())

        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

        if remove_fc:  # delete redundant fully-connected layer params, can save memory
            del self.classifier

        if show_params:
            for name, param in self.named_parameters():
                print(name, param.size())

        self.set_device()

    def forward(self, x):
        """Run the conv stack and return ``{'x1': ..., ..., 'x5': ...}``.

        Key ``'x<k>'`` holds the activation after the k-th max-pool stage.
        """
        output = {}

        # get the output of each maxpooling layer (5 maxpool in VGG net)
        for stage, (start, stop) in enumerate(self.ranges, start=1):
            for layer in range(start, stop):
                x = self.features[layer](x)
            output["x%d" % stage] = x

        return output

    def train(self, dataloader=True, num_epochs=25, validation=False, disp_interval=None,
              use_visdom=False, mode=None):
        """Run the training/validation loop, or switch the module's mode.

        This method shadows ``nn.Module.train(mode)``, which PyTorch itself
        calls (``eval()`` is ``train(False)``). To keep both uses working, a
        boolean first argument or an explicit ``mode=`` is forwarded to
        ``nn.Module.train``; anything else is treated as a dataloader.

        Args:
            dataloader: iterable of ``(inputs, labels)`` batches, or a bool
                when used as ``nn.Module.train(mode)``.
            num_epochs: number of passes over ``dataloader``.
            validation: evaluate only - no gradients, no optimizer or
                scheduler steps.
            disp_interval: print a summary every this many epochs; ``None``
                or ``0`` disables printing.
            use_visdom: create the visdom objects (see note in the loop).
            mode: explicit train/eval flag for ``nn.Module.train``.

        Returns:
            ``self`` when used as a mode switch, otherwise a list with the
            mean per-sample loss of every epoch.

        NOTE(review): ``forward`` returns a dict of feature maps and that dict
        is passed straight to ``self.criterion``; a loss such as
        ``nn.CrossEntropyLoss`` expects a tensor of class scores, so the
        criterion (or a classification head) has to bridge that gap.
        """
        if mode is not None:
            return super().train(mode)
        if isinstance(dataloader, bool):
            return super().train(dataloader)
        return self._run_epochs(dataloader, num_epochs, validation, disp_interval, use_visdom)

    def _run_epochs(self, dataloader, num_epochs, validation, disp_interval, use_visdom):
        """Iterate over ``dataloader`` for ``num_epochs`` and collect the epoch losses."""
        self.to(self.device)

        phase = 'Validation' if validation else 'Training'

        if use_visdom:
            # NOTE(review): `visdom` and `Generator` are not defined in the
            # visible part of the notebook, and `vis` / `gen` are not used
            # further down - confirm they are provided elsewhere.
            vis = visdom.Visdom()
            gen = Generator(self, dataloader.dataset)
        else:
            vis = None

        losses = []
        for epoch in range(1, num_epochs + 1):
            # Set train/eval mode through nn.Module directly; self.eval()
            # would route back into the overridden train().
            super().train(not validation)

            # reset loss for current phase and epoch
            running_loss = 0.0
            samples_seen = 0

            for inputs, labels in dataloader:
                inputs = inputs.to(self.device)
                labels = labels.to(self.device)

                if not validation:
                    self.optimizer.zero_grad()

                # track history only during training phase
                with torch.set_grad_enabled(not validation):
                    outputs = self(inputs)
                    loss = self.criterion(outputs, labels)

                    if not validation:
                        loss.backward()
                        self.optimizer.step()

                # loss is a batch mean, so weight it by the batch size
                running_loss += loss.item() * inputs.size(0)
                samples_seen += inputs.size(0)

            # Divide by samples (not batches) to undo the weighting above.
            epoch_loss = running_loss / max(samples_seen, 1)
            losses.append(epoch_loss)

            # Learning rate that was in effect during this epoch.
            current_lr = [group['lr'] for group in self.optimizer.param_groups]

            if not validation:
                # Step the schedule only after the epoch's optimizer updates.
                self.scheduler.step()

            if disp_interval and epoch % disp_interval == 0:
                print('Epoch {} / {}'.format(epoch, num_epochs))
                print('Learning Rate: {}'.format(current_lr))
                print('{} Loss: {}'.format(phase, epoch_loss))
                print('-' * 10)
                print()

        return losses

    def make_layers(self, cfg, batch_norm=False):
        """Build the conv/ReLU/max-pool stack described by ``cfg``.

        Args:
            cfg: sequence of ints (conv output channels) and ``'M'`` (max-pool).
            batch_norm: insert a ``BatchNorm2d`` after every convolution.

        Returns:
            An ``nn.Sequential`` that takes a 3-channel input.
        """
        layers = []
        in_channels = 3
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v
        return nn.Sequential(*layers)

    def set_device(self, device=None):
        """Remember the device to train on; defaults to cuda:0 when available, else cpu."""
        if device is None:
            self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = device


# Pretrained VGG-16 backbone, all weights trainable, fully-connected part dropped.
model = VGGNet(
    pretrained=True,
    model='vgg16',
    requires_grad=True,
    remove_fc=True,
    show_params=False,
)

#### Define optimizer, loss, and scheduler

In [None]:
# Create class weights to balance class frequencies: the rarer a label,
# the larger its weight (the most frequent label gets the smallest, 1 unit).
label_counts = dataset.label_counts
print('Total images: {}'.format(np.sum(label_counts)))
print('Number per label: {}'.format(label_counts))

class_weights = np.max(label_counts) - label_counts + 1
class_weights = class_weights / np.sum(class_weights)  # normalize to sum to 1
print('Class weights: {}'.format(class_weights))
print('Total probability: {}'.format(np.sum(class_weights)))

# NumPy division yields float64; the loss needs weights in the same dtype as
# the model's float32 outputs, so convert explicitly.
class_weights = torch.tensor(class_weights, dtype=torch.float32)
print('\nClass weights tensor: {}'.format(class_weights))
print('\tSize: {}'.format(class_weights.size()))


In [None]:
# Optimization hyper-parameters
learn_rate = 0.001  # initial SGD learning rate
momentum = 0.9      # SGD momentum
step_size = 50      # epochs between learning-rate decays
gamma = 0.8         # multiplicative decay factor applied at each step
# NOTE(review): with step_size=50 the rate never decays during the default
# 25-epoch run of model.train - confirm this is intended.

# The loss weights must share the dtype of the model's float32 outputs.
model.criterion = nn.CrossEntropyLoss(weight=class_weights.float())
model.optimizer = torch.optim.SGD(model.parameters(), lr=learn_rate, momentum=momentum)
model.scheduler = torch.optim.lr_scheduler.StepLR(model.optimizer, step_size=step_size, gamma=gamma)

#### Train model

In [None]:
# Train with the default number of epochs, reporting after every epoch.
model.train(dataloader=dataloader, disp_interval=1)