# PyTorch Model Built on Top of VGG 16

---
## Objective
Create a pipeline to feed a neural network image classifier utilizing the VGG 16 backbone.

- Data:
    - [Kaggle Dogs vs. Cats](https://www.kaggle.com/c/dogs-vs-cats/overview)
- Inputs:
    - NumPy arrays
- Outputs:
    - predicted class name
    - probability of predicted class as a percentage

### Imports

In [None]:
from collections import namedtuple
import concurrent.futures
import datetime as dt
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import PIL
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets, transforms, models

### Format Notebook

In [None]:
%matplotlib inline

---
## Load Data

### Convert Images to NumPy Arrays

This is done only as an exercise to set up a pipeline based on NumPy arrays in lieu of PIL images.

In [None]:
def image_to_array(im_file: Path) -> None:
    """Convert an image file to a NumPy array stored next to it.

    The array is written to the same location as the image with the suffix
    replaced by ``.npy``.  When that ``.npy`` file already exists nothing is
    done, so repeated runs only convert images that are still missing.

    Args:
        im_file: Path of the image file to convert.
    """
    arr_file = im_file.with_suffix('.npy')
    if arr_file.is_file():
        return

    # The context manager closes the image's file handle as soon as the pixel
    # data has been copied, instead of leaving it open until garbage
    # collection (this function is called once per image in the dataset).
    with PIL.Image.open(im_file) as im:
        np.save(arr_file, np.array(im))

In [None]:
data_dir = 'Cat_Dog_data'

# Collect every image below ``data_dir``.  The suffix is compared in lower
# case so files such as ``IMG.JPG`` are converted as well.
im_files = {x.resolve() for x in Path(data_dir).glob('**/*')
            if x.suffix.lower() in ('.jpeg', '.jpg', '.png')}

with concurrent.futures.ProcessPoolExecutor() as executor:
    # ``Executor.map`` only re-raises a worker's exception when its result is
    # retrieved, so the results are consumed here; otherwise a failed
    # conversion would pass silently and leave a missing ``.npy`` file.
    list(executor.map(image_to_array, im_files))

### Create Datasets

##### Datasets From Image Files

In [None]:
# Image-file datasets for the ``train`` and ``test`` sub-folders of
# ``data_dir``; each one converts its images with its own ``ToTensor``.
train_im_data, test_im_data = (
    datasets.ImageFolder(f'{data_dir}/{subset}',
                         transform=transforms.ToTensor())
    for subset in ('train', 'test')
)

##### Datasets From NumPy Arrays

In [None]:
def npy_loader(path: Path) -> torch.Tensor:
    """Load a saved image array as a channels-first float tensor.

    Values are divided by 255, which maps 8-bit pixel data to ``[0, 1]``
    (assumes the ``.npy`` files hold 8-bit images -- TODO confirm).

    Args:
        path: Location of the ``.npy`` file to load.

    Returns:
        A ``torch.FloatTensor`` with the channel axis moved to the front
        (``height x width x channels`` becomes ``channels x height x width``).
        A two-dimensional (single-channel) array is replicated to three
        channels first so it has the same layout as a colour image.
    """
    arr = np.load(path)
    if arr.ndim == 2:
        # Grayscale images are stored without a channel axis; without this
        # step ``moveaxis`` below would raise for them.
        arr = np.stack((arr,) * 3, axis=-1)

    # Convert straight to float32 (the dtype that is returned) rather than
    # going through a float64 copy that is twice the size.  ``astype`` always
    # copies, so the in-place division never touches the loaded array.
    arr = arr.astype(np.float32)
    arr /= 255.0
    arr = np.moveaxis(arr, 2, 0)
    return torch.from_numpy(arr)

In [None]:
# Datasets that read the ``.npy`` copies of the images through ``npy_loader``.
# ``extensions`` must be a real tuple: ``('.npy')`` without the trailing comma
# is just the string ``'.npy'``, which an implementation that iterates over
# the extensions would treat as the single characters '.', 'n', 'p' and 'y'.
train_data = datasets.DatasetFolder(
    root=f'{data_dir}/train',
    loader=npy_loader,
    extensions=('.npy',),
)
test_data = datasets.DatasetFolder(
    root=f'{data_dir}/test',
    loader=npy_loader,
    extensions=('.npy',),
)

### Calculate Mean and Standard Deviation of Dataset

Default ImageNet values over three channels:
$$\mu = (0.485, 0.456, 0.406)$$
$$\sigma = (0.229, 0.224, 0.225)$$

These parameters are appropriate to use for transfer learning when the custom dataset is similar to the ImageNet dataset.

#### Welford's Online Algorithm for Variance

- *Online Algorithm*: An algorithm designed to process each new piece of data as it arrives to produce a final result without knowledge of any future data.
- Calculates the standard deviation in one pass over the data, eliminating the need to first cycle through the data to determine the mean.
- Avoids the error found in naive variance calculations when the standard deviation is much smaller than the mean.

[Welford, B.P., 1962. Note on a method for calculating corrected sums of squares and products. Technometrics, 4(3), pp.419-420.](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.302.7503&rep=rep1&type=pdf)

### Calculate Dataset Mean and Standard Deviation

In [None]:
def online_mean_std(loader):
    """Compute the per-channel mean and standard deviation in one pass.

    Each batch is reduced to its own mean and sum of squared deviations, and
    those are merged into the running totals with the pairwise update of
    Chan et al. (the batched form of Welford's algorithm).  Unlike the
    ``E[x^2] - E[x]^2`` shortcut this never subtracts two large, nearly equal
    numbers, so the variance cannot come out negative.

    Args:
        loader: Iterable whose items are sequences with a 4-D tensor laid out
            as ``(batch, channels, height, width)`` in position 0.  Batches
            may differ in height and width.

    Returns:
        Tuple ``(mean, std)`` of 1-D ``float32`` tensors with one entry per
        channel.  ``std`` is the population standard deviation (divides by
        the number of pixels, not by that number minus one).

    Raises:
        ValueError: If ``loader`` yields no pixels at all.
    """
    px_cnt = 0
    mean = None
    sum_sq_dev = None

    for data in loader:
        # Accumulate in float64 so rounding error stays negligible over the
        # many pixels of a full dataset.
        data = data[0].to(torch.float64)
        batch, _, height, width = data.shape
        batch_px = batch * height * width
        if batch_px == 0:
            continue

        batch_mean = torch.mean(data, dim=[0, 2, 3])
        centered = data - batch_mean.view(1, -1, 1, 1)
        batch_sum_sq_dev = torch.sum(centered ** 2, dim=[0, 2, 3])

        if mean is None:
            # Sized from the first batch, so any channel count works and the
            # accumulators start from zero rather than uninitialised memory.
            mean = torch.zeros_like(batch_mean)
            sum_sq_dev = torch.zeros_like(batch_mean)

        total_pixels = px_cnt + batch_px
        delta = batch_mean - mean
        mean = mean + delta * (batch_px / total_pixels)
        sum_sq_dev = (sum_sq_dev + batch_sum_sq_dev
                      + delta ** 2 * (px_cnt * batch_px / total_pixels))
        px_cnt = total_pixels

    if mean is None:
        raise ValueError('loader yielded no pixels to compute statistics from')

    std = torch.sqrt(sum_sq_dev / px_cnt)
    return mean.to(torch.float32), std.to(torch.float32)

In [None]:
Stats = namedtuple('Stats', 'mean, std')

# Per-channel statistics of each dataset, keyed by the name of the dataset's
# root folder ('train' / 'test').
stats = {}
for dataset in (train_data, test_data):
    # Images are fed one at a time -- presumably because they differ in size
    # and could not be stacked into a larger batch.
    loader = DataLoader(
        dataset,
        batch_size=1,
        num_workers=1,
        shuffle=False,
    )

    # ``Path.name`` gives the last folder name even when the root has a
    # trailing or OS-specific separator, where ``split('/')[-1]`` would not.
    name = Path(dataset.root).name
    stats[name] = Stats(*online_mean_std(loader))

# The values are shown with six decimals; ``replace`` strips the quotes that
# the list representation puts around each formatted number.
print(
    f"""
    Channel Statistics
    
    Mean:
    \tTrain: {[f'{x:.6f}' for x in stats['train'].mean.tolist()]}
    \tTest:  {[f'{x:.6f}' for x in stats['test'].mean.tolist()]}
    
    Standard Deviation
    \tTrain: {[f'{x:.6f}' for x in stats['train'].std.tolist()]}
    \tTest:  {[f'{x:.6f}' for x in stats['test'].std.tolist()]}
    """
    .replace("'", '')
)

### Transform Dataset

In [None]:
# Steps common to both pipelines: tensor conversion followed by
# normalisation with the training-set channel statistics.
shared_transforms = [
    transforms.ToTensor(),
    transforms.Normalize(mean=stats['train'].mean, std=stats['train'].std),
]

# Augmentations that ``RandomApply`` switches on or off as one group.
random_transforms = [
    transforms.ColorJitter(brightness=(0.1, 0.9), contrast=0.5),
    transforms.RandomHorizontalFlip(),
]

# Training: optional augmentation, random 224-pixel crop, shared steps.
train_transforms = transforms.Compose([
    transforms.RandomApply(random_transforms, p=0.5),
    transforms.RandomResizedCrop(224),
    *shared_transforms,
])

# Testing: deterministic resize and centre crop, then the shared steps.
test_transforms = transforms.Compose([
    transforms.Resize(255),
    transforms.CenterCrop(224),
    *shared_transforms,
])

### Create Loaders

##### Split Train Dataset into Train and Validation Datasets

In [None]:
validation_size = 0.2

# Shuffle all indices of the training dataset in place.
train_qty = len(train_data)
idx = list(range(train_qty))
np.random.shuffle(idx)

# The first ``split`` shuffled indices form the validation subset and the
# remaining ones the training subset.
split = int(np.floor(validation_size * train_qty))
valid_idx = idx[:split]
train_idx = idx[split:]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

In [None]:
batch_size = 32
workers = 0

# The training and validation loaders both read ``train_data`` and differ
# only in the sampler that selects their subset of indices.
trainloader, validloader = (
    DataLoader(train_data, sampler=subset_sampler,
               batch_size=batch_size, num_workers=workers)
    for subset_sampler in (train_sampler, valid_sampler)
)
testloader = DataLoader(test_data,
                        batch_size=batch_size,
                        num_workers=workers)

---
## Visualize Data

In [None]:
def plot_image(image, ax=None, title=None, normalize=True):
    """Plot an image stored as a channels-first tensor.

    Args:
        image: Tensor with the channel axis first; it is moved to the last
            axis for display.
        ax: Matplotlib axes to draw on.  A new figure and axes are created
            when omitted.
        title: Title placed above the image.
        normalize: When true the image is treated as normalised with the
            training-set statistics in ``stats['train']``; that
            normalisation is undone and the result clipped to ``[0, 1]``
            before display.

    Returns:
        The axes the image was drawn on.
    """
    if ax is None:
        _, ax = plt.subplots()

    # ``detach().cpu()`` lets tensors that track gradients or live on the GPU
    # be plotted as well; a plain ``.numpy()`` raises for both.
    image = image.detach().cpu().numpy()
    image = np.moveaxis(image, 0, 2)

    if normalize:
        mean = stats['train'].mean.numpy()
        std = stats['train'].std.numpy()
        # Channels are last at this point, so the per-channel statistics
        # broadcast over every pixel.
        image = std * image + mean
        image = np.clip(image, 0, 1)

    ax.imshow(image)
    # Remove the frame, tick marks and tick labels so only the image shows.
    for border in ('top', 'right', 'left', 'bottom'):
        ax.spines[border].set_visible(False)
    ax.tick_params(axis='both', length=0)
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.set_title(title)

    return ax

In [None]:
# TODO: BUG with NumPy array shapes
# Fetch a single batch.  The built-in ``next`` is used because the iterator's
# ``.next()`` method is not available in current PyTorch releases.
dataiter = iter(trainloader)
images, labels = next(dataiter)

In [None]:
# Dataset and loader of each split, keyed by split name.
sets = {
    'train': {'data': train_data, 'loader': trainloader},
    'test': {'data': test_data, 'loader': testloader},
}

cols = 4
for row, dataset in enumerate(sets):
    # Fetch one batch per split.  The built-in ``next`` is used because the
    # iterator's ``.next()`` method is not available in current PyTorch.
    dataiter = iter(sets[dataset]['loader'])
    images, labels = next(dataiter)
#     fig, ax = plt.subplots(figsize=(10, 3), ncols=cols)
#     for idx in range(cols):
#         category = sets[dataset]['data'].classes[labels[idx]]
#         plot_image(
#             images[idx],
#             ax=ax[idx],
#             title=category,
#         )
#         plt.suptitle(f'{dataset.capitalize()} Examples:',
#                      fontsize=20)

---
## Network Definition

### VGG 16 Backbone

Load the model and freeze the parameters so backprop will not apply to the backbone.

In [None]:
# Pretrained VGG 16 whose existing parameters are all excluded from
# gradient computation.
model = models.vgg16(pretrained=True)
for param in model.parameters():
    param.requires_grad_(False)

### Configure for CPU or GPU

In [None]:
# Use the GPU when CUDA reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Hardware Execution Mode: {str(device).upper()}')

### Classifier Architecture

Log softmax will be the output of the network to allow easy access to class probabilities during the evaluation step.
This results in the criterion being the negative log likelihood loss `NLLLoss`.

If `CrossEntropyLoss` were used instead, the network outputs would be raw logits, which would require a softmax transformation to yield probabilities.

In [None]:
%%capture


class Classifier(nn.Module):
    """New classifier layers for model.

    One hidden fully connected layer with ReLU and dropout, followed by an
    output layer that produces log-probabilities.

    Args:
        category_cnt: Number of output classes.
        in_features: Length of each flattened input sample.  The default of
            25088 presumably matches the flattened VGG 16 feature map
            (512 x 7 x 7) -- confirm against the backbone in use.
        hidden_features: Width of the hidden layer.
        dropout_p: Dropout probability applied after the hidden layer.
    """
    def __init__(self, category_cnt=15, in_features=25088,
                 hidden_features=4096, dropout_p=0.2):
        super().__init__()
        self.category_cnt = category_cnt
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.output = nn.Linear(hidden_features, self.category_cnt)

        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, x):
        """Define forward pass through layers.

        Args:
            x: Batch of inputs; everything after the batch axis is flattened.

        Returns:
            Tensor of shape ``(batch, category_cnt)`` holding the log of the
            class probabilities.
        """
        # ``flatten`` also accepts non-contiguous and empty batches, which
        # ``view(batch, -1)`` rejects.
        x = torch.flatten(x, 1)

        x = self.dropout(F.relu(self.fc1(x)))
        x = F.log_softmax(self.output(x), dim=1)
        return x


# Swap in the new classifier head; the optimizer is handed only the head's
# parameters.
model.classifier = Classifier()

# Negative log likelihood loss, paired with the head's log-softmax output.
criterion = nn.NLLLoss()
optimizer = optim.Adam(params=model.classifier.parameters(), lr=0.003)

model.to(device)

---
## Train Network

In [None]:
# Train the classifier and, every ``eval_freq`` optimisation steps, report
# the running training loss together with loss and accuracy on ``testloader``.
# NOTE(review): ``validloader`` is created earlier but not used here; the
# periodic evaluation runs on the test set -- confirm this is intended.
epochs = 1
steps = 0
running_loss = 0
eval_freq = 5
for epoch in range(epochs):
    for inputs, labels in trainloader:
        optimizer.zero_grad()
        inputs, labels = [x.to(device) for x in (inputs, labels)]

        # Call the module itself rather than ``forward`` so that any
        # registered hooks run.
        logps = model(inputs)
        loss = criterion(logps, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        steps += 1
        if steps % eval_freq == 0:
            test_loss = 0
            correct = 0
            sample_cnt = 0

            # Evaluation mode disables dropout while measuring.
            model.eval()

            with torch.no_grad():
                # Separate names keep the evaluation batches from
                # overwriting the current training batch.
                for test_inputs, test_labels in testloader:
                    test_inputs, test_labels = [
                        x.to(device) for x in (test_inputs, test_labels)
                    ]
                    batch_cnt = test_labels.shape[0]

                    test_logps = model(test_inputs)
                    batch_loss = criterion(test_logps, test_labels)
                    # Weight by batch size so that a smaller final batch
                    # does not count as much as a full one.
                    test_loss += batch_loss.item() * batch_cnt

                    top_class = test_logps.argmax(dim=1)
                    correct += (top_class == test_labels).sum().item()
                    sample_cnt += batch_cnt

            # Per-sample averages; ``max`` guards against an empty loader.
            sample_cnt = max(sample_cnt, 1)
            test_loss = test_loss / sample_cnt
            accuracy = correct / sample_cnt

            print(f"""
                Epoch:         {epoch + 1}/{epochs}
                Train Loss:    {running_loss / eval_freq:.3f}
                Test Loss:     {test_loss:.3f}
                Test Accuracy: {accuracy:.3f}
            """)
            running_loss = 0
            model.train()

---
## Save Checkpoint

In [None]:
# Save the model weights to ``custom_VGG16_<timestamp>.pth`` in ``output_dir``
# (the current working directory).
output_dir = Path()
# Join onto the ``output_dir`` instance; ``Path / 'custom_VGG16'`` applies
# ``/`` to the class itself and raises ``TypeError``.
checkpoint_name = output_dir / 'custom_VGG16'
# Dashes instead of colons in the time part keep the file name valid on
# file systems that reject ':' (e.g. Windows).
time_stamp = dt.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
torch.save(model.state_dict(), f'{checkpoint_name}_{time_stamp}.pth')