# PyTorch Model Built on Top of VGG 16

## Imports

In [None]:
from collections import namedtuple
import datetime as dt
from pathlib import Path

import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from torchvision import datasets, transforms, models

---
## Create Data Loader

### Calculate Mean and Standard Deviation of Dataset

Default ImageNet values over three channels:
$$\mu = (0.485, 0.456, 0.406)$$
$$\sigma = (0.229, 0.224, 0.225)$$

These parameters are appropriate to use for transfer learning when the custom dataset is similar to the ImageNet dataset.

#### Welford's Online Algorithm for Variance

- *Online Algorithm*: An algorithm designed to process each new piece of data as it arrives to produce a final result without knowledge of any future data.
- Calculates the standard deviation in one pass over the data, eliminating the need to first cycle through the data to determine the mean.
- Avoids the error found in naive variance calculations when the standard deviation is much smaller than the mean.

[Welford, B.P., 1962. Note on a method for calculating corrected sums of squares and products. Technometrics, 4(3), pp.419-420.](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.302.7503&rep=rep1&type=pdf)

In [None]:
def online_mean_std(loader):
    """Compute the per-channel mean and standard deviation in one pass.

    Each batch is summarised by its own mean and sum of squared deviations,
    which are then merged into the running statistics with the batched form
    of Welford's update (Chan et al. pairwise combination). This avoids the
    cancellation error of the naive ``E[x^2] - E[x]^2`` formula.

    Args:
        loader: Iterable of batches. The first element of every batch must be
            a 4-D tensor laid out as ``(batch, channels, height, width)``;
            any further elements (e.g. labels) are ignored.

    Returns:
        Tuple ``(mean, std)`` of 1-D tensors with one entry per channel.
        ``std`` is the population standard deviation (divides by N).

    Raises:
        ValueError: If ``loader`` yields no pixels at all.
    """
    px_cnt = 0
    mean = None  # running per-channel mean
    m2 = None    # running per-channel sum of squared deviations from the mean

    for data in loader:
        data = data[0]
        batch, channels, height, width = data.shape
        batch_pixels = batch * height * width
        if batch_pixels == 0:
            # Nothing to merge; also avoids a 0 / 0 below.
            continue
        if not data.is_floating_point():
            # ``mean`` is undefined for integer tensors.
            data = data.float()

        if mean is None:
            # Start from zeros sized by the data. Uninitialised memory
            # (``torch.empty``) may contain NaN/inf, which survive being
            # multiplied by a zero pixel count.
            mean = torch.zeros(channels, device=data.device)
            m2 = torch.zeros(channels, device=data.device)

        batch_mean = data.mean(dim=[0, 2, 3])
        centered = data - batch_mean.view(1, -1, 1, 1)
        batch_m2 = (centered ** 2).sum(dim=[0, 2, 3])

        total_pixels = px_cnt + batch_pixels
        delta = batch_mean - mean
        mean = mean + delta * (batch_pixels / total_pixels)
        m2 = m2 + batch_m2 + delta ** 2 * (px_cnt * batch_pixels / total_pixels)
        px_cnt = total_pixels

    if mean is None:
        raise ValueError('loader yielded no pixels; mean and std are undefined')

    # Guard against a tiny negative value from rounding before the sqrt.
    variance = (m2 / px_cnt).clamp(min=0)
    return mean, torch.sqrt(variance)

In [None]:
data_dir = 'Cat_Dog_data'

# Record pairing the per-channel mean and standard deviation of one split.
Stats = namedtuple('Stats', 'mean, std')

train_data = datasets.ImageFolder(f'{data_dir}/train', transform=transforms.ToTensor())
test_data = datasets.ImageFolder(f'{data_dir}/test', transform=transforms.ToTensor())

# Maps the split's directory name (e.g. 'train', 'test') to its statistics.
stats = {}
for dataset in (train_data, test_data):
    # NOTE(review): batch_size=1 presumably because the images are not
    # resized here and so cannot be stacked into one tensor; confirm.
    loader = DataLoader(
        dataset,
        batch_size=1,
        num_workers=1,
        shuffle=False,
    )

    # ``Path.name`` copes with trailing slashes and ``Path`` roots, which a
    # manual ``split('/')`` does not.
    name = Path(dataset.root).name
    stats[name] = Stats(*online_mean_std(loader))

stats

### Transform Dataset

In [None]:
data_dir = 'Cat_Dog_data'
batch_size = 32

# Normalise both splits with the statistics measured on the training split.
# These names were previously used without ever being defined.
dataset_mean = stats['train'].mean.tolist()
dataset_std = stats['train'].std.tolist()

# Augmentations that RandomApply below applies together or skips together.
random_transforms = [
    transforms.ColorJitter(
        brightness=(0, 1),
        contrast=0.5,
    ),
    transforms.RandomHorizontalFlip(),
]
# Final steps shared by the training and test pipelines.
shared_transforms = [
    transforms.ToTensor(),
    transforms.Normalize(mean=dataset_mean, std=dataset_std),
]

train_transforms = transforms.Compose(
    ([transforms.RandomApply(random_transforms, p=0.5),
      transforms.RandomResizedCrop(224),
     ]
     + shared_transforms)
)

# The test pipeline is deterministic: resize, then take the central crop.
test_transforms = transforms.Compose(
    ([transforms.Resize(255),
      transforms.CenterCrop(224),
     ]
     + shared_transforms)
)

train_data = datasets.ImageFolder(f'{data_dir}/train',
                                  transform=train_transforms)
test_data = datasets.ImageFolder(f'{data_dir}/test',
                                 transform=test_transforms)

trainloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
testloader = DataLoader(test_data, batch_size=batch_size)

---
## Network Definition

### VGG 16 Backbone

Load the model and freeze the parameters so backprop will not apply to the backbone.

In [None]:
# Load VGG 16 with pretrained weights, then switch off gradient tracking on
# every parameter so backprop leaves the loaded weights untouched.
model = models.vgg16(pretrained=True)
for frozen_param in model.parameters():
    frozen_param.requires_grad_(False)

### Configure for CPU or GPU

In [None]:
# Use the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Hardware Execution Mode: {str(device).upper()}')

### Classifier Architecture

Log softmax will be the output of the network to allow easy access to class probabilities during the evaluation step.
This results in the criterion being the negative log likelihood loss `NLLLoss`.

If `CrossEntropyLoss` were used instead, the network outputs would be raw logits, which would require a softmax transformation to yield probabilities.

In [None]:
%%capture


class Classifier(nn.Module):
    """New classifier layers for model.

    One hidden fully connected layer with ReLU and dropout, followed by an
    output layer whose activations are returned as log-probabilities.

    Args:
        category_cnt: Number of output classes. Defaults to 15, the value
            that was previously hard-coded.
        in_features: Length of the flattened input vector per sample.
        hidden_units: Width of the hidden layer.
        dropout_p: Dropout probability applied after the hidden activation.
    """
    def __init__(self, category_cnt=15, in_features=25088,
                 hidden_units=4096, dropout_p=0.2):
        super().__init__()
        self.category_cnt = category_cnt
        self.fc1 = nn.Linear(in_features, hidden_units)
        self.output = nn.Linear(hidden_units, self.category_cnt)

        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, x):
        """Define forward pass through layers.

        Args:
            x: Tensor whose first dimension is the batch; all remaining
                dimensions are flattened into one feature vector.

        Returns:
            Tensor of shape ``(batch, category_cnt)`` holding log-softmax
            values over the class dimension.
        """
        # ``reshape`` also accepts non-contiguous inputs, which ``view``
        # rejects.
        x = x.reshape(x.shape[0], -1)

        x = self.dropout(F.relu(self.fc1(x)))
        x = F.log_softmax(self.output(x), dim=1)
        return x

# Swap the model's classifier for a fresh instance of the new head.
model.classifier = Classifier()

# The head emits log-probabilities, so the matching loss is NLLLoss.
criterion = nn.NLLLoss()

# Only the new head's parameters are handed to the optimizer.
optimizer = optim.Adam(model.classifier.parameters(), lr=0.003)
model.to(device)

---
## Train Network

In [None]:
def _evaluate(model, loader, criterion, device):
    """Return ``(mean_loss, accuracy)`` of ``model`` over every sample in ``loader``.

    Both figures are weighted by the number of samples in each batch, so a
    smaller final batch does not skew the result. This assumes ``criterion``
    returns the mean loss over the batch. The caller is responsible for
    switching the model between eval and train mode.
    """
    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logps = model(inputs)
            batch_size = labels.size(0)

            # Undo the per-batch averaging so batches can be pooled.
            loss_sum += criterion(logps, labels).item() * batch_size
            # The largest log-probability marks the predicted class.
            correct += (logps.argmax(dim=1) == labels).sum().item()
            seen += batch_size
    return loss_sum / seen, correct / seen


epochs = 1
steps = 0
running_loss = 0
eval_freq = 5  # evaluate on the test set every ``eval_freq`` training steps
for epoch in range(epochs):
    for inputs, labels in trainloader:
        optimizer.zero_grad()
        inputs, labels = inputs.to(device), labels.to(device)

        # Call the module itself rather than ``.forward`` so hooks run.
        logps = model(inputs)
        loss = criterion(logps, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        steps += 1
        if steps % eval_freq == 0:
            # Eval mode disables dropout while measuring test performance.
            model.eval()
            test_loss, accuracy = _evaluate(model, testloader, criterion, device)

            print(f"""
                Epoch:         {epoch + 1}/{epochs}
                Train Loss:    {running_loss / eval_freq:.3f}
                Test Loss:     {test_loss:.3f}
                Test Accuracy: {accuracy:.3f}
            """)
            running_loss = 0
            model.train()

---
## Save Checkpoint

In [None]:
# Save the model's state dict to a time-stamped file in ``output_dir``.
output_dir = Path()
# Join onto the directory instance; dividing the ``Path`` class itself
# raises TypeError.
checkpoint_name = output_dir / 'custom_VGG16'
# Hyphens instead of colons keep the file name valid on Windows.
time_stamp = dt.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
torch.save(model.state_dict(), f'{checkpoint_name}_{time_stamp}.pth')