In [1]:
import torch
from torch import nn

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline

from functools import reduce
from collections import OrderedDict
from math import ceil
import time
import random
import glob
import copy


# Model

In [3]:
from Models import CustomHasher, SmallHasher, ResnetHasher
SmallHasher().visualize()
model = SmallHasher()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             448
         MaxPool2d-2           [-1, 16, 32, 32]               0
              ReLU-3           [-1, 16, 32, 32]               0
       BatchNorm2d-4           [-1, 16, 32, 32]              32
       HasherBlock-5           [-1, 16, 32, 32]               0
            Conv2d-6           [-1, 20, 16, 16]           2,900
         MaxPool2d-7           [-1, 20, 16, 16]               0
              ReLU-8           [-1, 20, 16, 16]               0
       BatchNorm2d-9           [-1, 20, 16, 16]              40
      HasherBlock-10           [-1, 20, 16, 16]               0
           Conv2d-11             [-1, 28, 8, 8]           5,068
        MaxPool2d-12             [-1, 28, 8, 8]               0
             ReLU-13             [-1, 28, 8, 8]               0
      BatchNorm2d-14             [-1, 2

# Load Training Data

In [4]:
class TripletDataset(torch.utils.data.Dataset):    
    def __init__(self, directory, transforms, n=6):
        self.transforms = transforms
        self.n = n
        
        self.file_list = glob.glob(f'{directory}/*.png')
        self.file_list.sort()
        self.num_items_available = len(self.file_list) // self.n
        self.base_idxes = [self.num_items_available * i for i in range(0, self.n)]

        # from imageaug
        self._tt = ToRGBTensor()
        
        print(f'Found {len(self.file_list)} images.')

    def __len__(self):
        return self.num_items_available
    
    def __getitem__(self, index):
        names     = [self.file_list[pos + index] for pos in self.base_idxes]

        anchors   = [Image.open(name) for name in names]
        positives = [self.transforms(anchor) for anchor in anchors]
        
        anchors   = [self._tt(img) for img in anchors]
        positives = [self._tt(img) for img in positives]

        return (anchors, positives)

### Image Augmentation/Transforms

In [5]:
from torchvision.transforms import Compose
from imageaug import *

transforms = Compose([
    ApplyOne(Noise(), Greyscale()),
    ApplyOne(RandomRotate(range(0, 360, 30)), RandomFillCrop(1))
]) 

In [6]:
dataset = TripletDataset('TrainDataset', transforms)
validationset = TripletDataset('ValidDataset', transforms)

Found 537379 images.
Found 179127 images.


In [52]:
from torch.utils.data import DataLoader

train_loader = DataLoader(dataset, batch_size=64)
valid_loader = DataLoader(validationset)

In [None]:
from vistools import show_ntuples

show_ntuples(dataset[0])

## Hyperparameters

## Triplet Loss:
$$L(\displaystyle A,\displaystyle P,\displaystyle N,\displaystyle \alpha) = max\Big( d(f(\displaystyle A), f(\displaystyle N)) − d(f(\displaystyle A), f(\displaystyle P)) + \displaystyle \alpha, 0 \Big)$$

Where:
* ${\displaystyle A}$ is an anchor input, 
* ${\displaystyle P}$ is a positive input of the same class as ${\displaystyle A}$, 
* ${\displaystyle N}$ is a negative input of a different class from ${\displaystyle A}$, 
* ${\displaystyle \alpha }$  is a margin between positive and negative pairs, 
* ${\displaystyle f}$ is an embedding onto a metric space (the model we're training), and 
* $d$ is a distance function on that space. (In this case L1 or L2 norm, Manhattan and Euclidean distance respectively).

Taken from FaceNet paper
https://arxiv.org/abs/1503.03832

## N-Tuple Loss
Triplet loss as described above accomplishes unsupervised learning of embeddings. This is all well and good, but what if we wanted to up the ante a bit? Triplet loss pushes the embeddings of two images farther apart. But what if we wanted to push even more embeddings apart, probably more intelligently, in a single step? Behold my monstrosity, n-tuple loss.

As above, let $f$ be an embedding from the input space onto metric space, and $\alpha$ be the desired margin between positive and negative pairs. In this case, $f$ represents our model.

Let $A$ be a tuple of $n$ anchor inputs from separate classes, and $P$ be a tuple of $n$ corresponding positive matches from each class. 

Let the class of $a \in A$ be distinct from the class of $x \in A\cup B$ for $a \neq x$.

Let $S$ be a tuple of distances on the chosen metric space of length n.
Let $S_i = d(f(A_i), f(B_i))$ for $i \in 1..n$

Finally, 

$$L(\displaystyle A,\displaystyle P,\displaystyle \alpha) = \sum_{i=0}^{n-1} \sum_{j=i+1}^{n-1} \lbrack d(f(A_i),f(P_j)) - S_j + \alpha \rbrack_{+}.$$

### Exploration of Shape

In [66]:
from torch.nn.functional import pairwise_distance

dat = train_loader.__iter__().__next__()
print(type(dat), 'of size', len(dat), 'of')
print(type(dat[0]), 'of size', len(dat[0]), 'of')
print(type(dat[0][0]), 'of shape', dat[0][0].shape)


with torch.no_grad():
    model.eval()
    positives = model.forward(torch.stack(dat[0]).view(-1, 3, 64, 64))
    negatives = model.forward(torch.stack(dat[1]).view(-1, 3, 64, 64))

    positives = torch.chunk(positives.view(-1, 16), train_loader.batch_size)
    negatives = torch.chunk(negatives.view(-1, 16), train_loader.batch_size)

    print()
    print(type(positives), 'of', len(positives))
    print(type(positives[0]), 'of shape', positives[0].shape)

S = [ pairwise_distance(p, n, p=2) for p, n in zip(positives, negatives) ]
print()
print(type(S), 'of length', len(S))
print(type(S[0]), 'of shape', S[0].shape)

<class 'list'> of size 2 of
<class 'list'> of size 6 of
<class 'torch.Tensor'> of shape torch.Size([64, 3, 64, 64])

<class 'tuple'> of 64
<class 'torch.Tensor'> of shape torch.Size([6, 16])

<class 'list'> of length 64
<class 'torch.Tensor'> of shape torch.Size([6])


## N-Tuple Loss Implementation

In [50]:
from torch.nn.functional import pairwise_distance

def loss_fn(anchor_embeddings, positive_embeddings, α, p=2):
    # Embeddings are tuples of batchnum many embedding tensor stacks. These stacks have shape (n, embedding_size).
    S = [ pairwise_distance(an, pos, p=p) for an, pos in zip(anchor_embeddings, positive_embeddings) ]
    
    losses = []
    for i in range(0, n):
        for j in range(i, n):
            dist        = pairwise_distance(anchor_embeddings[i], positive_embeddings[j], p=p)
            single_loss = torch.max(dist + alpha,)
            losses.append(single_loss)
            
            
    return sum(losses) / len(losses)

In [26]:
# Model
model = SmallHasher()

# Loss
alpha = .4
norm_type = 'Manhattan'

# Optimizer
opt = torch.optim.Adam(model.parameters(), lr=.01)

# Training Loop

#### Note that one batch actually contains So, each batch actually contains n * 2 * batchnum images each. They're arranged as such.
* batch (list of length batch_num)
* ntuples (list of length 2)
* images (list of length n)
* image (tensor of shape 3, 64, 64)

In [16]:
# Returns loss
def train_batch(model, batch, batchnum):
        batch_start_time = time.time()

        # Enable Training
        model.train()
        model.zero_grad()

        # Forward
        anchors, positives = torch.stack(batch[0]), torch.stack(batch[1])

        anchor_embeddings   = model.forward(anchors)
        positive_embeddings = model.forward(positives)
        
        # Backward
        loss = loss_fn(anchor_embeddings, positive_embeddings, dataset.n)
        loss.backward()
        opt.step()

        # Print
        batch_end_time = time.time()
        loss_num = loss.item()
        if print_batches:
            print_batch('train', loss_num, dataset, batch_start_time, batch_end_time)
        
        return loss_num

# Returns loss
def valid_batch(model, batch, batch_num):
        batch_start_time = time.time()

        # Disable Training
        model.eval()
        with torch.no_grad():

                # Forward
                model.zero_grad()
                out_tensors = model.forward(vbatch)
                A = out_tensors[0::3]
                P = out_tensors[1::3]
                N = out_tensors[2::3]
        
                # Backward
                loss = loss_fn(A, P, N)
                batch_end_time = time.time()

                loss_num = loss.item()
        
        if print_batches:
            print_batch('valid', loss_num, dataset, batch_start_time, batch_end_time)

        return loss_num

# Returns new best weights
def save(model, best_weights, best_loss, loss_num):
        if best_loss <= loss_num:
                return best_weights, best_loss
        return copy.deepcopy(model.state_dict()), loss_num

In [17]:
print_batches = True
def print_batch(tv, loss_num, dataset, batch_start_time, batch_end_time):
        batch_time = round(batch_end_time-batch_start_time, 2)
        num_batches = dataset.num_batches()
        est_epoch_time_seconds = batch_time*num_batches
        est_epoch_time_minutes = round(est_epoch_time_seconds / 60, 2)
        print(f'\rCompleted {tv} batch: {str(batch_num).rjust(4, " ")} of {num_batches} | \
Loss: {"{:6.4f}".format(loss_num)} | \
Time: {str(batch_time).rjust(6, " ")} (est. {"{:4.1f}".format(est_epoch_time_minutes)} min for epoch)', end='')

In [19]:
best_weights = copy.deepcopy(model.state_dict())
best_loss = np.inf


epoch_avg_valid_losses = []

# Train for at most 1000 epochs. There's no way it will take that long.
for epoch in range(1, 1001):
    
    batch_num = 0
    batch_valid_losses = []
    
    # Training Batches
    epoch_start_time = time.time()
    for batch_num, batch in enumerate(train_loader):
        
        train_batch(model, batch, batch_num)
    
    # So carriage return prints train/valid on different lines
    if print_batches:
        print()

    # Validation batches
    batch_num = 0
    for batch_num, vbatch in enumerate(valid_loader):
        

        loss_num = valid_batch(model, vbatch, batch_num)
        batch_valid_losses.append(loss_num)
    print()

    # Print epoch results
    epoch_end_time = time.time()
    epoch_time_minutes = round((epoch_end_time - epoch_start_time) / 60, 2)
    epoch_average_valid_loss = sum(batch_valid_losses) / len(batch_valid_losses)
    print(f"""\
╔══════════════════════════════════════════════════════════════════╗\n║ \
End of Epoch: {str(epoch).rjust(3, " ")} | \
Validation Loss: {"{:7.4f}".format(epoch_average_valid_loss)} | \
Time: {str(epoch_time_minutes).rjust(6, " ")} min. ║\n\
╚══════════════════════════════════════════════════════════════════╝\n""")
    
    # Save best weigths
    best_weights, best_loss = save(model, best_weights, best_loss, loss_num)

    # Early Stopping
    if (epoch > 5):
        if (epoch_average_valid_loss > epoch_avg_valid_losses[-1]):
            break # Exit training loop
    epoch_avg_valid_losses.append(epoch_average_valid_loss)

TypeError: conv2d(): argument 'input' (position 1) must be Tensor, not list

# Export Model

In [27]:
export_model = True
model_path = 'Models/model%s.zip' 


def next_path(path_pattern):
    i = 1
    while os.path.exists(path_pattern % i):
        i += 1
    return path_pattern % i

if export_model:
    fname = next_path(model_path)
    torch.save(model, fname)
    print(f'Saved model as: {fname}')




Saved model as: Models/model1.zip
