In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
from tqdm import tqdm 

import torch 
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim 

from SmithZero import D2torchEngine

In [2]:
# Seed torch's global random number generator so torch-driven randomness is repeatable.
torch.manual_seed(42)

<torch._C.Generator at 0x7f4355a6d390>

## Data Preparation 

In [3]:
from torchvision.datasets import CIFAR10 
import torchvision.transforms as T 


# === data transformation === #
# Per-channel statistics used for normalisation
# (presumably the CIFAR-10 training-set mean/std — TODO confirm).
CHANNEL_MEAN = (0.4914, 0.4822, 0.4465)
CHANNEL_STD = (0.2023, 0.1994, 0.2010)
normalize = T.Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD)

# Steps shared by both pipelines: convert to tensor, then normalise.
to_normalized_tensor = [T.ToTensor(), normalize]

# Training pipeline: random padded crop + random horizontal flip, then the shared steps.
train_T = T.Compose([
    T.RandomCrop(32, padding=4),
    T.RandomHorizontalFlip(),
    *to_normalized_tensor,
])

# Evaluation pipeline: no random augmentation, only a resize before the shared steps.
# NOTE(review): torchvision documents inception_v3 as expecting 299x299 inputs —
# confirm these 32x32 pipelines are not fed to that model as-is.
test_T = T.Compose([T.Resize(32), *to_normalized_tensor])


# === dataset object === #
# Each split is stored under its own root directory; download=True fetches
# the data only when it is not already present there.
train_dataset = CIFAR10(
    root="./dataset/train",
    train=True,
    download=True,
    transform=train_T,
)
test_dataset = CIFAR10(
    root="./dataset/test",
    train=False,
    download=True,
    transform=test_T,
)

Files already downloaded and verified
Files already downloaded and verified


## Data Batching 

In [4]:
from torch.utils.data import DataLoader

# Settings shared by the training and evaluation loaders.
loader_kwargs = dict(batch_size=64, num_workers=4)

# Training batches are shuffled; evaluation batches keep the dataset order.
trainloader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
testloader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

***
# Auxiliary Classifiers (Side-Heads) for ```Inception``` model

* <b>auxiliary classifier</b> = <b>side-heads</b>
* <b>main classifier</b>

![Inception model](./page_img/inception_model.png)



* The <b>cross-entropy loss</b> was also computed independently for <b>each one of the three classifiers</b> and <b>added together</b> to the total loss (although auxiliary losses were multiplied by a factor of 0.3).
* The auxiliary classifiers (and losses) were used during training time only.
* During the <b>evaluation</b> phase, <b>only the logits produced by the main classifier were considered</b>.

<br/>

This technique was originally developed to mitigate the ```vanishing gradients``` problem (more on that in the next chapter), <br/>
but it was later found that the ```auxiliary classifiers``` are more likely to act as a ```regularizer``` than to help gradients flow.

## Inception v3 
The third version of the Inception model (```inception_v3```), available as a pre-trained model in PyTorch, has only <b>one auxiliary classifier</b> instead of two, <br/>
but we still need to <b>make some adjustments</b> if we’re using this model for transfer learning. 

<br/>

```inception_v3``` setup for transfer learning: 
* <b>load</b> the pre-trained model
* <b>freeze</b> its layers 
* <b>replace the layers</b> for both ```main``` and ```auxiliary``` classifiers 

In [5]:
from torchvision.models import inception_v3

# load the pre-trained model 
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# the `weights=` argument — confirm against the installed torchvision version.
model = inception_v3(pretrained=True)

In [6]:
# freeze your model 

def freeze_model(model):
    """Disable gradient tracking for every parameter of ``model``, in place.

    Parameters
    ----------
    model
        Any object whose ``parameters()`` method yields tensors
        (e.g. an ``nn.Module``).

    Returns
    -------
    None
        The parameters are modified in place.
    """
    for weight in model.parameters():
        weight.requires_grad_(False)
        
# Freeze every parameter currently in the network; the classifier layers
# assigned further down are created afterwards, so this call does not touch them.
freeze_model(model)

In [7]:
# Print the names of the parameters whose name contains "AuxLogits".
for name, param in model.named_parameters():
    if "AuxLogits" not in name:
        continue
    print(name)

AuxLogits.conv0.conv.weight
AuxLogits.conv0.bn.weight
AuxLogits.conv0.bn.bias
AuxLogits.conv1.conv.weight
AuxLogits.conv1.bn.weight
AuxLogits.conv1.bn.bias
AuxLogits.fc.weight
AuxLogits.fc.bias


In [8]:
# Show the final layer of each head: auxiliary classifier first, main classifier second.
print(model.AuxLogits.fc, model.fc, sep="\n")

Linear(in_features=768, out_features=1000, bias=True)
Linear(in_features=2048, out_features=1000, bias=True)


In [9]:
# replace the layers for both main and auxiliary classifiers
n_classes = 10

# Reuse each head's existing input width so only the number of outputs changes.
aux_in_features = model.AuxLogits.fc.in_features
main_in_features = model.fc.in_features

model.AuxLogits.fc = nn.Linear(aux_in_features, n_classes)
model.fc = nn.Linear(main_in_features, n_classes)

In [31]:
# Random dummy batch: a single 3-channel 350x350 input.
x_test = torch.randn(1, 3, 350, 350)
print(x_test.shape)

# The model's output is unpacked into the main and the auxiliary classifier outputs.
main, aux = model(x_test)

torch.Size([1, 3, 350, 350])


In [32]:
# Output sizes: main classifier first, auxiliary classifier second.
print(main.size(), aux.size(), sep="\n")

torch.Size([1, 10])
torch.Size([1, 10])


### Inception Loss (```joint losses```)
Unfortunately, we cannot use the standard ```cross-entropy loss``` directly because, in training mode, the Inception model ```outputs two tensors```, <br/>
one for each classifier (although it is possible to force it to return only the main classifier by setting its ```aux_logits``` argument to ```False```). <br/>

But we can create a simple ```function``` that can handle ```multiple outputs```, computing the ```corresponding losses``` and returning their total:

In [20]:
# Inception Loss with Side-heads 

def inception_loss(model_outputs, labels, aux_weight=0.4):
    """Joint cross-entropy loss for a main classifier plus optional side-heads.

    Parameters
    ----------
    model_outputs : torch.Tensor or tuple/list
        Either a single logits tensor (main classifier only) or a sequence
        whose first item is the main classifier's logits and whose remaining
        items are auxiliary classifier logits. Auxiliary items that are
        ``None`` are ignored.
    labels : torch.Tensor
        Targets, passed unchanged to ``nn.CrossEntropyLoss``.
    aux_weight : float, optional
        Factor applied to each auxiliary loss before it is added to the main
        loss. Defaults to 0.4.

    Returns
    -------
    torch.Tensor
        ``loss_main`` when there is no auxiliary output, otherwise
        ``loss_main + aux_weight * loss_aux`` (summed over all auxiliary
        outputs).

    Raises
    ------
    ValueError
        If ``model_outputs`` is an empty tuple/list.
    """
    # Dispatch on the container type instead of try/except around unpacking:
    # a bare logits tensor is iterable along its batch dimension, so with a
    # batch of exactly 2 samples `main, aux = tensor` would silently succeed
    # and split the batch into a fake "main" and "aux".
    if isinstance(model_outputs, (tuple, list)):
        main, *aux_outputs = model_outputs
    else:
        main, aux_outputs = model_outputs, []

    multi_loss_fn = nn.CrossEntropyLoss(reduction="mean")

    # *** get loss out of main-classifier
    total_loss = multi_loss_fn(main, labels)

    # *** get loss out of aux-classifier(s); a missing (None) head adds nothing
    for aux in aux_outputs:
        if aux is not None:
            total_loss = total_loss + aux_weight * multi_loss_fn(aux, labels)

    return total_loss