# **Transfer learning**

## **import needed modules**

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn,optim
from torchvision import transforms,datasets,models
from torch.utils.data import DataLoader
import torch.nn.functional as F
from collections import OrderedDict


import os


## **training and test datasets**

#### **do some transforms**

In [2]:
# Every pretrained torchvision model expects an RGB image of at least 224x224,
# normalized with mean=(0.485, 0.456, 0.406) and std=(0.229, 0.224, 0.225).
IMAGE_SIZE = (224, 224)
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: besides resizing and normalizing, images are randomly
# rotated by up to 30 degrees and flipped horizontally with probability 0.5.
# These augmentations help to increase robustness.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomRotation(30),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Test pipeline: no rotation or flipping, only resizing and normalization.
test_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])


#### **datasets and loaders**

In [3]:
# Root folder of the dataset; it holds one sub-folder per split.
root_path = '../input/dogs-cats-images/dataset/'

#training: augmented images, reshuffled on every pass over the loader
trainset = datasets.ImageFolder(os.path.join(root_path,'training_set'),transform=train_transform)
trainloader = DataLoader(trainset,batch_size=64,shuffle=True)

#test: un-augmented images, served in a fixed order
testset = datasets.ImageFolder(os.path.join(root_path,'test_set'),transform=test_transform)
testnset = testset  # alias kept for the original (misspelled) name
# The loader must wrap the test set; wrapping `trainset` here would make the
# validation metrics measure (augmented) training data instead of held-out data.
testloader = DataLoader(testset,batch_size=64)

## **Load desired pretrained model**
https://pytorch.org/vision/stable/models.html

In [4]:
# Instantiate DenseNet-121 with its pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# a `weights=` argument — confirm against the installed torchvision version.
model = models.densenet121(pretrained=True)

#### **as you can see, this model is immensely huge**

In [5]:
# Bare expression: the notebook renders the module tree of the network.
model

#### **depending on our task, we can freeze all of the convolutional layers or only part of them, and then retrain the rest.**
#### **in both cases, we need to know the input dimension of our future layer**
#### **densenet121 consists of convolutional layers (features) and a fully connected layer (classifier); we can access its characteristics**

In [6]:
# Input width of the model's current classifier layer; a replacement head must accept this size.
model.classifier.in_features

## **freeze parameters**

#### freeze

In [7]:
#we can freeze using requires_grad=False
# Freeze the feature extractor: parameters that do not require gradients
# receive no gradient during backpropagation.
for frozen_param in model.features.parameters():
    frozen_param.requires_grad_(False)



#### initialize learning layers

In [8]:
# we can access input dimension using a line of code below:
# Read the input width of the current classifier layer.
# This must run before `model.classifier` is replaced further below, because
# the replacement (an nn.Sequential) has no `in_features` attribute.
input_dim = model.classifier.in_features
# For DenseNet-121's fully connected layer this is 1024.
input_dim

In [9]:
# Sizes of the two hidden layers of the new classification head.
hidden = [512,128]
out_dim = 2 # our task is binary classification, so it is 2

# New trainable head: input_dim -> 512 -> 128 -> 2.
# The last layer deliberately outputs raw logits (no Softmax): the training
# criterion used in this notebook, nn.CrossEntropyLoss, applies log-softmax
# itself, so a Softmax here would be applied twice and flatten the gradients.
# argmax over logits gives the same predicted class; if probabilities are
# needed, apply torch.softmax(logits, dim=1) to the model output.
LearningNet = nn.Sequential(OrderedDict([
    ('cf1',nn.Linear(input_dim,hidden[0])),
    ('act1',nn.ReLU()),
    ('cf2',nn.Linear(hidden[0],hidden[1])),
    ('act2',nn.ReLU()),
    ('cf3',nn.Linear(hidden[1],out_dim)),
]))
# Swap the pretrained classifier for the new head.
model.classifier = LearningNet



#### model is ready to learn

In [16]:
# Display the network again; `classifier` now shows the replacement head.
model

## **train loop**

In [25]:
def train(model,trainloader,testloader,criterion,epochs,lr=0.3,print_every=50):
    """Train ``model`` and validate it after every epoch, checkpointing the best weights.

    The model is moved to ``cuda:0`` when CUDA is available (otherwise the CPU)
    and optimised with Adam. After each epoch the model is evaluated on
    ``testloader``; whenever the epoch's validation accuracy beats the best one
    seen so far in this call, ``model.state_dict()`` is written to
    ``'checkpoint.pth'`` in the current working directory.

    Epoch-level metrics cover *every* batch of the epoch, so they are well
    defined even when a loader yields fewer than ``print_every`` batches.

    Args:
        model: network to train; called as ``model(images)`` and expected to
            return per-class scores of shape (batch, classes), since
            predictions are taken with ``argmax(dim=1)``.
        trainloader: iterable of ``(images, labels)`` training batches.
        testloader: iterable of ``(images, labels)`` validation batches.
        criterion: loss callable invoked as ``criterion(predictions, labels)``.
        epochs: number of passes over ``trainloader``.
        lr: learning rate passed to Adam.
        print_every: number of batches between intermediate metric printouts.

    Returns:
        The model (on the selected device) after the last epoch. Note that
        these are the last-epoch weights, not necessarily the checkpointed ones.
    """
    # -inf (rather than 0) so the first epoch with a defined validation
    # accuracy is always checkpointed, even if that accuracy is exactly 0.
    max_val_acc = float('-inf')
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    # the optimizer is created after the model has been moved to its device
    optimizer = optim.Adam(model.parameters(),lr=lr)
    for epoch in range(epochs):
        # epoch-wide totals: every batch contributes, not only full print windows
        tr_loss_sum, tr_correct, tr_seen, tr_batches = 0.0, 0, 0, 0
        val_loss_sum, val_correct, val_seen, val_batches = 0.0, 0, 0, 0
        # running sums for the current print window only
        tr_losses = 0
        val_losses = 0
        tr_accs = 0
        val_accs = 0
        print(f'running {epoch+1} out of {epochs} epochs')
        #training
        model.train()
        for i,(img_tr,y_tr) in enumerate(trainloader):
            #passing input and label to the device(cuda if available, else cpu)
            img_tr = img_tr.to(device)
            y_tr = y_tr.to(device)

            #setting grad to zero, preventing gradient accumulation
            optimizer.zero_grad()

            #prediction and loss calc (model(...) rather than model.forward(...) so hooks run)
            pred_tr = model(img_tr)
            loss_tr = criterion(pred_tr,y_tr)

            #backprop
            loss_tr.backward()
            optimizer.step()

            #loss and accuracy metrics
            batch_loss = loss_tr.item()
            hits = y_tr.eq(pred_tr.detach().argmax(dim=1))
            tr_loss_sum += batch_loss
            tr_correct += hits.sum().item()
            tr_seen += hits.numel()
            tr_batches += 1
            tr_losses += batch_loss
            tr_accs += hits.float().mean().item()

            #printing metrics per predefined interval
            if (i+1) % print_every==0:
                print(f'\t\t iter {i+1}-training loss:  {(tr_losses/print_every):.3f},\ttraining accuracy:  {(tr_accs/print_every):.3f} ')
                tr_losses = 0
                tr_accs= 0
        #validation
        model.eval()
        with torch.no_grad():
            for j,(img_val,y_val) in enumerate(testloader):
                #passing input and label to device
                img_val = img_val.to(device)
                y_val = y_val.to(device)

                #prediction and loss calc
                pred_val = model(img_val)
                loss_val = criterion(pred_val,y_val)

                #loss and accuracy metrics
                batch_loss = loss_val.item()
                hits = y_val.eq(pred_val.argmax(dim=1))
                val_loss_sum += batch_loss
                val_correct += hits.sum().item()
                val_seen += hits.numel()
                val_batches += 1
                val_losses += batch_loss
                val_accs += hits.float().mean().item()

                #printing metrics per predefined interval
                if (j+1) % print_every==0:
                    print(f'\t\t iter {j+1} - valid. loss:  {(val_losses/print_every):.3f},\taccuracy:  {(val_accs/print_every):.3f} ')
                    val_losses = 0
                    val_accs= 0

        # Epoch summaries: loss is the mean of the per-batch loss values,
        # accuracy is correct predictions / samples seen. An empty loader
        # yields NaN instead of raising.
        nan = float('nan')
        avg_tr_loss = tr_loss_sum/tr_batches if tr_batches else nan
        avg_tr_acc = tr_correct/tr_seen if tr_seen else nan
        avg_val_loss = val_loss_sum/val_batches if val_batches else nan
        avg_val_acc = val_correct/val_seen if val_seen else nan
        print("---"*30)
        print(f'\t epoch {epoch+1}- train loss:  {avg_tr_loss},\taccuracy:  {avg_tr_acc}')
        print(f'\t epoch {epoch+1}- validation loss:  {avg_val_loss},\taccuracy:  {avg_val_acc}')
        print("---"*30)

        #saving the model: if validation accuracy beats the best so far, save the model's params
        #(a NaN accuracy compares False and therefore never overwrites a checkpoint)
        if avg_val_acc>max_val_acc:
            torch.save(model.state_dict(),'checkpoint.pth')
            max_val_acc = avg_val_acc
    return model


### **start training**

In [17]:
# Hyper-parameters for the first training run.
lr = 0.003
epochs = 3
print_interval = 50  # batches between intermediate metric printouts
criterion = nn.CrossEntropyLoss()

# Run the training loop; as the cell's last expression, the returned model is displayed.
train(
    model,
    trainloader,
    testloader,
    criterion,
    epochs,
    lr=lr,
    print_every=print_interval,
)


## Loading saved model

In the training loop, we save the model's parameters whenever the validation accuracy improves on the best value seen so far. This is helpful if we don't want to lose our training time and effort, and it lets us keep the best-performing parameters so that we can stop training once the model stops learning.

* **to save model** torch.save(model.state_dict(), 'file_name.pth')
later we can load parameters to the skeleton of our model.
* **to load saved parameters** 
params = torch.load('filename.pth') -> model.load_state_dict(params)


In [19]:
# Read the checkpointed parameters onto the CPU first, so a checkpoint written
# on a GPU machine can also be opened where no GPU is available.
# load_state_dict then copies the values into the model's existing parameters,
# whichever device those live on.
state_dict = torch.load('checkpoint.pth', map_location='cpu')
model.load_state_dict(state_dict)

## continue training

In [26]:
# Hyper-parameters for the follow-up run: more epochs at a lower learning rate.
lr = 0.002
epochs = 10
print_interval = 50  # batches between intermediate metric printouts
criterion = nn.CrossEntropyLoss()

# Continue training the current model; the returned model is displayed.
train(
    model,
    trainloader,
    testloader,
    criterion,
    epochs,
    lr=lr,
    print_every=print_interval,
)