# Transfer Learning

## Setting Up Our Environment

### Mount Colab to GDrive

In [None]:
# Mount Google Drive when the notebook runs inside Google Colab.
# The google.colab package only exists on Colab, so guard the import:
# on any other runtime the mount is skipped instead of raising ImportError.
try:
    from google.colab import drive
except ImportError:
    drive = None
    print('Not running in Google Colab - skipping the Google Drive mount.')
else:
    drive.mount('/content/gdrive')

### Move to Our Working Directory

In [None]:
# Selects our Gdrive we just mounted above
%cd /content/gdrive/My Drive

# Create our working directory
%mkdir HuruAI

# Move into the working directory
%cd /content/gdrive/My Drive/HuruAI

### Install EfficientNet and Split-folders
We need to install EfficientNet for PyTorch, since it does not come with the torchvision models package.  
Split-folders is a handy package for splitting our dataset into train, validation and test sets.

In [17]:
!pip install efficientnet_pytorch
!pip install split-folders



### Import the required packages

In [18]:
# Notebook plotting configuration: the two magics below control how matplotlib
# figures are shown in the cell outputs.

# Draw figures inline, directly below the cell that produced them.
%matplotlib inline
# Ask the inline backend for 'retina' (high-resolution) figure output.
%config InlineBackend.figure_format = 'retina'


# Import the required packages

import numpy as np
import os

import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from efficientnet_pytorch import EfficientNet
import splitfolders

import matplotlib.pyplot as plt

### Define the Device to Use: Either GPU or CPU

In [43]:
# Detect once whether a CUDA device can be used; the training loop reads this
# flag to decide whether batches are moved to the GPU.
train_on_gpu = torch.cuda.is_available()

status_message = (
    'CUDA is available!  Training on GPU ...'
    if train_on_gpu
    else 'CUDA is not available.  Training on CPU ...'
)
print(status_message)

CUDA is not available.  Training on CPU ...


## Preparing Our Data

### Download the Data

In [21]:
# Folder holding the raw, not-yet-split images (relative to the working directory).
UNSPLIT_ROOT = "./UnsplitData"
data_dir = f"{UNSPLIT_ROOT}/Multi-class Weather Dataset"

In [36]:
# The class names are the sub-directories of the dataset root.
# - Only directories are kept, so stray files (e.g. .DS_Store, Thumbs.db)
#   are not counted as classes.
# - The names are sorted: os.listdir/os.scandir return entries in arbitrary
#   order, while torchvision's ImageFolder assigns label indices to the sorted
#   folder names, so sorting keeps classes[i] aligned with label i.
classes = sorted(entry.name for entry in os.scandir(data_dir) if entry.is_dir())
classes

['Cloudy', 'Rain', 'Shine', 'Sunrise']

### Split the Dataset into Train, Validate and Test Datasets

In [23]:
# Copy the images into Data/train, Data/val and Data/test using these
# proportions (train, validation, test).
split_ratios = (.7, .15, .15)
splitfolders.ratio(data_dir, output="Data", ratio=split_ratios)

In [25]:
# Locations of the three splits inside the split-folders output directory.
new_data_dir = './Data'
train_dir = f'{new_data_dir}/train'
valid_dir = f'{new_data_dir}/val'
test_dir = f'{new_data_dir}/test'

### Define Our Transforms

In [29]:
# Per-channel statistics of ImageNet, the dataset the pretrained EfficientNet
# weights were trained on. The pretrained layers expect inputs normalised with
# these values; normalising with 0.5/0.5 feeds them a shifted input distribution.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Deterministic preprocessing for validation and test images.
val_test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])

# Training preprocessing: same geometry as above plus random flips and a
# random rotation (up to 40 degrees) as data augmentation.
train_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(40),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])

### Create Our Dataset Objects

In [30]:
# Build one ImageFolder dataset per split. Only the training split uses the
# augmenting transform; validation and test share the deterministic one.
trainset, validset, testset = (
    datasets.ImageFolder(split_dir, transform=split_transform)
    for split_dir, split_transform in (
        (train_dir, train_transform),
        (valid_dir, val_test_transform),
        (test_dir, val_test_transform),
    )
)

### Create Our Data Loaders

In [31]:
# Number of images per mini-batch, shared by all three loaders.
BATCH_SIZE = 16

# Wrap every split in a shuffling DataLoader with the same batch size.
trainloader, validloader, testloader = (
    torch.utils.data.DataLoader(split, batch_size=BATCH_SIZE, shuffle=True)
    for split in (trainset, validset, testset)
)

### Create Our iterator function

In [32]:
# Pull a single batch from the training loader to sanity-check the tensor
# type and the shapes of the images and labels.
train_iterator = iter(trainloader)
# Use the built-in next(): the iterator's .next() method is not part of the
# Python iterator protocol and is missing from recent PyTorch releases.
images, labels = next(train_iterator)
print(type(images))
print(images.shape)
print(labels.shape)

<class 'torch.Tensor'>
torch.Size([16, 3, 224, 224])
torch.Size([16])


## Building Our Model

### Load a pretrained EfficientNet Model

In [33]:
# Load the EfficientNet variant named below together with its pretrained weights
# (the weights are downloaded on first use and cached locally).
MODEL_NAME = 'efficientnet-b6'
model = EfficientNet.from_pretrained(MODEL_NAME)

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth" to C:\Users\Benah/.cache\torch\hub\checkpoints\efficientnet-b6-c76e70fd.pth
100.0%


Loaded pretrained weights for efficientnet-b6


In [34]:
# Bare expression: the notebook prints the module tree of the pretrained
# network, which shows the layer names (_conv_stem, _bn0, _blocks, ...).
model

EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 56, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
  )
  (_bn0): BatchNorm2d(56, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        56, 56, kernel_size=(3, 3), stride=[1, 1], groups=56, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_bn1): BatchNorm2d(56, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        56, 14, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        14, 56, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        56, 32, kernel_siz

### Freeze all but the final layer of the model

In [35]:
# Freeze the whole pretrained feature extractor, not just the stem convolution:
# the stem (_conv_stem), its batch norm (_bn0) and every MBConv block (_blocks).
# Parameters with requires_grad=False receive no gradients, so only the layers
# after the blocks (the head) can still be updated during training.
for backbone_part in (model._conv_stem, model._bn0, model._blocks):
    for param in backbone_part.parameters():
        param.requires_grad = False
    

### Modify the Final Layer to Match the Number of Classes

In [39]:
# Width of the feature vector that reaches the classifier: the number of output
# channels of the head convolution (2304 for efficientnet-b6). Reading it from
# the model instead of hard-coding it keeps this cell valid for other
# EfficientNet variants, and it still works when the cell is re-run after
# model._fc has already been replaced.
head_in_features = model._conv_head.out_channels

# New classifier head: two hidden layers (512 and 128 units) with batch norm,
# ReLU and dropout, ending in one output per class.
linear_layer = nn.Sequential(
                nn.BatchNorm1d(num_features=head_in_features),
                nn.Linear(head_in_features, 512),
                nn.ReLU(),
                nn.BatchNorm1d(512),
                nn.Linear(512, 128),
                nn.ReLU(),
                nn.BatchNorm1d(num_features=128),
                nn.Dropout(0.4),
                nn.Linear(128, len(classes)),
            )

In [40]:
# Swap the model's fully connected output layer (_fc) for the new classifier
# head, so the network outputs one score per entry in `classes`.
model._fc = linear_layer

In [41]:
# Bare expression: print the module tree again to inspect the model after its
# _fc layer has been replaced.
model

EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 56, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
  )
  (_bn0): BatchNorm2d(56, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        56, 56, kernel_size=(3, 3), stride=[1, 1], groups=56, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_bn1): BatchNorm2d(56, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        56, 14, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        14, 56, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        56, 32, kernel_siz

## Training Our Model

### Specify Our Loss and Optimizer Functions

In [42]:
# Move the model to the GPU (when one is available) before building the
# optimizer, as the PyTorch documentation recommends; the training loop moves
# every batch to the GPU under the same condition.
if train_on_gpu:
    model.cuda()

# specify loss function (categorical cross-entropy)
criterion = nn.CrossEntropyLoss()

# specify optimizer (stochastic gradient descent) and learning rate = 0.001
# Optimise the new classifier head (model._fc): it is the only randomly
# initialised part of the network, so it is the part that has to be trained.
# Passing model._conv_head here would leave the classifier at its random
# initial weights.
optimizer = optim.SGD(model._fc.parameters(), lr=0.001)

### Train the Model

In [None]:
# number of epochs to train the model
n_epochs = 2
# report the average training loss every PRINT_EVERY mini-batches
PRINT_EVERY = 5

# The batches are moved to the GPU inside the loop, so the model must live on
# the GPU as well; otherwise the forward pass fails with a device mismatch.
if train_on_gpu:
    model.cuda()

for epoch in range(1, n_epochs+1):

    # running sum of the training loss since the last report
    train_loss = 0.0

    ###################
    # train the model #
    ###################
    # Explicitly select training mode (affects dropout and batch norm) rather
    # than relying on the default, which no longer holds once eval() was called.
    model.train()
    for batch_i, (data, target) in enumerate(trainloader):
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the batch loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update training loss
        train_loss += loss.item()

        # print the mean loss of the last PRINT_EVERY mini-batches, then reset the sum
        if (batch_i + 1) % PRINT_EVERY == 0:
            print('Epoch %d, Batch %d loss: %.16f' %
                  (epoch, batch_i + 1, train_loss / PRINT_EVERY))
            train_loss = 0.0