# VGG for Dog Emotion Classification

## Load Libraries and Configuration

In [1]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torchvision import models
from torch.utils.data.sampler import SubsetRandomSampler
# from google.colab import drive 
from matplotlib import pyplot as plt
import math
# import os

# Device configuration: use CUDA when PyTorch reports it as available, else the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)
# Load Google Drive
# drive.mount('/content/drive')
# os.chdir('/content/drive/MyDrive/Final_Project')

# Seed PyTorch's default random number generator
torch.manual_seed(1234)

cuda


<torch._C.Generator at 0x1ff88023d50>

## Loading Data with a DataLoader

In [2]:
def data_loader(data_dir, batch_size, shuffle = True):
  """Build training and test DataLoaders from an ImageFolder directory.

  The images under ``data_dir`` are split 95% / 5% at random. Random
  augmentation (flips and rotation) is applied to the training split only;
  the test split receives just the deterministic resize + normalisation, so
  evaluating the same model twice sees the same inputs.

  Args:
    data_dir: root directory handed to ``datasets.ImageFolder``.
    batch_size: batch size used by both loaders.
    shuffle: passed as ``shuffle`` to both loaders.

  Returns:
    A ``(train_loader, test_loader)`` tuple.
  """
  # Deterministic preprocessing shared by both splits.
  # NOTE(review): mean/std look like the usual ImageNet channel statistics
  # matching the pretrained VGG weights - confirm.
  preprocess = [
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(
      mean=[0.485, 0.456, 0.406],
      std=[0.229, 0.224, 0.225],
    ),
  ]
  # data augmentation (training split only)
  augment = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(degrees = (-90, 90)),
    # transforms.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 1))
  ]
  train_transform = transforms.Compose(preprocess + augment)
  test_transform = transforms.Compose(preprocess)

  # Two views over the same folder: identical samples in identical order,
  # differing only in the transform applied when an item is fetched.
  dataset = datasets.ImageFolder(data_dir, transform = train_transform)
  dataset_plain = datasets.ImageFolder(data_dir, transform = test_transform)
  # classes = dataset.classes

  # Split data into Training and Testing Set
  dataset_train, held_out = torch.utils.data.random_split(dataset, lengths = [0.95, 0.05])
  # Re-point the held-out indices at the non-augmented view of the data.
  dataset_test = torch.utils.data.Subset(dataset_plain, held_out.indices)

  train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size, shuffle = shuffle)

  test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=batch_size, shuffle = shuffle)

  return (train_loader, test_loader)




# Build both loaders from the images under ./Data, 13 images per batch
train_loader, test_loader = data_loader(data_dir='./Data', batch_size=13)

## Defining the Pretrained Model

In [None]:
# Instantiate torchvision's vgg16_bn with its default pretrained weights
pretrained_weights = models.VGG16_BN_Weights.DEFAULT
vgg16 = models.vgg16_bn(weights=pretrained_weights)

# Display the original, unmodified pretrained VGG model
vgg16

In [None]:
# Modified Pretrained Model
in_features = vgg16.classifier[6].in_features
layers = [*vgg16.classifier.children()][:-1]  # every classifier layer except the last
layers.append(nn.Linear(in_features, 4))  # new final layer with 4 outputs

# Randomly re-initialise weights and biases of the layers at positions 0, 3 and 6:
# Kaiming-uniform weights, biases uniform in [-1/sqrt(in_features), 1/sqrt(in_features)]
for idx in (0, 3, 6):
    linear = layers[idx]
    bound = 1/math.sqrt(linear.in_features)
    nn.init.kaiming_uniform_(linear.weight)
    nn.init.uniform_(linear.bias, -bound, bound)

# Swap the rebuilt layer stack in as the model's classifier
vgg16.classifier = nn.Sequential(*layers)
print(vgg16)

In [3]:
# Load a model previously trained on the same dataset
# Needed due to limited computation power
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine also loads on a CPU-only one.
# NOTE: the file is used as a whole pickled model (not a state_dict), so
# weights_only=False is required; unpickling can execute arbitrary code,
# so only load checkpoint files you trust.
vgg16 = torch.load(
    './trained_models/vgg16_dog_emotion_best.pt',
    map_location=device,
    weights_only=False,
)
vgg16

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [4]:
# Freeze all feature layers: turn off gradient tracking for every parameter under vgg16.features
for frozen_param in vgg16.features.parameters():
    frozen_param.requires_grad_(False)

# test code for viewing model parameters
# for name, param in vgg16.named_parameters():
#     if param.requires_grad:
#         print(name, param)

In [5]:
# Switch the model to training mode and move it to the configured device
vgg16 = vgg16.train().to(device)

## Hyperparameters

In [6]:
# Training schedule
num_epochs = 50
learning_rate = 0.001

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
# SGD with momentum, given only the classifier's parameters
optimizer = torch.optim.SGD(
    vgg16.classifier.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0,
)

## Training

In [9]:
import os

total_step = len(train_loader)

# Create the checkpoint folder up front so the save at the end of an epoch
# cannot fail on a missing directory after the training work is already done.
checkpoint_dir = './trained_models'
os.makedirs(checkpoint_dir, exist_ok=True)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = vgg16(images)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Progress report every 100 steps
        if (i % 100 == 0):
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i, total_step, loss.item()))

    # Accuracy on the training loader (not held-out data), measured in eval mode
    vgg16.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = vgg16(images)
            # Predicted class = index of the largest output along dim 1
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # Drop the batch references before the next iteration
            del images, labels, outputs

        print('Accuracy of the network on training images: {} %'.format(100 * correct / total))
    # Back to training mode for the next epoch
    vgg16.train()
    # Save the whole model after every epoch
    torch.save(vgg16, f"{checkpoint_dir}/vgg16_dog_emotion_{epoch+1}.pt")

Epoch [1/50], Step [0/1456], Loss: 0.7783
Epoch [1/50], Step [100/1456], Loss: 1.0850
Epoch [1/50], Step [200/1456], Loss: 1.3454
Epoch [1/50], Step [300/1456], Loss: 1.2907
Epoch [1/50], Step [400/1456], Loss: 0.6299
Epoch [1/50], Step [500/1456], Loss: 0.9643
Epoch [1/50], Step [600/1456], Loss: 0.9154
Epoch [1/50], Step [700/1456], Loss: 1.0268
Epoch [1/50], Step [800/1456], Loss: 0.8327
Epoch [1/50], Step [900/1456], Loss: 0.9072
Epoch [1/50], Step [1000/1456], Loss: 0.8800
Epoch [1/50], Step [1100/1456], Loss: 0.7284
Epoch [1/50], Step [1200/1456], Loss: 0.6914
Epoch [1/50], Step [1300/1456], Loss: 0.5950
Epoch [1/50], Step [1400/1456], Loss: 1.2101
Accuracy of the network on training images: 61.55878467635403 %
Epoch [2/50], Step [0/1456], Loss: 1.2259
Epoch [2/50], Step [100/1456], Loss: 1.0910
Epoch [2/50], Step [200/1456], Loss: 1.0039
Epoch [2/50], Step [300/1456], Loss: 0.8493
Epoch [2/50], Step [400/1456], Loss: 1.3828
Epoch [2/50], Step [500/1456], Loss: 1.0775
Epoch [2/50