# [helpful link](https://www.kaggle.com/carloalbertobarbano/vgg16-transfer-learning-pytorch)

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader, TensorDataset, Subset
import torchvision
import torchvision.models as models
from torchvision import datasets, transforms
import os
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
import time
import copy
import pickle

import torchsummary
from torchsummary import summary

In [2]:
name = "VGG16_CNN_FROZEN"

In [3]:
use_gpu = torch.cuda.is_available()
if use_gpu:
    print("Using CUDA")

In [4]:
#loading data 1

data_dir = os.path.join('.', 'data')

# VGG-16 Takes 224x224 images as input, so we resize all of them
data_transform = transforms.Compose([transforms.Resize(256),transforms.CenterCrop(224),transforms.ToTensor()])

image_dataset = datasets.ImageFolder(data_dir, transform=data_transform)

dataloader = torch.utils.data.DataLoader(
        image_dataset, batch_size=32,
        shuffle=True, num_workers=4)

dataset_size = len(image_dataset)
class_names = image_dataset.classes

In [5]:
# calc balanced count
# class_counts = {}

# for i in range(len(image_dataset)):
#     label_index = image_dataset[i][1]
#     class_counts[label_index] = class_counts.get(label_index, 0) + 1
    
# balanced_count = None
# balanced_class = None
# for class_ in class_counts:
#     if balanced_class is None or class_counts[class_] < class_counts[balanced_class]:
#         balanced_class = class_
#         balanced_count = class_counts[balanced_class]
# print(balanced_count)
balanced_count = 253

In [6]:
# loading data 3
def indicesSplit(ds, balanced_size, percent_train=0.9):
    train_indices = []
    test_indices = []
    counts = {}
    
    for i in range(len(ds)):
        label_index = ds[i][1]
        
        counts[label_index] = counts.get(label_index, 0) + 1
        
        if counts[label_index] < balanced_size * percent_train:
            train_indices.append(i)
            
        elif counts[label_index] < balanced_size:
            test_indices.append(i)
            
        
    return train_indices, test_indices


In [7]:
# # loading data 4
# k = int(252*.9)

# train_indices, test_indices = indicesSplit(image_dataset, balanced_count)

In [8]:
# with open('test_indices.pickle', 'wb') as file:
#     pickle.dump(test_indices, file)
    
# with open('train_indices.pickle', 'wb') as file:
#     pickle.dump(train_indices, file)
    
with open('test_indices.pickle', 'rb') as file:
    test_indices = pickle.load(file)
    
with open('train_indices.pickle', 'rb') as file:
    train_indices = pickle.load(file)
    
print("images loaded")

images loaded


In [9]:
# loading data 5

train_ds = Subset(image_dataset, train_indices)
test_ds = Subset(image_dataset, test_indices)
train_dataloader = DataLoader(
        train_ds, batch_size=32,
        shuffle=True, num_workers=4
    )
test_dataloader = DataLoader(
        test_ds, batch_size=32,
        shuffle=True, num_workers=4
    )

In [35]:
vgg = models.vgg16(pretrained=True)

In [36]:
def freeze_layers(model, n):
    i = 0
    for name, param in model.named_parameters():
        if i < n:
            param.requires_grad = False
        else:
            break
        i += 1

In [37]:
num_features = vgg.classifier[6].in_features
features = list(vgg.classifier.children())[:-1] # Remove last layer
features.extend([nn.Linear(num_features, len(class_names))]) # Add our layer with 4 outputs
vgg.classifier = nn.Sequential(*features) # Replace the model classifier

n = 0
for param in vgg.parameters():
    n += 1
n_unfrozen = 3
frozen = n - 3 * 2
print(frozen)
freeze_layers(vgg, frozen)


26
a
a
a
a
a
a


In [13]:
summary(vgg, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

In [14]:
def eval_model(model, criterion):
    since = time.time()
    avg_loss = 0
    avg_acc = 0
    loss_test = 0
    acc_test = 0
    n = 0
    
    test_batches = len(test_dataloader)
    print("Evaluating model")
    print('-' * 10)
    
    for i, data in enumerate(test_dataloader):
        print("\rTest batch {}/{}".format(i+1, test_batches), end='', flush=True)

        model.train(False)
        model.eval()
        inputs, labels = data

        if use_gpu:
            inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
        else:
            inputs, labels = Variable(inputs), Variable(labels)
            
        outputs = model(inputs)

        _, preds = torch.max(outputs.data, 1)
        
        loss = criterion(outputs, labels)
        
        n += len(preds)
        loss_test += loss.data.item()
        acc_test += torch.sum(preds == labels).item()

        del inputs, labels, outputs, preds
        torch.cuda.empty_cache()
        
    avg_loss = loss_test / n
    avg_acc = acc_test / n
    
    elapsed_time = time.time() - since
    print()
    print("Evaluation completed in {:.0f}m {:.0f}s".format(elapsed_time // 60, elapsed_time % 60))
    print("Avg loss (test): {:.4f}".format(avg_loss))
    print("Avg acc (test): {:.4f}".format(avg_acc))
    print('-' * 10)

In [15]:
if use_gpu:
    vgg.cuda() #.cuda() will move everything to the GPU side
criterion = nn.CrossEntropyLoss()

optimizer_ft = optim.SGD(vgg.parameters(), lr=0.0001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [16]:
print("Test before training")
eval_model(vgg, criterion)

Test before training
Evaluating model
----------
Test batch 10/10
Evaluation completed in 1m 21s
Avg loss (test): 0.0842
Avg acc (test): 0.0700
----------


In [17]:
def train_model(model, criterion, optimizer, scheduler, train_dataloader, test_dataloader, num_epochs=10):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    train_losses = []
    train_accs = []
    val_losses = []
    val_accs = []

    train_batches = len(train_dataloader)
    val_batches = len(test_dataloader)
    
    for epoch in range(num_epochs):
        print("Epoch {}/{}".format(epoch+1, num_epochs))
        print('-' * 10)
        
        loss_train = 0
        loss_val = 0
        acc_train = 0
        acc_val = 0
        n_train = 0
        n_val = 0
        
        model.train(True)
        
        for i, data in enumerate(train_dataloader):
            print("\rTraining batch {}/{}".format(i + 1, train_batches), end='', flush=True)
                
                
            inputs, labels = data
            
            if use_gpu:
                inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            else:
                inputs, labels = Variable(inputs), Variable(labels)
            
            optimizer.zero_grad()
            
            outputs = model(inputs)
            
            _, preds = torch.max(outputs.data, 1)
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()
            
            n_train += len(preds)
            loss_train += loss.data.item()
            acc_train += torch.sum(preds == labels.data).item()
#             print("p", preds)
#             print("l", labels.data)
#             print("\nn_correct", torch.sum(preds == labels.data).item())
#             print("n", len(preds))
            
            del inputs, labels, outputs, preds
            torch.cuda.empty_cache()
        
        avg_loss_train = loss_train / n_train
        avg_acc_train = acc_train / n_train
        train_losses.append(avg_loss_train)
        train_accs.append(avg_acc_train)


        model.train(False)
        #model.eval()
            
        for i, data in enumerate(test_dataloader):
            print("\rValidation batch {}/{}".format(i+1, val_batches), end='', flush=True)
                
            inputs, labels = data

            if use_gpu:
                inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            else:
                inputs, labels = Variable(inputs), Variable(labels)
            
            optimizer.zero_grad()
            
            outputs = model(inputs)
            
            _, preds = torch.max(outputs.data, 1)
            loss = criterion(outputs, labels)
            
            n_val += len(preds)
            loss_val += loss.data.item()
            acc_val += torch.sum(preds == labels.data).item()
            
            del inputs, labels, outputs, preds
            torch.cuda.empty_cache()
        
        avg_loss_val = loss_val / n_val
        avg_acc_val = acc_val / n_val
        val_losses.append(avg_loss_val)
        val_accs.append(avg_acc_val)


        print()
        print("Epoch {} result: ".format(epoch))
        print("Avg loss (train): {:.4f}".format(avg_loss_train))
        print("Avg acc (train): {:.4f}".format(avg_acc_train))
        print("Avg loss (val): {:.4f}".format(avg_loss_val))
        print("Avg acc (val): {:.4f}".format(avg_acc_val))
        print('-' * 10)
        print()
        
        if avg_acc_val > best_acc:
            best_acc = avg_acc_val
            best_model_wts = copy.deepcopy(model.state_dict())
        
    elapsed_time = time.time() - since
    print()
    print("Training completed in {:.0f}m {:.0f}s".format(elapsed_time // 60, elapsed_time % 60))
    print("Best acc: {:.4f}".format(best_acc))
    
    model.load_state_dict(best_model_wts)
    return model, train_losses, train_accs, val_losses, val_accs

In [28]:
pretrain_epoch = 1
vgg_trained, train_losses, train_accs, val_losses, val_accs \
    = train_model(vgg, criterion, optimizer_ft, exp_lr_scheduler, train_dataloader, test_dataloader, num_epochs=pretrain_epoch)


torch.save(vgg_trained.state_dict(), name + '/model.pt')

np.save(name + "/train_losses", np.array(train_losses))
np.save(name + "/train_accs", np.array(train_accs))
np.save(name + "/val_losses", np.array(val_losses))
np.save(name + "/val_accs", np.array(preval_accs))

Epoch 1/1
----------
Training batch 1/86

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn