In [1]:
# Preparation
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os, sys
import copy

plt.ion()  

use_gpu = torch.cuda.is_available()
if use_gpu:
    print("Using CUDA")

Using CUDA


In [2]:
data_dir = 'food-101'
TRAIN = 'train'
TEST = 'test'

# VGG-16 Takes 224x224 images as input, so we resize all of them
data_transforms = {
    TRAIN: transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]),
    TEST: transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
}

image_datasets = {
    x: datasets.ImageFolder(
        os.path.join(data_dir, x), 
        transform=data_transforms[x]
    )
    for x in [TRAIN, TEST]
    }

dataloaders = {
    x: torch.utils.data.DataLoader(
        image_datasets[x], batch_size=32,
        shuffle=True, num_workers=4
    )
    for x in [TRAIN, TEST]
}

dataset_sizes = {x: len(image_datasets[x]) for x in [TRAIN, TEST]}

for x in [TRAIN, TEST]:
    print("Loaded {} images under {}".format(dataset_sizes[x], x))
    
print("Classes: ")
class_names = image_datasets[TRAIN].classes 
print(image_datasets[TRAIN].classes)

Loaded 75750 images under train
Loaded 25250 images under test
Classes: 
['apple_pie', 'baby_back_ribs', 'baklava', 'beef_carpaccio', 'beef_tartare', 'beet_salad', 'beignets', 'bibimbap', 'bread_pudding', 'breakfast_burrito', 'bruschetta', 'caesar_salad', 'cannoli', 'caprese_salad', 'carrot_cake', 'ceviche', 'cheese_plate', 'cheesecake', 'chicken_curry', 'chicken_quesadilla', 'chicken_wings', 'chocolate_cake', 'chocolate_mousse', 'churros', 'clam_chowder', 'club_sandwich', 'crab_cakes', 'creme_brulee', 'croque_madame', 'cup_cakes', 'deviled_eggs', 'donuts', 'dumplings', 'edamame', 'eggs_benedict', 'escargots', 'falafel', 'filet_mignon', 'fish_and_chips', 'foie_gras', 'french_fries', 'french_onion_soup', 'french_toast', 'fried_calamari', 'fried_rice', 'frozen_yogurt', 'garlic_bread', 'gnocchi', 'greek_salad', 'grilled_cheese_sandwich', 'grilled_salmon', 'guacamole', 'gyoza', 'hamburger', 'hot_and_sour_soup', 'hot_dog', 'huevos_rancheros', 'hummus', 'ice_cream', 'lasagna', 'lobster_bisqu

In [3]:
def accuracy(output, target, topk=(1,)):
# Computes the accuracy over the k top predictions for the specified values of k
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
    return res

In [4]:
def eval_model(vgg, criterion):
    since = time.time()
    avg_loss = 0
    avg_acc = 0
    loss_test = 0
    acc_test = 0
    acc1_sum_test = 0
    acc5_sum_test = 0
    acc1_final_test = 0
    acc5_final_test = 0
    cor_class = np.zeros((1,101))
    sum_class = np.zeros((1,101))
    acc_class = np.zeros((1,101))
    category = 0
    
    test_batches = len(dataloaders[TEST])
    print("Evaluating model")
    print('-' * 10)
    
    for i, data in enumerate(dataloaders[TEST]):
        if i % 100 == 0:
            print("\rTest batch {}/{}".format(i, test_batches), end='', flush=True)

        vgg.train(False)
        vgg.eval()
        inputs, labels = data

        if use_gpu:
            inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
        else:
            inputs, labels = Variable(inputs), Variable(labels)

        outputs = vgg(inputs)

        _, preds = torch.max(outputs.data, 1)
        loss = criterion(outputs, labels)
        
        # Change the number 32 if the batch_size is changed
        for j in range(32):
            if list(labels.data.size())[0] <= 31:
                break
            category = labels.data[j].item()
            sum_class[:, category] += 1
            if labels.data[j].item()==preds[j].item():
                cor_class[:, labels.data[j].item()] += 1 

        loss_test += loss.item()
        acc_test += torch.sum(preds == labels.data).item()
        
        acc1, acc5 = accuracy(outputs, labels, topk=(1, 5))
        acc1_sum_test += acc1.item() * 32 / 100  #change the number is the batch_size is changed
        acc5_sum_test += acc5.item() * 32 / 100
    
        del inputs, labels, outputs, preds
        torch.cuda.empty_cache()
    
    for k in range(101):
        acc_class[:, k] = cor_class[:, k] / sum_class[:, k]
    print()
    print("accuracy of a class: ", acc_class)
    
    avg_loss = loss_test / test_batches
    avg_acc = acc_test / dataset_sizes[TEST]
    acc1_final_test = acc1_sum_test / dataset_sizes[TEST]
    acc5_final_test = acc5_sum_test / dataset_sizes[TEST]
    
    elapsed_time = time.time() - since
    print()
    print("Evaluation completed in {:.0f}m {:.0f}s".format(elapsed_time // 60, elapsed_time % 60))
    print("Avg loss (test): {:.4f}".format(avg_loss))
    print("Avg acc (test): {:.4f}".format(avg_acc))
    print("ACC1 (test): {:.4f}".format(acc1_final_test))
    print("ACC5 (test): {:.4f}".format(acc5_final_test))
    print('-' * 10)
     

In [5]:
# If disconnected or want to resume, load the already trained model
# Be careful, you can only run this part or the next part. Do not run them all !!!!!!
vgg16 = models.vgg16(pretrained=False)
num_features = vgg16.classifier[6].in_features
features = list(vgg16.classifier.children())[:-1] # Remove last layer
features.extend([nn.Linear(num_features, len(class_names))]) # Add our layer with 4 outputs
vgg16.classifier = nn.Sequential(*features) # Replace the model classifier
vgg16.load_state_dict(torch.load('VGG16_food_101_true_dataset.h5'))
print(vgg16)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d

In [None]:
# If you want to start from the begining, run this part
# Load the model from pytorch
vgg16 = models.vgg16(pretrained=True)
print(vgg16.classifier[6].out_features) # 1000 

# Freeze training for all layers
for param in vgg16.features.parameters():
    param.require_grad = False

# Newly created modules have require_grad=True by default
num_features = vgg16.classifier[6].in_features
features = list(vgg16.classifier.children())[:-1] # Remove last layer
features.extend([nn.Linear(num_features, len(class_names))]) # Add our layer with 101 outputs
vgg16.classifier = nn.Sequential(*features) # Replace the model classifier
print(vgg16)

In [6]:
if use_gpu:
    vgg16.cuda() #.cuda() will move everything to the GPU side
    
criterion = nn.CrossEntropyLoss()

optimizer_ft = optim.SGD(vgg16.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [7]:
def train_model(vgg, criterion, optimizer, scheduler, num_epochs = 20):
    since = time.time()
    best_model_wts = copy.deepcopy(vgg.state_dict())
    best_acc = 0.0
    
    avg_loss = 0
    avg_acc = 0
    
    train_batches = len(dataloaders[TRAIN])
    
    for epoch in range(num_epochs):
        print("Epoch {}/{}".format(epoch, num_epochs))
        print('-' * 10)
        
        loss_train = 0
        acc_train = 0
        acc1_sum_train = 0
        acc5_sum_train = 0
        acc1_final_train = 0
        acc5_final_train = 0

        
        vgg.train(True)
        
        for i, data in enumerate(dataloaders[TRAIN]):
            if i % 100 == 0:
                print("\rTraining batch {}/{}".format(i, train_batches / 2), end='', flush=True)
                
            # Use half training dataset
            if i >= train_batches / 2:
                break
                
            inputs, labels = data
            
            if use_gpu:
                inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            else:
                inputs, labels = Variable(inputs), Variable(labels)
            
            optimizer.zero_grad()
            
            outputs = vgg(inputs)
            
            _, preds = torch.max(outputs.data, 1)
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()
            
            loss_train += loss.item()
            acc_train += torch.sum(preds == labels.data).item()
            
            acc1, acc5 = accuracy(outputs, labels, topk=(1, 5))
            # Change the number 32 is the batch_size is changed
            acc1_sum_train += acc1.item() * 32 / 100  
            acc5_sum_train += acc5.item() * 32 / 100
            
            del inputs, labels, outputs, preds
            torch.cuda.empty_cache()
        
        print()
        avg_loss = loss_train * 2/ train_batches
        avg_acc = acc_train * 2/ dataset_sizes[TRAIN]
        acc1_final_train = acc1_sum_train * 2/ dataset_sizes[TRAIN]
        acc5_final_train = acc5_sum_train * 2/ dataset_sizes[TRAIN]
        print()
        print("Epoch {} result: ".format(epoch))
        print("Avg loss (train): {:.4f}".format(avg_loss))
        print("Avg acc (train): {:.4f}".format(avg_acc))
        print("Acc1 (train): {:.4f}".format(acc1_final_train))
        print("Acc5 (train): {:.4f}".format(acc5_final_train))
        print('-' * 10)
        print()
        
        eval_model(vgg16, criterion)
        
        if avg_acc > best_acc:
            best_acc = avg_acc
            best_model_wts = copy.deepcopy(vgg.state_dict())
            
        torch.save(vgg16.state_dict(), 'VGG16_food_101_true_dataset.h5')
        
    elapsed_time = time.time() - since
    print()
    print("Training completed in {:.0f}m {:.0f}s".format(elapsed_time // 60, elapsed_time % 60))
    print("Best acc: {:.4f}".format(best_acc))
    
    vgg.load_state_dict(best_model_wts)
    return vgg

In [None]:
vgg16 = train_model(vgg16, criterion, optimizer_ft, exp_lr_scheduler, num_epochs = 30)

Epoch 0/30
----------
Training batch 1100/1184.0

Epoch 0 result: 
Avg loss (train): 1.2999
Avg acc (train): 0.6627
Acc1 (train): 0.6627
Acc5 (train): 0.8721
----------

Evaluating model
----------
Test batch 700/790
accuracy of a class:  [[0.372      0.644      0.832      0.86       0.788      0.676
  0.852      0.876      0.696      0.588      0.668      0.89959839
  0.812      0.828      0.844      0.616      0.784      0.68
  0.724      0.796      0.864      0.736      0.608      0.824
  0.864      0.892      0.644      0.8        0.848      0.86
  0.872      0.832      0.9        1.         0.812      0.936
  0.776      0.612      0.824      0.56       0.868      0.828
  0.656      0.812      0.736      0.884      0.772      0.72289157
  0.908      0.64       0.564      0.956      0.812      0.708
  0.936      0.76       0.528      0.74       0.792      0.84
  0.836      0.908      0.72       0.928      0.828      0.864
  0.736      0.508      0.936      0.94       0.876      0.80

Exception ignored in: <bound method _DataLoaderIter.__del__ of <torch.utils.data.dataloader._DataLoaderIter object at 0x7fa5e907c128>>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 349, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 328, in _shutdown_workers
    self.worker_result_queue.get()
  File "/opt/conda/lib/python3.6/multiprocessing/queues.py", line 337, in get
    return _ForkingPickler.loads(res)
  File "/opt/conda/lib/python3.6/site-packages/torch/multiprocessing/reductions.py", line 70, in rebuild_storage_fd
    fd = df.detach()
  File "/opt/conda/lib/python3.6/multiprocessing/resource_sharer.py", line 58, in detach
    return reduction.recv_handle(conn)
  File "/opt/conda/lib/python3.6/multiprocessing/reduction.py", line 182, in recv_handle
    return recvfds(s, 1)[0]
  File "/opt/conda/lib/python3.6/multiprocessing/reducti

Test batch 700/790
accuracy of a class:  [[0.44       0.656      0.884      0.87550201 0.74       0.628
  0.828      0.9        0.484      0.664      0.688      0.888
  0.888      0.908      0.752      0.368      0.824      0.692
  0.796      0.86       0.868      0.768      0.532      0.84
  0.84       0.9        0.692      0.88       0.844      0.888
  0.896      0.82       0.88       0.996      0.832      0.88
  0.776      0.512      0.844      0.412      0.896      0.876
  0.776      0.788      0.88       0.944      0.744      0.732
  0.788      0.584      0.69879518 0.888      0.832      0.776
  0.876      0.804      0.44       0.832      0.648      0.472
  0.84       0.752      0.808      0.948      0.852      0.852
  0.68       0.628      0.936      0.912      0.972      0.852
  0.808      0.656      0.792      0.956      0.908      0.552
  0.844      0.836      0.74       0.732      0.664      0.892
  0.676      0.652      0.92       0.644      0.932      0.764
  0.936      0.9

Exception ignored in: <bound method _DataLoaderIter.__del__ of <torch.utils.data.dataloader._DataLoaderIter object at 0x7fa5e907cc88>>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 349, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 328, in _shutdown_workers
    self.worker_result_queue.get()
  File "/opt/conda/lib/python3.6/multiprocessing/queues.py", line 337, in get
    return _ForkingPickler.loads(res)
  File "/opt/conda/lib/python3.6/site-packages/torch/multiprocessing/reductions.py", line 70, in rebuild_storage_fd
    fd = df.detach()
  File "/opt/conda/lib/python3.6/multiprocessing/resource_sharer.py", line 58, in detach
    return reduction.recv_handle(conn)
  File "/opt/conda/lib/python3.6/multiprocessing/reduction.py", line 182, in recv_handle
    return recvfds(s, 1)[0]
  File "/opt/conda/lib/python3.6/multiprocessing/reducti

Test batch 700/790
accuracy of a class:  [[0.592      0.7        0.884      0.864      0.708      0.796
  0.896      0.772      0.652      0.552      0.748      0.952
  0.836      0.772      0.79518072 0.616      0.9        0.696
  0.784      0.776      0.828      0.696      0.648      0.836
  0.908      0.836      0.768      0.892      0.86       0.816
  0.932      0.84       0.872      1.         0.864      0.852
  0.704      0.676      0.864      0.588      0.956      0.772
  0.8        0.804      0.856      0.88       0.768      0.672
  0.876      0.816      0.724      0.92       0.856      0.7
  0.96       0.748      0.48       0.652      0.712      0.728
  0.864      0.9        0.832      0.904      0.872      0.916
  0.696      0.64       0.916      0.904      0.908      0.756
  0.812      0.72       0.812      0.92       0.912      0.5
  0.74       0.84       0.784      0.776      0.53012048 0.852
  0.684      0.516      0.776      0.66       0.88       0.764
  0.86       0.908

Exception ignored in: <bound method _DataLoaderIter.__del__ of <torch.utils.data.dataloader._DataLoaderIter object at 0x7fa5bc99fb70>>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 349, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 328, in _shutdown_workers
    self.worker_result_queue.get()
  File "/opt/conda/lib/python3.6/multiprocessing/queues.py", line 337, in get
    return _ForkingPickler.loads(res)
  File "/opt/conda/lib/python3.6/site-packages/torch/multiprocessing/reductions.py", line 70, in rebuild_storage_fd
    fd = df.detach()
  File "/opt/conda/lib/python3.6/multiprocessing/resource_sharer.py", line 57, in detach
    with _resource_sharer.get_connection(self._id) as conn:
  File "/opt/conda/lib/python3.6/multiprocessing/resource_sharer.py", line 87, in get_connection
    c = Client(address, authkey=process.current_process

Test batch 700/790
accuracy of a class:  [[0.332      0.776      0.876      0.84       0.82       0.836
  0.792      0.868      0.648      0.82       0.596      0.884
  0.848      0.828      0.812      0.6        0.892      0.764
  0.744      0.804      0.76       0.75502008 0.592      0.796
  0.908      0.892      0.76       0.86       0.86       0.836
  0.944      0.852      0.888      0.996      0.904      0.864
  0.74       0.74       0.84       0.536      0.9        0.9
  0.8        0.78       0.852      0.88       0.756      0.724
  0.92       0.68       0.684      0.892      0.844      0.796
  0.92       0.804      0.58       0.712      0.8        0.672
  0.768      0.916      0.736      0.924      0.912      0.86
  0.784      0.632      0.848      0.948      0.928      0.84
  0.808      0.668      0.692      0.948      0.824      0.504
  0.928      0.844      0.76706827 0.852      0.56       0.872
  0.596      0.764      0.776      0.672      0.876      0.632
  0.824      0.912

Exception ignored in: <bound method _DataLoaderIter.__del__ of <torch.utils.data.dataloader._DataLoaderIter object at 0x7fa5e90bcf28>>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 349, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 328, in _shutdown_workers
    self.worker_result_queue.get()
  File "/opt/conda/lib/python3.6/multiprocessing/queues.py", line 337, in get
    return _ForkingPickler.loads(res)
  File "/opt/conda/lib/python3.6/site-packages/torch/multiprocessing/reductions.py", line 70, in rebuild_storage_fd
    fd = df.detach()
  File "/opt/conda/lib/python3.6/multiprocessing/resource_sharer.py", line 57, in detach
    with _resource_sharer.get_connection(self._id) as conn:
  File "/opt/conda/lib/python3.6/multiprocessing/resource_sharer.py", line 87, in get_connection
    c = Client(address, authkey=process.current_process

Test batch 700/790
accuracy of a class:  [[0.492      0.788      0.88       0.852      0.784      0.636
  0.892      0.94       0.62       0.552      0.75903614 0.932
  0.768      0.82       0.844      0.472      0.792      0.652
  0.772      0.812      0.96       0.568      0.74       0.876
  0.848      0.864      0.76       0.916      0.868      0.92
  0.932      0.808      0.888      0.99598394 0.728      0.856
  0.82       0.54       0.828      0.544      0.912      0.88
  0.696      0.804      0.848      0.888      0.8        0.62
  0.816      0.824      0.692      0.884      0.884      0.772
  0.888      0.772      0.608      0.768      0.648      0.728
  0.892      0.844      0.608      0.928      0.868      0.872
  0.672      0.6        0.944      0.932      0.912      0.764
  0.884      0.668      0.864      0.932      0.796      0.548
  0.852      0.828      0.848      0.804      0.472      0.888
  0.744      0.684      0.9        0.652      0.94       0.808
  0.976      0.69