In [None]:
from torchvision import models
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import cv2
import os
import glob
import random
from shutil import copyfile, rmtree
import sys
from torch.utils.data import random_split
from sys import exit
from os.path import join, basename, dirname, exists
from torch.optim import lr_scheduler
import matplotlib.pyplot as plt
import torch
from torchvision import datasets, transforms
import torch.optim as optim
import torchvision
import os
import numpy as np
import datetime
from os.path import join
import logging
import copy
from torchvision.datasets import ImageFolder
from copy import deepcopy 

In [None]:
def get_data_extract(
    url="http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz",
    archive_name="food-101.tar.gz",
    dataset_dir="food-101",
):
    """Download and unpack the Food-101 archive into the working directory.

    Nothing is done when ``dataset_dir`` already exists. An archive that is
    already present under ``archive_name`` is reused instead of being
    downloaded again. Unlike shell magics, every step raises on failure, so
    the progress messages are only printed after the step really succeeded.

    Args:
        url: location of the ``.tar.gz`` archive.
        archive_name: local file name used for the downloaded archive.
        dataset_dir: directory the archive is expected to unpack to.

    Raises:
        urllib.error.URLError: if the download fails.
        tarfile.TarError: if the archive is corrupt or unsafe to extract.
    """
    # Local imports keep the cell self-contained; only the standard library is used.
    import tarfile
    import urllib.request

    if os.path.exists(dataset_dir):
        print("Dataset already exists")
        return

    if not os.path.exists(archive_name):
        print("Downloading the data...")
        # Download to a temporary name so an interrupted transfer is never
        # mistaken for a complete archive on the next run.
        partial_name = archive_name + ".part"
        urllib.request.urlretrieve(url, partial_name)
        os.replace(partial_name, archive_name)
        print("Dataset downloaded!")

    print("Extracting data..")
    with tarfile.open(archive_name, "r:gz") as archive:
        if hasattr(tarfile, "data_filter"):
            # Interpreters with extraction filters: refuse members that would
            # escape the destination directory.
            archive.extractall(filter="data")
        else:
            archive.extractall()
    print("Extraction done!")

In [None]:
# Fetch and unpack the dataset unless a "food-101" entry already exists in the working directory.
get_data_extract()

Downloading the data...
--2020-09-23 18:27:19--  http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz
Resolving data.vision.ee.ethz.ch (data.vision.ee.ethz.ch)... 129.132.52.162
Connecting to data.vision.ee.ethz.ch (data.vision.ee.ethz.ch)|129.132.52.162|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz [following]
--2020-09-23 18:27:19--  https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz
Connecting to data.vision.ee.ethz.ch (data.vision.ee.ethz.ch)|129.132.52.162|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4996278331 (4.7G) [application/x-gzip]
Saving to: ‘food-101.tar.gz’


2020-09-23 18:29:27 (37.2 MB/s) - ‘food-101.tar.gz’ saved [4996278331/4996278331]

Dataset downloaded!
Extracting data..
Extraction done!


In [None]:
# Split data in 3 parts: train - 70%, validation - 15% and test - 15%
# Also, we use partial_coefficient to use only part of whole dataset
# partial_coefficient = 1 means use 100% of dataset
partial_coefficient = 1
# Fixed seed for the split: without it every kernel restart draws a different
# partition, so a checkpoint trained in one session would be evaluated on
# images it has already seen in training.
split_seed = 42
batch_size = 8
num_workers = 4

dataset = ImageFolder('./food-101/images')
dataset_length = len(dataset)
train_size = int(dataset_length * 0.70 * partial_coefficient)
val_size = int(dataset_length * 0.15 * partial_coefficient)
test_size = int(dataset_length * 0.15 * partial_coefficient)
# The fourth (discarded) part absorbs whatever is left when
# partial_coefficient < 1 or the sizes were rounded down.
train_subset, val_subset, test_subset, _ = random_split(
    dataset,
    [
        train_size,
        val_size,
        test_size,
        dataset_length - (train_size + val_size + test_size),
    ],
    generator=torch.Generator().manual_seed(split_seed),
)

# Apply transformations on data
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],
)
# Validation and test share one deterministic pipeline (no augmentation).
eval_transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ]
)
data_transforms = {
    'train': transforms.Compose(
        [
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]
    ),
    'val': eval_transform,
    'test': eval_transform,
}

# Initialization train, validation and test pytorch dataloaders
# Train and validation subsets get their own copy of the dataset so each can
# carry a different transform; the test subset keeps the original object.
train_subset.dataset = deepcopy(dataset)
train_subset.dataset.transform = data_transforms['train']

val_subset.dataset = deepcopy(dataset)
val_subset.dataset.transform = data_transforms['val']

test_subset.dataset.transform = data_transforms['test']

train = torch.utils.data.DataLoader(
    train_subset, batch_size=batch_size, shuffle=True, num_workers=num_workers
)
# Evaluation loaders are not shuffled: order does not affect the metrics and a
# fixed order keeps evaluation runs repeatable.
val = torch.utils.data.DataLoader(
    val_subset, batch_size=batch_size, shuffle=False, num_workers=num_workers
)
test = torch.utils.data.DataLoader(
    test_subset, batch_size=batch_size, shuffle=False, num_workers=num_workers
)

dataloaders = {'train': train, 'val': val, 'test': test}

# One "<split>: <number of samples>" line per loader
for split_name, loader in dataloaders.items():
    print(f'{split_name}: {len(loader.dataset)}')

train: 70700
val: 15150
test: 15150


In [None]:
# Directory layout: dataset root, plus output locations for plots and models
root_folder = './food-101'
plots_folder = './plots'
models_folder = './models'
images_folder = join(root_folder, 'images')

# Create the output directories on first run; existing paths are left alone
for output_folder in (plots_folder, models_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

In [None]:
class SliceBranch(torch.nn.Module):
    """Horizontal slice branch: one tall convolution followed by width pooling.

    The convolution kernel spans 224 rows and 5 columns with no padding, and
    the same 1x5 max-pool is applied three times. For a 224x224 input this
    reduces the feature map to ``(batch, output_size, 1, 1)``.
    """

    def __init__(self, input_size, output_size):
        """Create the layers.

        Args:
            input_size: number of channels of the incoming tensor.
            output_size: number of channels produced by the convolution.
        """
        super().__init__()
        self.wide_conv = torch.nn.Conv2d(
            input_size,
            output_size,
            kernel_size=(224, 5),
            stride=1,
            padding=0,
            bias=True,
        )
        self.bn = torch.nn.BatchNorm2d(output_size)
        self.maxpool = torch.nn.MaxPool2d((1, 5))

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU, then pool along the width three times."""
        features = F.relu(self.bn(self.wide_conv(x)))
        for _ in range(3):
            features = self.maxpool(features)
        return features


class WideResnet101PlusSlice(torch.nn.Module):
    """Pretrained ``wide_resnet101_2`` fused with a ``SliceBranch``.

    Both branches see the same input; their outputs are concatenated along
    the channel axis, flattened and passed through two linear layers with
    dropout in between.
    """

    def __init__(self, nb_classes, drop_prob):
        """Create both branches and the classification head.

        Args:
            nb_classes: size of the final linear layer's output.
            drop_prob: dropout probability applied between the linear layers.
        """
        super().__init__()
        slice_channels = 320
        hidden_features = 2048

        self.slice_branch = SliceBranch(3, slice_channels)
        # The full pretrained network stays registered as a submodule, so its
        # parameters also appear in state_dict() under this attribute name.
        self.wide_res101_pretrained = torch.hub.load(
            'pytorch/vision', 'wide_resnet101_2', pretrained=True
        )
        # Every child of the pretrained network except the last one.
        backbone_layers = list(self.wide_res101_pretrained.children())[:-1]
        self.res101_branch = torch.nn.Sequential(*backbone_layers)

        # 2368 = 320 slice channels + presumably 2048 backbone features -- TODO confirm
        self.fc1 = torch.nn.Linear(2368, hidden_features)
        self.dropout = nn.Dropout(p=drop_prob)
        self.fc2 = torch.nn.Linear(hidden_features, nb_classes)

    def forward(self, x):
        """Return the ``fc2`` outputs for a batch ``x``."""
        slice_features = self.slice_branch(x)
        resnet_features = self.res101_branch(x)
        fused = torch.flatten(
            torch.cat([slice_features, resnet_features], dim=1), 1
        )
        # NOTE(review): there is no activation between fc1 and fc2.
        return self.fc2(self.dropout(self.fc1(fused)))


In [None]:
def train_val_model(
    model,
    criterion,
    optimizer,
    scheduler,
    num_epochs=15,
    checkpoint_path='/content/drive/My Drive/icv project/checkpoint4.pth',
):
    """Train ``model`` and validate it after every epoch, keeping the best weights.

    Batches come from the module-level ``dataloaders`` dict (keys ``'train'``
    and ``'val'``) and are moved to the module-level ``device``.

    Args:
        model: network to optimise; updated in place.
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer: optimizer stepping the model parameters.
        scheduler: learning-rate scheduler, stepped once per epoch after the
            training phase.
        num_epochs: number of train/validation rounds.
        checkpoint_path: file that receives the weights each time validation
            accuracy improves. The default is the location the notebook has
            always written to; its parent directory is created if missing.

    Returns:
        Tuple ``(model, train_epoch_losses, val_epoch_losses)`` where the model
        carries the weights with the best validation accuracy and the two
        lists hold one average loss per epoch.
    """
    best_model_weights = copy.deepcopy(model.state_dict())
    best_accuracy = 0.0
    train_epoch_losses = []
    val_epoch_losses = []

    # Create the checkpoint directory up front so a missing folder fails (or
    # is fixed) before the first epoch instead of after it.
    checkpoint_dir = os.path.dirname(checkpoint_path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    for epoch in range(num_epochs):
        print('\nEpoch {}/{}'.format(epoch, num_epochs - 1))
        # get_last_lr() is the supported way to read the current rate;
        # get_lr() is an internal hook and warns when called outside step().
        print('Learning rate {}'.format(scheduler.get_last_lr()))

        for phase in ['train', 'val']:
            is_training = phase == 'train'
            model.train(is_training)

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, predictions = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # The criterion returns a batch mean; weight it by batch size
                # so the epoch value is a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(predictions == labels).item()

            sample_count = len(dataloaders[phase].dataset)
            epoch_loss = running_loss / sample_count
            epoch_accuracy = running_corrects / sample_count
            if is_training:
                train_epoch_losses.append(epoch_loss)
            else:
                val_epoch_losses.append(epoch_loss)
            print(
                '\t{} loss: {:.4f}, {} accuracy: {:.4f}'.format(
                    phase, epoch_loss, phase, epoch_accuracy
                )
            )

            if phase == 'val' and epoch_accuracy > best_accuracy:
                best_accuracy = epoch_accuracy
                best_model_weights = copy.deepcopy(model.state_dict())
                # Save the unwrapped module so the file loads into a plain
                # (non-DataParallel) model without "module." key prefixes.
                if isinstance(model, torch.nn.DataParallel):
                    unwrapped = model.module
                else:
                    unwrapped = model
                torch.save(unwrapped.state_dict(), checkpoint_path)

            if is_training:
                scheduler.step()

    print('Best validation accuracy: {:.4f}'.format(best_accuracy))
    model.load_state_dict(best_model_weights)
    return model, train_epoch_losses, val_epoch_losses


def configure_run_model(
    nb_classes, drop_prob, multi_gpu, lr_step_size, lr_step_gamma, epochs
):
    """Build the network, optimizer and LR schedule, then run the training loop.

    Args:
        nb_classes: number of output classes for the network.
        drop_prob: dropout probability passed to the network.
        multi_gpu: when true, wrap the network in ``DataParallel``.
        lr_step_size: ``step_size`` of the ``StepLR`` schedule.
        lr_step_gamma: ``gamma`` of the ``StepLR`` schedule.
        epochs: number of epochs handed to ``train_val_model``.

    Returns:
        Whatever ``train_val_model`` returns, passed through unchanged.
    """
    network = WideResnet101PlusSlice(nb_classes, drop_prob)
    # SGD with a fixed starting rate; the schedule below decays it.
    sgd = optim.SGD(network.parameters(), lr=0.01, momentum=0.9)
    loss_function = nn.CrossEntropyLoss()

    if multi_gpu:
        print("Using {} GPUs.".format(torch.cuda.device_count()))
        network = torch.nn.DataParallel(network)
    network = network.to(device)

    step_schedule = lr_scheduler.StepLR(
        sgd, step_size=lr_step_size, gamma=lr_step_gamma,
    )

    return train_val_model(network, loss_function, sgd, step_schedule, epochs)

In [None]:
# Initialize the CUDA device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# total_gpus and multi_gpu are always defined (0 / False without CUDA), so the
# training cell does not hit a NameError on a CPU-only machine.
total_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 0
multi_gpu = total_gpus > 1
if torch.cuda.is_available():
    print('Total number of GPUs:{}'.format(total_gpus))

Total number of GPUs:1


In [None]:
# Train model
# Hyper-parameters of this run, gathered in one place
training_options = dict(
    drop_prob=0.5,
    multi_gpu=multi_gpu,
    lr_step_size=40,
    lr_step_gamma=0.1,
    epochs=100,
)
model, train_losses, val_losses = configure_run_model(
    len(dataset.classes), **training_options
)

Using cache found in /root/.cache/torch/hub/pytorch_vision_master



Epoch 0/99
Learning rate [0.01]




	train loss: 4.5468, train accuracy: 0.0381
	val loss: 3.9815, val accuracy: 0.0886

Epoch 1/99
Learning rate [0.01]
	train loss: 3.7189, train accuracy: 0.1421
	val loss: 3.0532, val accuracy: 0.2747

Epoch 2/99
Learning rate [0.01]
	train loss: 2.9393, train accuracy: 0.2952
	val loss: 2.1183, val accuracy: 0.4708

Epoch 3/99
Learning rate [0.01]
	train loss: 2.9506, train accuracy: 0.3030
	val loss: 5.6043, val accuracy: 0.0691

Epoch 4/99
Learning rate [0.01]
	train loss: 2.7362, train accuracy: 0.3447
	val loss: 2.1855, val accuracy: 0.4827

Epoch 5/99
Learning rate [0.01]
	train loss: 2.3481, train accuracy: 0.4245
	val loss: 1.7702, val accuracy: 0.5735

Epoch 6/99
Learning rate [0.01]
	train loss: 2.0983, train accuracy: 0.4803
	val loss: 1.8305, val accuracy: 0.5971

Epoch 7/99
Learning rate [0.01]
	train loss: 1.9653, train accuracy: 0.5114
	val loss: 1.5181, val accuracy: 0.6225

Epoch 8/99
Learning rate [0.01]
	train loss: 1.9434, train accuracy: 0.5181
	val loss: 1.6296, v

In [None]:
# Load trained model from checkpoint
checkpoint_file = '/content/drive/My Drive/icv project/49_epoch_full_dataset'
model = WideResnet101PlusSlice(len(dataset.classes), drop_prob=0.5)
# map_location lets a checkpoint that was saved from GPU tensors load on
# whatever device this session actually has.
model.load_state_dict(torch.load(checkpoint_file, map_location=device))
# The evaluation cells send inputs to `device`, so the weights must live
# there too; move the model before wrapping it.
model = model.to(device)
model = torch.nn.DataParallel(model)

Using cache found in /root/.cache/torch/hub/pytorch_vision_master


In [None]:
# Calculate test, validation and train accuracy
def evaluate_model(model, dataset):
    """Return the accuracy of ``model`` over a data loader, rounded to 2 decimals.

    Args:
        model: network to evaluate; switched to eval mode.
        dataset: a data loader (despite the name) yielding ``(inputs, labels)``
            batches and exposing the underlying dataset as ``.dataset``.

    Returns:
        Fraction of correctly classified samples as a float, or ``0.0`` when
        the underlying dataset is empty.
    """
    model.eval()
    total = len(dataset.dataset)
    if total == 0:
        # Nothing to score; avoids a division by zero / unbound result.
        return 0.0

    correct = 0
    with torch.no_grad():
        for inputs, labels in dataset:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predictions = torch.max(outputs, 1)
            correct += torch.sum(predictions == labels).item()
    # Accuracy is computed once, after all batches have been counted.
    return round(correct / total, 2)

# Report accuracy for each split, in train / val / test order
for split_title, split_key in (('Train', 'train'), ('Val', 'val'), ('Test', 'test')):
    print(f'{split_title} accuracy: {evaluate_model(model, dataloaders[split_key])}')

Train accuracy: 0.84
Val accuracy: 0.91
Test accuracy: 0.91


In [None]:
# Classification of random images from the Internet

from PIL import Image

# Reverse lookup: predicted index -> class name
idx_to_class = {index: class_name for class_name, index in dataset.class_to_idx.items()}
image_path = '/content/delish-deviled-eggs-horizontal-1542055209.jpg'

model.eval()
with torch.no_grad():
    # The context manager closes the file; convert('RGB') guarantees three
    # channels so grayscale or RGBA downloads still match the 3-channel Normalize.
    with Image.open(image_path) as picture:
        image = data_transforms['test'](picture.convert('RGB'))
    inputs = image.to(device)
    # inputs[None] adds the leading batch dimension
    outputs = model(inputs[None])
    _, predictions = torch.max(outputs, 1)
    print(idx_to_class[predictions.item()])

deviled_eggs
