In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

# 0  Preparation

In [None]:
!ls
!pwd
!ls /
!ls /kaggle/
!ls /kaggle/input/
!ls /kaggle/input/polytech-ds-2019/
!ls /kaggle/input/models/

In [None]:
!pip install timm

In [None]:
!pip install efficientnet_pytorch

In [None]:
from IPython.display import FileLink
os.chdir(r'/kaggle/working')

# 1  Model

## 1.1  Packages

In [None]:
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
import glob
from IPython.display import display
import torch.utils.model_zoo as model_zoo
import torchvision.models as models
from tqdm import tqdm_notebook
import random
import timm
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.metrics import confusion_matrix

## 1.2  Datasets

The original training and validation datasets contains 9866 and 3430 images respectively. The number of validation images are rather large, so we merged them together and re-split randomly. We used a split ratio of 10%.

In [None]:
# diretory of data
img_dir = [r"/kaggle/input/polytech-ds-2019/polytech-ds-2019/training/", 
           r"/kaggle/input/polytech-ds-2019/polytech-ds-2019/validation/"]

class trainDataset(torch.utils.data.Dataset):
    def __init__(self, root_dir, train_list):
        super().__init__()
        self.root_dir = root_dir
        self.train_list = train_list
        self.img_names = [self.root_dir + os.sep + item for item in self.train_list]
        self.labels = [int(item.split(os.sep)[1].split('_')[0]) for item in self.train_list]

        self.transform = transforms.Compose([transforms.RandomHorizontalFlip(p=0.5),
                                             #transforms.RandomRotation(180),
                                             transforms.ColorJitter(brightness=(0.3 if random.random()<0.5 else False),
                                                                    contrast=(0.2 if random.random()<0.5 else False),
                                                                    saturation=(0.2 if random.random()<0.5 else False),
                                                                    hue=(0.1 if random.random()<0.5 else False)),
                                             transforms.RandomAffine(10, translate=(0.1, 0.1), scale=(0.8, 1.2), shear=(0.15, 0), resample=False),
                                             transforms.Resize((256, 256)),
                                             transforms.CenterCrop((224, 224)),
                                             transforms.ToTensor(),
                                             transforms.Normalize([0.485, 0.456, 0.406],
                                                                   [0.229, 0.224, 0.225])
                                            ])

    def __len__(self):
        return len(self.img_names)

    def __getitem__(self, i):
        img = Image.open(self.img_names[i]).convert('RGB')
        return self.transform(img), self.labels[i]



class valDataset(torch.utils.data.Dataset):
    def __init__(self, root_dir, val_list):
        super().__init__()
        self.root_dir = root_dir
        self.val_list = val_list
        self.img_names = [self.root_dir + os.sep + item for item in self.val_list]
        self.labels = [int(item.split(os.sep)[1].split('_')[0]) for item in self.val_list]

        # PyTorch transforms
        self.transform = transforms.Compose([transforms.Resize((256, 256)),
                                             transforms.CenterCrop((224, 224)),
                                             transforms.ToTensor(),
                                             transforms.Normalize([0.485, 0.456, 0.406],
                                                                  [0.229, 0.224, 0.225])
                                            ])

    def __len__(self):
        return len(self.img_names)

    def __getitem__(self, i):
        img = Image.open(self.img_names[i]).convert('RGB')
        return self.transform(img), self.labels[i]


def random_split_train_val(train_dir, val_dir, split_ratio):
    train_names = ['training' + os.sep + item for item in os.listdir(train_dir)]
    val_names = ['validation' + os.sep + item for item in os.listdir(val_dir)]
    all_names = train_names + val_names
    nums = len(all_names)
    ratio = split_ratio
    random.shuffle(all_names)
    new_val = all_names[:round(nums * ratio)]
    new_train = all_names[round(nums * ratio):]
    return new_train, new_val


train_dir = r"/kaggle/input/polytech-ds-2019/polytech-ds-2019/training/"
val_dir = r"/kaggle/input/polytech-ds-2019/polytech-ds-2019/validation/"
new_train, new_val = random_split_train_val(train_dir, val_dir, 0.1)
train_set = trainDataset(r"/kaggle/input/polytech-ds-2019/polytech-ds-2019/", new_train)
print("training dataset size",len(train_set))
val_set = valDataset(r"/kaggle/input/polytech-ds-2019/polytech-ds-2019/", new_val)
print("validation dataset size",len(val_set))

def display_tensor(t):
    trans = transforms.ToPILImage()
    display(trans(t))


for i in range(10):
    img_train, label_train = train_set[i]
    img_val, label_val = val_set[i]
    display_tensor(img_train)
    print("class : ", label_train)
    display_tensor(img_val)
    print("class : ", label_val)

## 1.3 Create Models

We trained different models including InceptionV3, ResNext50, ResNext101, Mixnet_l, Mixnet_xl and EfficientNet_B5. We used the pretrained models on ImageNet and finetuned them on the food11 dataset. 

The first three models are loaded from Pytorch model zoo, among them Resnext101 has the best accuracy with the last 6 layers unfrozen (we didn't do elaborate experiments of this and this is the best we got). 

For Mixnet (https://arxiv.org/abs/1907.09595) we unfroze all the layers.

For EfficientNet (https://arxiv.org/abs/1905.11946) we unfroze all the layers.

In [None]:
batch_size = 25
test_split = 0.1

train_dl = torch.utils.data.DataLoader(train_set, batch_size=batch_size,shuffle=True)
val_dl = torch.utils.data.DataLoader(val_set, batch_size=batch_size)


# unfreezing function for certain layers, only used on ResNext        
def unfreeze_last_layers(model, feature_extracting):
    for param in model.parameters():
        param.requires_grad = False
    next(model.layer4[1].conv1.parameters()).requires_grad = True
    next(model.layer4[1].conv2.parameters()).requires_grad = True
    next(model.layer4[1].conv3.parameters()).requires_grad = True
    next(model.layer4[2].conv1.parameters()).requires_grad = True
    next(model.layer4[2].conv2.parameters()).requires_grad = True
    next(model.layer4[2].conv3.parameters()).requires_grad = True
    next(model.layer4[0].conv1.parameters()).requires_grad = True
    next(model.layer4[0].conv2.parameters()).requires_grad = True
    next(model.layer4[0].conv3.parameters()).requires_grad = True
    next(model.layer3[22].conv1.parameters()).requires_grad = True
    next(model.layer3[22].conv2.parameters()).requires_grad = True
    next(model.layer3[22].conv3.parameters()).requires_grad = True

# model1 mixnet_l
model = timm.create_model('mixnet_l', pretrained=True)
model.classifier = nn.Linear(1536, 11)
print(model)

# # model2 resnext101
# model = models.resnext101_32x8d(pretrained=True)
# # unfreezing last layers in resnext in order to use the bottom features and reduce calculation (resnext101 model is quite big), 
# # which also yielded a better accuracy in our experiments
# unfreeze_last_layers(model)
# fc_features = model.fc.in_features
# model.fc = nn.Linear(fc_features, 11)
# print(model)

# # model3 mixnet_xl
# model = timm.create_model('mixnet_xl', pretrained=True)
# model.classifier = nn.Linear(1536, 11)
# print(model)

# # model4 inceptionV3
# model = torchvision.models.inception_v3(pretrained=True, aux_logits=False, transform_input=False)
# model.fc = nn.Linear(2048, 11)
# model.cuda()

## 1.4  Training and Validation

Our training process contains two phases: 

1. Using the CosineAnnealing scheduler. As the learning rates for each epoch are calculated automatically and sometimes the minimizing is interupted by the increasing learning rate in the ascending period of the cosine function, we added the second phase of training, tuned the learning rate in a more controlable way. 

2. Using the MultiStep scheduler. From the optimal model obtained in phase 1, we restart the training with a smaller learning rate and obtained our final optimal model.

### 1.4.1  Phase 1 Training

In [None]:
torch.cuda.empty_cache()
LEARNING_RATE = 0.01
model.cuda()
N_EPOCHS = 50

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = LEARNING_RATE, momentum=0.9, weight_decay=1e-4) 
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = 6)

In [None]:
epoch_val_loss = [] 
epoch_val_acc = []
epoch_train_loss = []
epoch_train_acc = []
flag = 0
best_val_acc = 0
for e in range(N_EPOCHS):
    print("EPOCH:",e)
    running_loss = 0
    running_accuracy = 0
    model.train()
    for i, batch in enumerate(tqdm_notebook(train_dl)):
#         # quick check for max batch size
        if i == 5:
            break
        x = batch[0]
        labels = batch[1]
        x = x.cuda()
        labels = labels.cuda()

        y = model(x)
        loss = criterion(y, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        running_accuracy += (y.max(1)[1] == labels).sum().item()
    
    print("Training accuracy: {:.2f}%".format(100*running_accuracy/float(len(train_set))),
          "Training loss:", running_loss/float(len(train_dl)), "learning rate:", scheduler.get_lr()[0])
    
    scheduler.step()
    epoch_train_acc.append(running_accuracy/float(len(train_set)))
    epoch_train_loss.append(running_loss/float(len(train_set)))
    

    model.eval()
    running_val_loss = 0
    running_val_accuracy = 0
    
    for i, batch in enumerate(val_dl):
        with torch.no_grad():
#             # quick check for max batch size
            if i == 5:
                break
            x = batch[0]
            labels = batch[1]
            x = x.cuda()
            labels = labels.cuda()
            y = model(x)
            loss = criterion(y, labels)
            running_val_loss += loss.item()
            running_val_accuracy += (y.max(1)[1] == labels).sum().item()
    acc = running_val_accuracy/float(len(val_set))
    if acc > best_val_acc:
        best_val_acc = acc
        torch.save(model.state_dict(), '/kaggle/working/model_mixnet_phase1.pkl')
        flag = 0
    print("Validation accuracy:{:.2f}%".format(100* acc),
          "Validation loss:", running_val_loss/float(len(val_set)))
    epoch_val_loss.append(running_val_loss/len(val_set))
    epoch_val_acc.append(running_val_accuracy/len(val_set))


# model=mixnet_xl, scheduler=CosineAnnealing, lr=0.01
# Training accuracy: 99.87% Training loss: 0.0056521773303466446 learning rate: 0.006545084971874732
# Validation accuracy:96.71% Validation loss: 0.0057580280123060035

### Visualization of Loss and Accuracy

In [None]:
def plot_accuracy(train_acc, val_acc):
    plt.title('FOOD11')
    plt.plot(train_acc)
    plt.plot(val_acc)
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train_accuracy', 'validation_accuracy'], loc='best')
    plt.show()
    
def plot_loss(train_loss, val_loss):
    plt.title('FOOD11')
    plt.plot(train_loss)
    plt.plot(val_loss)
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train_loss', 'validation_loss'], loc='best')
    plt.show()

In [None]:
plot_accuracy(epoch_train_acc, epoch_val_acc) 
plot_loss(epoch_train_loss, epoch_val_loss)

### 1.4.2  Phase 2 Training

We restarted training with a different scheduler and smaller learning rate, with the best model obatined by CosineAnnealing (phase1), did fine-grained learning rate control using multistep scheduler and obtained the final best model (phase2).

In [None]:
# restart_training = timm.create_model('mixnet_xl', pretrained=True)
restart_training = timm.create_model('mixnet_l', pretrained=True)
restart_training.classifier = nn.Linear(1536, 11)
restart_training.load_state_dict(torch.load('/kaggle/working/model_mixnet_phase1.pkl'))

torch.cuda.empty_cache()
LEARNING_RATE = 0.001
restart_training.cuda()

N_EPOCHS = 50
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(restart_training.parameters(), lr = LEARNING_RATE, momentum=0.9, weight_decay=1e-4) 
MILESTONE = [20, 30, 40]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=MILESTONE, gamma=0.1)

In [None]:
epoch_val_loss = [] 
epoch_val_acc = []
epoch_train_loss = []
epoch_train_acc = []
best_val_acc = 0
flag = 0
for e in range(N_EPOCHS):
    print("EPOCH:",e)
    running_loss = 0
    running_accuracy = 0
    restart_training.train()
    for i, batch in enumerate(tqdm_notebook(train_dl)):
#         # quick check
        if i == 5:
            break
        x = batch[0]
        labels = batch[1]
        x = x.cuda()
        labels = labels.cuda()
        y = restart_training(x)
        loss = criterion(y, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        running_accuracy += (y.max(1)[1] == labels).sum().item()
    
    print("Training accuracy: {:.2f}%".format(100*running_accuracy/float(len(train_set))),
          "Training loss:", running_loss/float(len(train_dl)), "learning rate:", scheduler.get_lr()[0])
    
    scheduler.step()
    epoch_train_acc.append(running_accuracy/float(len(train_set)))
    epoch_train_loss.append(running_loss/float(len(train_set)))
    
    restart_training.eval()

    running_val_loss = 0
    running_val_accuracy = 0
    
    for i, batch in enumerate(val_dl):
        with torch.no_grad():
#             # quick check for max batch size
            if i == 5:
                break
            x = batch[0]
            labels = batch[1]
            x = x.cuda()
            labels = labels.cuda()
            y = restart_training(x)
            loss = criterion(y, labels)
            running_val_loss += loss.item()
            running_val_accuracy += (y.max(1)[1] == labels).sum().item()
    acc = running_val_accuracy/float(len(val_set))
    if acc > best_val_acc:
        best_val_acc = acc
        torch.save(restart_training.state_dict(), '/kaggle/working/model_mixnet_phase2.pkl')
        flag = 0
    print("Validation accuracy:{:.2f}%".format(100* acc),
          "Validation loss:", running_val_loss/float(len(val_set)))
    epoch_val_loss.append(running_val_loss/len(val_set))
    epoch_val_acc.append(running_val_accuracy/len(val_set))


### Confusion Matrix

In [None]:
restart_training.eval()
y_true = []
y_pred = []
for i, batch in enumerate(val_dl):
    with torch.no_grad():
        x = batch[0]
        labels = batch[1]
        x = x.cuda()
        labels = labels.cuda()
        y = restart_training(x)        
        y_true.extend(y.max(1)[1].tolist())
        y_pred.extend(labels.tolist())
print("confusion matrix")
confusion_matrix(y_true, y_pred)

### Visualization of Loss and Accuracy

In [None]:
plot_accuracy(epoch_train_acc, epoch_val_acc) 
plot_loss(epoch_train_loss, epoch_val_loss)

# 2  Model Ensemble

In [ ]:
With the multiple trained models at hand, we did a simple ensembling of those models to see if there will be improvement.

Our method of ensemble is averaging the predictions (output probabilities) of different models and obtain the final output prediction (averaging is simplified as calculating the sum of predictions in our code).

The pre-trained models are uploaded as "datasets" in kaggle (which we have set them to be public) and added into this notebook. They should be seen under /kaggle/input/models/. EfficientNet doesn't work well both individually and ensemble, so we didn't include nor upload the model.

Among all the ensemble combinations (tried by enumeration), the best accuracy is obtained by the ensemble of (resnext101, mixnet_l, mixnet_xl). 

Plus, we occasionally observed an improvement by zooming in the input image to a larger size when ensemble testing (training: 224, testing: 299), so we kept this method in the final submission. However for individual models this doesn't hold all the time. 

In [None]:
!pip install timm

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torchvision import transforms
import torchvision
import os
from PIL import Image
# from efficientnet_pytorch import EfficientNet
import timm
import pandas as pd

class food11_dataset_test(Dataset):
	def __init__(self, root_dir, inp_list, img_transform=None):
		self.imgs = inp_list
		self.transform = img_transform
		self.img_dirs = [os.path.join(root_dir, img_dir) for img_dir in self.imgs]

	def __len__(self):
		return len(self.img_dirs)

	def __getitem__(self, index):
		img = Image.open(self.img_dirs[index]).convert('RGB')
		if self.transform is not None:
			img = self.transform(img)
		name = self.imgs[index].split('.')[0]
		return [img, name]
    
def test_ensemble(test_dir, csv_dir):
    transform_test = transforms.Compose([
		transforms.Resize((RESIZE_SIZE, RESIZE_SIZE)),
		transforms.CenterCrop((CROP_SIZE, CROP_SIZE)),
		transforms.ToTensor(),
		transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    test_list = os.listdir(test_dir)
    test_dataset = food11_dataset_test(test_dir, test_list, transform_test)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE_VAL, shuffle=False, num_workers=4)
    # print(test_dataset[0])
    N_batch_test = len(test_loader)

    ckpt_1 = r'/kaggle/input/models/mixnet_l2_best.pkl'
    ckpt_2 = r'/kaggle/input/models/resnext101_best.pkl'
    ckpt_3 = r'/kaggle/input/models/model_mixnet_best.pkl'
    ckpt_4 = r'/kaggle/input/models/inceptionV3_epoch_30_model.pth'

    
    # mixnet_l
    model_1 = timm.create_model('mixnet_l', pretrained=False)
    model_1.classifier = nn.Linear(1536, 11)
    model_1.load_state_dict(torch.load(ckpt_1))
    model_1.cuda()
    model_1.eval()
    
    # resnext_100
    model_2 = torchvision.models.resnext101_32x8d(pretrained=False)
    fc_features = model_2.fc.in_features
    model_2.fc = nn.Linear(fc_features, 11)
    model_2.load_state_dict(torch.load(ckpt_2))
    model_2.cuda()
    model_2.eval()

    # # mixnet_xl
    model_3 = timm.create_model('mixnet_xl', pretrained=False)
    model_3.classifier = nn.Linear(1536, 11)
    model_3.load_state_dict(torch.load(ckpt_3))
    model_3.cuda()
    model_3.eval()

    # inception_v3
    # model_4 = torchvision.models.inception_v3(pretrained=False, aux_logits=False, transform_input=False)
    # model_4.fc = nn.Linear(2048, 11)
    # model_4.load_state_dict(torch.load(ckpt_4))
    # model_4.cuda()
    # model_4.eval()

    criterion = nn.CrossEntropyLoss()
     
    running_loss = 0
    running_acc = 0
    imgs = []
    cate = []
    start = time.time()
    for i, batch in enumerate(test_loader):
        x = batch[0].cuda()
        y1 = model_1(x)
        y2 = model_2(x)
        y3 = model_3(x)
        # y4 = model_4(x)
        y_fuse = y1 + y2 + y3

    # for i, batch in enumerate(test_loader):
    #     x = batch[0].cuda()
    #     y2 = model_2(x)

        names = batch[1]
        for name in names:
            imgs.append(name)
        for pred in y_fuse.max(1)[1]:
            cate.append(' '+str(pred.item()))
        print("Processing batch %d/%d" % (i, len(test_loader)))

    end = time.time()
    info = "Test finished, elapsed_time=%.3f." % (end - start)
    print(info)
    df = pd.DataFrame()
    df['Id'] = pd.Series(imgs)
    df['Category'] = pd.Series(cate)
    df.to_csv(csv_dir, index=False)
    print("csv file wrote successfully.")
    
if __name__ == "__main__":
    RESIZE_SIZE = 342
    CROP_SIZE = 299
    BATCH_SIZE_VAL = 4

    root_dir = "/kaggle/input/polytech-ds-2019/polytech-ds-2019/"
    test_dir = root_dir + "kaggle_evaluation/"
    csv_dir = "result.csv"
    test_ensemble(test_dir, csv_dir)

In [None]:
!rm /kaggle/working/result.csv
!ls /kaggle/working/

# 3  Test-time Augmentation

We did experiments on test-time augmentation, using five-crop and ten-crop transformation, but found no improvements in accuracy. 
So in our final version we keep the centercrop testing transformation.

In [None]:
!pip install timm

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torchvision import transforms
import torchvision
import os
from PIL import Image
import time
# from efficientnet_pytorch import EfficientNet
import timm
import pandas as pd

In [None]:
class food11_dataset_centercrop(torch.utils.data.Dataset):
    def __init__(self, root_dir, val_list, img_transform=None):
        super().__init__()
        self.root_dir = root_dir
        self.val_list = val_list
        self.img_names = [self.root_dir + os.sep + item for item in self.val_list]
        self.labels = [int(item.split(os.sep)[1].split('_')[0]) for item in self.val_list]
        self.transform = img_transform

    def __len__(self):
        return len(self.img_names)

    def __getitem__(self, i):
        img = Image.open(self.img_names[i]).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, self.labels[i]
    

class food11_dataset_fivecrop(Dataset):
	def __init__(self, root_dir, inp_list, img_transform=None):
		self.imgs = inp_list
		self.transform = img_transform
		self.img_dirs = [os.path.join(root_dir, img_dir) for img_dir in self.imgs]
		self.labels = [int(img_name.split('/')[1].split('_')[0]) for img_name in self.imgs]

	def __len__(self):
		return len(self.labels)

	def __getitem__(self, index):
		img = Image.open(self.img_dirs[index]).convert('RGB')
		if self.transform is not None:
			img = self.transform(img)
		label = self.labels[index]
		return img, label

    
    
def test_tencrop(test_dir, val_list, ckpt_dir):
	transform_test = transforms.Compose([
		transforms.Resize((RESIZE_SIZE, RESIZE_SIZE)),
		transforms.TenCrop((CROP_SIZE, CROP_SIZE)),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
		transforms.Lambda(lambda crops: torch.stack([transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(transforms.ToTensor()(crop)) for crop in crops]))
        # transforms.ToTensor(),
	])
	test_set = food11_dataset_fivecrop(test_dir, val_list, transform_test)
	test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE_VAL, shuffle=False, num_workers=4)
    # print(test_dataset[0])
	N_batch_test = len(test_loader)

    # model = torchvision.models.inception_v3(pretrained=False, aux_logits=False, transform_input=False)
    # model.fc = nn.Linear(2048, 11)
    # state_dict = torch.load(ckpt_dir, map_location='cpu')
    # model.load_state_dict(state_dict)

	model = timm.create_model('mixnet_xl', pretrained=False)
	model.classifier = nn.Linear(1536, 11)
	model.load_state_dict(torch.load(ckpt_dir))
	model.cuda()
	criterion = nn.CrossEntropyLoss()

	model.eval()
	running_loss = 0
	running_acc = 0
	start = time.time()
	for i, batch in enumerate(test_loader):
		x = batch[0].cuda()
		labels = batch[1].cuda()
# 		print(labels.size())
        # print(x.size())
		bs, ncrops, c, h, w = x.size()
		result = model(x.view(-1, c, h, w))
		result_avg = result.view(bs, ncrops, -1).mean(1)
		loss = criterion(result_avg, labels)
		running_loss += loss.item()
		running_acc += (result_avg.max(1)[1] == labels).sum().item()
		print("testing batch %d/%d" % (i, len(test_loader)))

	end = time.time()
	top1_acc = running_acc / len(test_dataset)
	info = "test result: val_loss=%.3f, top1_acc=%.3f, elapsed_time=%.3f" % (
		running_loss / len(test_loader), top1_acc, end-start)
	print(info)


def test_fivecrop(test_dir, val_list, ckpt_dir):
	transform_test = transforms.Compose([
		transforms.Resize((RESIZE_SIZE, RESIZE_SIZE)),
		transforms.FiveCrop((CROP_SIZE, CROP_SIZE)),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
		transforms.Lambda(lambda crops: torch.stack([transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(transforms.ToTensor()(crop)) for crop in crops]))
        # transforms.ToTensor(),
	])

	test_list = os.listdir(test_dir)
	test_dataset = food11_dataset_fivecrop(test_dir, val_list, transform_test)
	test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE_VAL, shuffle=False, num_workers=4)
    # print(test_dataset[0])
	N_batch_test = len(test_loader)

    # model = torchvision.models.inception_v3(pretrained=False, aux_logits=False, transform_input=False)
    # model.fc = nn.Linear(2048, 11)
    # state_dict = torch.load(ckpt_dir, map_location='cpu')
    # model.load_state_dict(state_dict)

	model = timm.create_model('mixnet_xl', pretrained=False)
	model.classifier = nn.Linear(1536, 11)
	model.load_state_dict(torch.load(ckpt_dir))
	model.cuda()
	criterion = nn.CrossEntropyLoss()
    
	model.eval()
	running_loss = 0
	running_acc = 0
	start = time.time()
	for i, batch in enumerate(test_loader):
		x = batch[0].cuda()
		labels = batch[1].cuda()
# 		print(labels.size())
        # print(x.size())
		bs, ncrops, c, h, w = x.size()
		result = model(x.view(-1, c, h, w))
		result_avg = result.view(bs, ncrops, -1).mean(1)
		loss = criterion(result_avg, labels)
		running_loss += loss.item()
		running_acc += (result_avg.max(1)[1] == labels).sum().item()
		print("testing batch %d/%d" % (i, len(test_loader)))

	end = time.time()
	top1_acc = running_acc / len(test_dataset)
	info = "test result: val_loss=%.3f, top1_acc=%.3f, elapsed_time=%.3f" % (
		running_loss / len(test_loader), top1_acc, end-start)
	print(info)

    
def test_centercrop(test_dir, val_list, ckpt_dir):
	transform_test = transforms.Compose([
		transforms.Resize((RESIZE_SIZE, RESIZE_SIZE)),
		transforms.CenterCrop((CROP_SIZE, CROP_SIZE)),
		transforms.ToTensor(),
		transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
	])

	test_dataset = food11_dataset_centercrop(root_dir, val_list, transform_test)
	test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE_VAL, shuffle=False, num_workers=4)
    # print(test_dataset[0])
	N_batch_test = len(test_loader)

    # model = torchvision.models.inception_v3(pretrained=False, aux_logits=False, transform_input=False)
    # model.fc = nn.Linear(2048, 11)
    # state_dict = torch.load(ckpt_dir, map_location='cpu')
    # model.load_state_dict(state_dict)

	model = timm.create_model('mixnet_xl', pretrained=False)
	model.classifier = nn.Linear(1536, 11)
	model.load_state_dict(torch.load(ckpt_dir))
	model.cuda()
	criterion = nn.CrossEntropyLoss()


	model.eval()
	running_loss = 0
	running_acc = 0
	start = time.time()
	for i, batch in enumerate(test_loader):
		x = batch[0].cuda()
		labels = batch[1].cuda()
# 		print(labels.size())
		y = model(x)
		loss = criterion(y, labels)
		running_loss += loss.item()
		running_acc += (y.max(1)[1] == labels).sum().item()
		print("testing batch %d/%d" % (i, len(test_loader)))

	end = time.time()
	top1_acc = running_acc / len(test_dataset)
	info = "test result: val_loss=%.3f, top1_acc=%.3f, elapsed_time=%.3f" % (
		running_loss / len(test_loader), top1_acc, end-start)
	print(info)




RESIZE_SIZE = 256
CROP_SIZE = 224
RAND_PROB = 0.5
BATCH_SIZE_VAL = 1

root_dir = r"/kaggle/input/polytech-ds-2019/polytech-ds-2019/"
ckpt_dir = r"/kaggle/input/models/model_mixnet_best.pkl"
# 	test_centercrop(root_dir, new_val, ckpt_dir)
# 	test_fivecrop(root_dir, new_val, ckpt_dir)
test_tencrop(root_dir, new_val, ckpt_dir)


# cropsize=224
# center crop test result: val_loss=0.132, top1_acc=0.970, elapsed_time=45.791
# ten crop test result: val_loss=0.122, top1_acc=0.967, elapsed_time=380.180

# cropsize=299
# center no crop test result: val_loss=0.142, top1_acc=0.962, elapsed_time=33.212
# center crop test result: val_loss=0.133, top1_acc=0.966, elapsed_time=35.501
# five crop test result: val_loss=0.126, top1_acc=0.965, elapsed_time=449.507
# ten crop test result: val_loss=0.123, top1_acc=0.966, elapsed_time=115.089

In [1]:
5 progress

we have done 3 Big part 

1) The model part 
1.2) Image augmentation 
we merged the training and validation and re-splited randomly. We used a split ratio of 10%. we resize images to (299,299) then center cropped to (224,224) , 
we normalized the image , and added color jitter, randomflip , randomaffine as our choice for image augmentation
1.3) creating model 
We trained different models InceptionV3, ResNext50, ResNext101, Mixnet_l, Mixnet_xl and EfficientNet_B5. We used the pretrained models on ImageNet and finetuned them on the food11 dataset.

For Mixnet (https://arxiv.org/abs/1907.09595) we unfroze all the layers.

For EfficientNet (https://arxiv.org/abs/1905.11946) we unfroze all the layers.

1.4) Training 
Our training process contains two phases:

Using the CosineAnnealing scheduler. As the learning rates for each epoch are calculated automatically and sometimes the minimizing is interupted by the increasing learning rate in the ascending period of the cosine function, we added the second phase of training, tuned the learning rate in a more controlable way.

Using the MultiStep scheduler. From the optimal model obtained in phase 1, we restart the training with a smaller learning rate and obtained our final optimal model.
1.4.2) Phase2 training
We restarted training with a different scheduler and smaller learning rate, with the best model obatined by CosineAnnealing (phase1), did fine-grained learning rate control using multistep scheduler and obtained the final best model (phase2).

2) Ensemble modeling 
With the multiple trained models at hand, we did a simple ensembling of those models to see if there will be improvement.

Our method of ensemble is averaging the predictions (output probabilities) of different models and obtain the final output prediction (averaging is simplified as calculating the sum of predictions in our code).

The pre-trained models are uploaded as "datasets" in kaggle (which we have set them to be public) and added into this notebook. They should be seen under /kaggle/input/models/. EfficientNet doesn't work well both individually and ensemble, so we didn't include nor upload the model.

Among all the ensemble combinations (tried by enumeration), the best accuracy is obtained by the ensemble of (resnext101, mixnet_l, mixnet_xl). 

Plus, we occasionally observed an improvement by zooming in the input image to a larger size when ensemble testing (training: 224, testing: 299), so we kept this method in the final submission. However for individual models this doesn't hold all the time. 


3) Test Time augmentation

We did experiments on test-time augmentation, using five-crop and ten-crop transformation, but found no improvements in accuracy. 
So in our final version we keep the centercrop testing transformation.

{}

In [None]:
Conclusion 

