# Food classification!
## Polytech' Nice Data Science course 2019

95% score on the leaderboard, by Ellatifi Rayane & Husseini Hussein



The food image classification challenge is centered around classifying images of 11 different kinds of food. Some example images from the dataset as well as a breakdown of the classes, complete with example foods, can be seen in the figure and table below. 


![Food classification](https://www.googleapis.com/download/storage/v1/b/kaggle-user-content/o/inbox%2F3423010%2Fd905717ba90ebea44ba2ce87ef4ccaac%2F2019-10-15-155805_1271x1147_scrot.png?generation=1571147905376326&alt=media)


# Importation & Packages 

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# Any results you write to the current directory are saved as output.

In [None]:
#PyTorch, of course
import torch
import torch.nn as nn
import torchvision
#We will need torchvision transforms for data augmentation
from torchvision import transforms

### utilities
# tool to print a nice summary of a network, similar to Keras' summary

# library to do bash-like wildcard expansion
import glob

# others
import numpy as np
import random
from PIL import Image
from IPython.display import display
from tqdm import tqdm_notebook


# a little helper function to directly display a Tensor
def display_tensor(t):
    """Convert tensor ``t`` to a PIL image and show it in the notebook."""
    to_pil = transforms.ToPILImage()
    pil_image = to_pil(t)
    display(pil_image)





In [None]:
from IPython.display import display


Create the food-11 dataset classes
Create the dataset classes that load the data. We define one class per split (training, validation and testing) rather than a single class with a mode parameter. The training class, FoodDataset_training_class, also implements the data augmentation.

The transformations applied for training, as the data augmentation process:

Resize, which rescales the given image to 224x224 (the input size expected by the model we use).
RandomHorizontalFlip, which horizontally flips the given image randomly with a given probability; RandomVerticalFlip, which vertically flips the given image randomly with a given probability; and RandomRotation, which rotates the image by a random angle of up to 10 degrees.
Normalize, which normalizes a tensor image with the given per-channel mean and standard deviation.

The transformations applied for validation and testing:

Resize, which rescales the given image to 224x224 (the input size expected by the model we use).
Normalize, which normalizes a tensor image with the given per-channel mean and standard deviation.

# Training class 

In [None]:
class FoodDataset_training_class(torch.utils.data.Dataset):
    """Training split of the food dataset, with random data augmentation.

    Every entry directly inside ``img_dir`` is treated as an image whose class
    label is the part of its file name before the first underscore.
    """

    def __init__(self, img_dir):
        """Index the images in ``img_dir`` and build the augmentation pipeline.

        Args:
            img_dir: Directory that holds the training images.
        """
        super().__init__()

        # store directory name
        self.img_dir = img_dir

        # file names only (no directory part), in a reproducible order
        self.img_names = self._list_image_names(img_dir)

        # label for each image: the file-name prefix before the first "_"
        self.labels = [name.split("_")[0] for name in self.img_names]

        # PyTorch transforms: resize, random flips/rotation, then normalise
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(p=0.6),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.RandomRotation(10),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

    @staticmethod
    def _list_image_names(img_dir):
        """Return the sorted base names of the entries found in ``img_dir``."""
        # basename() works for any directory depth, unlike a fixed split index.
        pattern = os.path.join(img_dir, "*")
        return sorted(os.path.basename(path) for path in glob.glob(pattern))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.img_names)

    def __getitem__(self, i):
        """Return ``(transformed image, integer label)`` for index ``i``."""
        return self._read_img(i), int(self.labels[i])

    def _read_img(self, i):
        """Load image ``i`` from disk and apply the transform pipeline."""
        path = os.path.join(self.img_dir, self.img_names[i])
        # The context manager closes the file handle; convert("RGB") guarantees
        # the three channels that Normalize is configured for.
        with Image.open(path) as img:
            return self.transform(img.convert("RGB"))

# Evaluation Class 


In [None]:
class FoodDataset_eval_class(torch.utils.data.Dataset):
    """Validation split of the food dataset, without data augmentation.

    Every entry directly inside ``img_dir`` is treated as an image whose class
    label is the part of its file name before the first underscore.
    """

    def __init__(self, img_dir):
        """Index the images in ``img_dir`` and build the evaluation pipeline.

        Args:
            img_dir: Directory that holds the validation images.
        """
        super().__init__()

        # store directory name
        self.img_dir = img_dir

        # file names only (no directory part), in a reproducible order
        self.img_names = self._list_image_names(img_dir)

        # label for each image: the file-name prefix before the first "_"
        self.labels = [name.split("_")[0] for name in self.img_names]

        # Evaluation uses deterministic transforms only: resize + normalise.
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

    @staticmethod
    def _list_image_names(img_dir):
        """Return the sorted base names of the entries found in ``img_dir``."""
        # basename() works for any directory depth, unlike a fixed split index.
        pattern = os.path.join(img_dir, "*")
        return sorted(os.path.basename(path) for path in glob.glob(pattern))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.img_names)

    def __getitem__(self, i):
        """Return ``(transformed image, integer label)`` for index ``i``."""
        return self._read_img(i), int(self.labels[i])

    def _read_img(self, i):
        """Load image ``i`` from disk and apply the transform pipeline."""
        path = os.path.join(self.img_dir, self.img_names[i])
        # No reseeding of `random` here: the pipeline has no random step.
        # The context manager closes the file handle; convert("RGB") guarantees
        # the three channels that Normalize is configured for.
        with Image.open(path) as img:
            return self.transform(img.convert("RGB"))

# Testing Class 

In [None]:
class FoodDataset_test_class(torch.utils.data.Dataset):
    """Unlabelled test split used to build the submission.

    Every entry directly inside ``img_dir`` is treated as an image whose
    submission id is the part of its file name before the first dot.
    """

    def __init__(self, img_dir):
        """Index the images in ``img_dir`` and build the evaluation pipeline.

        Args:
            img_dir: Directory that holds the test images.
        """
        super().__init__()

        # store directory name
        self.img_dir = img_dir

        # file names only (no directory part), in a reproducible order
        self.img_names = self._list_image_names(img_dir)

        # submission id for each image: the file name up to the first "."
        self.subm_names = [name.split(".")[0] for name in self.img_names]

        # Evaluation uses deterministic transforms only: resize + normalise.
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

    @staticmethod
    def _list_image_names(img_dir):
        """Return the sorted base names of the entries found in ``img_dir``."""
        # basename() works for any directory depth, unlike a fixed split index.
        pattern = os.path.join(img_dir, "*")
        return sorted(os.path.basename(path) for path in glob.glob(pattern))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.img_names)

    def __getitem__(self, i):
        """Return ``(transformed image, integer submission id)`` for index ``i``."""
        return self._read_img(i), int(self.subm_names[i])

    def _read_img(self, i):
        """Load image ``i`` from disk and apply the transform pipeline."""
        path = os.path.join(self.img_dir, self.img_names[i])
        # The context manager closes the file handle; convert("RGB") guarantees
        # the three channels that Normalize is configured for.
        with Image.open(path) as img:
            return self.transform(img.convert("RGB"))

In [None]:
# One dataset object per split; each class applies its own transform pipeline.
training_data = FoodDataset_training_class(
    img_dir="/kaggle/input/polytech-ds-2019/polytech-ds-2019/training"
)
validation_data = FoodDataset_eval_class(
    img_dir="/kaggle/input/polytech-ds-2019/polytech-ds-2019/validation"
)
test_data = FoodDataset_test_class(
    img_dir="/kaggle/input/polytech-ds-2019/polytech-ds-2019/kaggle_evaluation"
)

In [None]:
# Sanity check: fetch one (image, label) pair from the training set, show the
# transformed image and echo its label as the cell output.
sample_index = 1100
food, label = training_data[sample_index]
display_tensor(food)
label

In [None]:
# Mini-batch size and DataLoader worker count used by the DataLoaders.
BATCH_SIZE, NUM_WORKERS = 32, 4

In [None]:
# Settings shared by the three loaders.
common_loader_args = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)

# Training and validation batches are drawn in shuffled order; the test loader
# keeps the dataset order.
training_dl = torch.utils.data.DataLoader(
    training_data, shuffle=True, **common_loader_args
)
validation_dl = torch.utils.data.DataLoader(
    validation_data, shuffle=True, **common_loader_args
)
test_dl = torch.utils.data.DataLoader(test_data, **common_loader_args)

In [None]:
# Test-time-augmentation pipeline: flips, 0/180 degree rotations, rescaling and
# intensity scaling.
# It is bound to `tta_transforms` rather than `transforms` so that it does not
# shadow the `torchvision.transforms` module that `display_tensor` and the
# dataset classes look up by that name.
# NOTE(review): `tta` is not imported anywhere in the visible notebook
# (presumably `import ttach as tta` -- confirm); this cell raises NameError
# until it is. If the commented-out ClassificationTTAWrapper call is
# re-enabled, pass `tta_transforms` to it.
tta_transforms = tta.Compose(
    [
        tta.HorizontalFlip(),
        tta.Rotate90(angles=[0, 180]),
        tta.Scale(scales=[1, 2, 4]),
        tta.Multiply(factors=[0.9, 1, 1.1]),
    ]
)

The wide_resnet101_2 model is pretrained on the ImageNet database, so its classifier (the fully connected layer) has 1000 outputs. Our food11 dataset has only 11 categories, so we must replace the classifier of this model so that the output vector has 11 entries instead of 1000.

In [None]:
# ImageNet-pretrained Wide ResNet-101-2 backbone.
model = torchvision.models.wide_resnet101_2(pretrained=True, progress=True)
print(model)

# Replace the original classifier with an 11-way head that outputs
# log-probabilities. The input width is read from the layer being replaced
# instead of being hard-coded, and nn.Dropout is used because the head
# operates on flat (batch, features) activations -- nn.Dropout2d is meant for
# channel-wise dropout on feature maps.
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 1024),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(1024, 11),
    nn.LogSoftmax(dim=1),
)
#tta_model = tta.ClassificationTTAWrapper(model, transforms,merge_mode='mean')
#print(model)


# Training part 

In [None]:
## Training the model
LEARNING_RATE = 0.001

# Move the network to the GPU; `Model` is the handle used by the later cells.
Model = model.cuda()

# Negative log-likelihood loss and SGD with momentum and weight decay.
criterion = nn.NLLLoss()
optimizer = torch.optim.SGD(
    params=Model.parameters(),
    lr=LEARNING_RATE,
    momentum=0.9,
    weight_decay=0.004,
)


In [None]:
NB_EPOCHS = 9
# Per-epoch history of mean loss and accuracy for both splits.
epoch_loss, epoch_acc, epoch_val_loss, epoch_val_acc = [], [], [], []

for e in range(NB_EPOCHS):
    print("Epoch :", e)

    # ---- training pass ----
    Model.train()
    running_loss = 0.0
    running_accuracy = 0

    for x, Labels in training_dl:
        x = x.cuda()
        Labels = Labels.cuda()

        y = Model(x)
        loss = criterion(y, Labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion (nn.NLLLoss with its default reduction) yields the
        # mean over the batch. Weight it by the batch size so that dividing
        # by the dataset size below gives a true per-sample mean, even when
        # the last batch is smaller.
        running_loss += loss.item() * Labels.size(0)
        running_accuracy += (y.max(1)[1] == Labels).sum().item()

    train_loss = running_loss / len(training_data)
    train_acc = running_accuracy / len(training_data)
    print("Training accuracy: ", train_acc, "Training loss: ", train_loss)
    epoch_loss.append(train_loss)
    epoch_acc.append(train_acc)

    # ---- validation pass (no gradients, dropout disabled via eval()) ----
    Model.eval()
    running_val_loss = 0.0
    running_val_accuracy = 0

    with torch.no_grad():
        for x, Labels in validation_dl:
            x = x.cuda()
            Labels = Labels.cuda()

            y = Model(x)
            loss = criterion(y, Labels)

            running_val_loss += loss.item() * Labels.size(0)
            running_val_accuracy += (y.max(1)[1] == Labels).sum().item()

    val_loss = running_val_loss / len(validation_data)
    val_acc = running_val_accuracy / len(validation_data)
    print("validation accuracy: ", val_acc, "validation loss: ", val_loss)
    epoch_val_loss.append(val_loss)
    epoch_val_acc.append(val_acc)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Use the number of epochs actually recorded, so the plot still works when
# training was stopped before NB_EPOCHS epochs completed.
x = np.arange(len(epoch_acc))

# Accuracy curves
plt.figure()
plt.plot(x, epoch_acc, label="training")
plt.plot(x, epoch_val_acc, label="validation")
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.legend()

# Loss curves
plt.figure()
plt.plot(x, epoch_loss, label="training")
plt.plot(x, epoch_val_loss, label="validation")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()

In [None]:
# Per-batch predicted class indices and the matching image ids.
predictions = []
image_names = []

Model.eval()

# Inference only: no gradients are needed for the forward passes.
with torch.no_grad():
    for x, Id in test_dl:
        y = Model(x.cuda())

        # index of the highest score along dim 1 = predicted category
        predictions.append(y.max(1)[1])
        image_names.append(Id)
            

In [None]:
# Flatten the per-batch id tensors into one flat list of ids.
names = [img_id for id_batch in image_names for img_id in id_batch.numpy()]
len(names)

In [None]:
# Flatten the per-batch prediction tensors (moved to the CPU first) into one
# flat list of predicted categories.
predict = [
    category
    for pred_batch in predictions
    for category in pred_batch.cpu().numpy()
]
len(predict)

In [None]:
# Pair every image id with its predicted category.
list_of_tup = [(img_id, category) for img_id, category in zip(names, predict)]

# Submission part 


In [None]:
# Two-column table: image id and predicted category.
submission = pd.DataFrame(
    data=list_of_tup,
    columns=['Id', 'Category'],
)
submission



In [None]:
# Write the submission file to the working directory, without the index column.
submission.to_csv(
    path_or_buf="goldenboys.csv",
    index=False,
    encoding='utf-8',
)

In [None]:
# Echo the submission table as the cell output.
submission