# Food prediction model: L1

In [None]:
# import packages 
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout 
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers


from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import random

import matplotlib.pyplot as plt
import seaborn as sns
from keras.utils.vis_utils import plot_model


import cv2
import os
import gc
import random
import matplotlib.image as mpimg

## EfficientNetB2

In [None]:
# Tag for this model run; the submission file written at the end of the
# notebook carries the same 'J1' tag in its name.
model_name = 'J1'

## Load labels and paths

In [None]:
# file location
# Kaggle input paths: training label CSV, training image folder,
# test image folder and the sample submission CSV.
train_label_path = '/kaggle/input/food-recognition-challenge-2021/train_labels.csv'
train_image_path = '/kaggle/input/food-recognition-challenge-2021/train_set/train_set/'
test_image_path= '/kaggle/input/food-recognition-challenge-2021/test_set'
test_results_sample = '/kaggle/input/food-recognition-challenge-2021/sample.csv'


# importing files
# Both CSVs are read into DataFrames; later cells read their 'img_name'
# and 'label' columns.
df_train_labels = pd.read_csv(train_label_path, sep=',')
df_test_labels = pd.read_csv(test_results_sample, sep=',')

# TF1-compat session created with log_device_placement=True.
# NOTE(review): `sess` is not referenced again in the visible notebook —
# presumably only kept for the device-placement log; confirm it is needed.
sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))

## Data pre-processing

In [None]:
# Add a 'path' column holding the image file name as a string, and cast the
# labels to strings, for both the training labels and the sample submission.
# The 'path' and 'label' columns are later passed as x_col / y_col to
# flow_from_dataframe with class_mode='categorical'.
#
# Vectorised column operations replace the former index-based loops, which
# looked rows up by label (df[col][i]) and therefore only worked while the
# DataFrames kept their default 0..n-1 index.
df_train_labels['path'] = df_train_labels['img_name'].astype(str)
df_train_labels['label'] = df_train_labels['label'].astype(str)

df_test_labels['path'] = df_test_labels['img_name'].astype(str)
df_test_labels['label'] = df_test_labels['label'].astype(str)


## Train and validation set

In [None]:
# Seed Python's built-in random module.
random.seed(10)

# Hold out 30% of the labelled rows for validation. The split is stratified
# on 'label' and made repeatable through random_state.
image_train, image_validation = train_test_split(
    df_train_labels,
    stratify=df_train_labels['label'],
    random_state=1,
    test_size=0.3,
)


In [None]:
#image_train.head()

# Loading training and validation images

In [None]:
# Image geometry and batch size shared by all three generators
img_height = 224
img_width = 224
image_size = (img_height, img_width)
batch_size = 85

# One generator per data split; no augmentation or rescaling arguments
# are passed to any of them.
datagen = ImageDataGenerator()
dataval = ImageDataGenerator()
test = ImageDataGenerator()

# Keyword arguments common to the training and validation flows: both read
# file names from 'path' and classes from 'label', relative to the training
# image folder, and both are shuffled.
labelled_flow_args = dict(
    directory=train_image_path,
    x_col='path',
    y_col='label',
    class_mode='categorical',
    shuffle=True,
    target_size=image_size,
    batch_size=batch_size,
)

# training data
train_images = datagen.flow_from_dataframe(image_train, **labelled_flow_args)

# validation data
validation_images = dataval.flow_from_dataframe(image_validation, **labelled_flow_args)

# test data: read straight from the directory, unshuffled so that the
# prediction order matches test_generator.filenames
test_generator = test.flow_from_directory(
    test_image_path,
    shuffle=False,
    batch_size=batch_size,
    target_size=image_size,
)

## Model design

Model
* EfficientNetB2/ run time: 2701
* ResNet50/ run time: 2759
* MobileNetV2/ run time: 2220
* Xception/ run time: 2534
* VGG16/ run time: 2774

In [None]:
# Number of output classes
num_classes = 80

# Input size fed to the network: height x width x 3 channels
input_shape = (img_height, img_width, 3)

# Pre-trained EfficientNetB2 backbone: ImageNet weights, no classification
# head, global average pooling on the final feature map.
# `classes` is intentionally not passed: it only applies when
# include_top=True, so it had no effect here.
base_model = tf.keras.applications.EfficientNetB2(
    include_top=False,
    weights="imagenet",
    input_tensor=None,
    input_shape=input_shape,
    pooling="avg",
)

# Freeze the whole pre-trained backbone (not just its first layer) so that
# only the new classification head below is trained.
base_model.trainable = False
inputs = base_model.input

# New softmax head, sized from num_classes instead of a repeated literal
outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(base_model.output)
model = tf.keras.Model(inputs, outputs)

# compiling model: categorical cross-entropy matches the one-hot labels
# produced by class_mode='categorical' in the generators
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(learning_rate=0.0001),
              metrics=['accuracy'])



In [None]:
# model overview: print the summary of the model built above
model.summary()


## Model training

In [None]:
# setting the location for train results
# NOTE(review): this path is not used by the ModelCheckpoint callback in the
# training cell, which writes to 'Model_name.h5' — confirm it is still needed.
checkpoint_path = 'training_1/cp.ckpt'


In [None]:
# Train the model and print the elapsed wall-clock time in seconds
import time

start = time.time()

# number of training epochs (also read by the plotting cell)
epochs = 20

# Save the model whenever the validation accuracy improves.
# NOTE(review): the file name is the literal 'Model_name.h5'; it does not
# use the `model_name` variable — confirm this is intended.
chkp = tf.keras.callbacks.ModelCheckpoint(
    'Model_name.h5',
    save_best_only=True,
    monitor='val_accuracy',
)

# run the training loop, validating on the held-out split
history = model.fit(
    train_images,
    epochs=epochs,
    callbacks=[chkp],
    validation_data=validation_images,
)

elapsed = time.time() - start
print(elapsed)

## Accuracy plots

In [None]:
# Per-epoch metrics recorded by model.fit
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Derive the x-axis from the recorded history rather than the requested
# epoch count, so the plot still works if training stopped early.
epochs_range = range(len(acc))

# plotting Training and Validation Accuracy
plt.figure(figsize=(14, 14))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# plotting the Training and Validation Loss
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

# Save before showing: once plt.show() has displayed the figure, a later
# savefig would typically write an empty canvas.
plt.savefig('testim.png')
plt.show()

## Predicting on the testset

In [None]:
def predict_for_test(model):
    '''
    Predict a label for every test image and return the result as a DataFrame.

    Reads the notebook-level `test_generator` for the images and
    `train_images.class_indices` to translate predicted class indices back
    into label names. Nothing is written to disk here; the caller saves
    the returned frame.

    Parameters
    ----------
    model
        Object exposing ``predict``; it is called as
        ``model.predict(test_generator)`` and must return an array whose
        last axis holds the per-class scores.

    Returns
    -------
    pandas.DataFrame
        Columns 'img_name' (file name without its directory part) and
        'label' (predicted label name), one row per test file.
    '''
    # invert the label -> index mapping so indices map back to label names
    index_to_label = {index: label
                      for label, index in train_images.class_indices.items()}

    # predicting on the test set: take the highest-scoring class per image
    preds = model.predict(test_generator)
    preds_cls_idx = preds.argmax(axis=-1)
    predictions = [index_to_label[k] for k in preds_cls_idx]

    # mapping predictions to file names; the generator's file names carry a
    # directory prefix, so keep only the final path component
    filenames = [os.path.basename(name) for name in test_generator.filenames]
    results = pd.DataFrame({"img_name": filenames,
                            "label": predictions})
    return results

# Predict on the test set, then left-merge onto the sample submission so the
# output keeps exactly the sample file's rows and order.
test_results_pr = predict_for_test(model)
test_results = df_test_labels.merge(test_results_pr, how='left', on='img_name')

# Both frames have a 'label' column, so the merge suffixes them: label_x is
# the value from the sample file, label_y is the prediction. Keep the latter.
test_results = test_results[['path', 'label_y']].rename(columns={'label_y': 'label', 'path': 'img_name'})

# A left merge leaves NaN where no prediction matched an image name; report
# it instead of silently writing an incomplete submission.
n_unmatched = int(test_results['label'].isna().sum())
if n_unmatched:
    print(f"warning: {n_unmatched} test images have no predicted label")

# writing predictions to csv; the file name is derived from model_name so it
# follows the run tag instead of repeating it as a literal
test_results.to_csv(f"sample_submission_model_{model_name}.csv", index=False)


In [None]:
# Bar chart of how many test images were assigned to each predicted label
plt.figure(figsize=(18, 10))
label_axes = sns.countplot(data=test_results, x='label')
label_axes.set(title='Count per label')
plt.show()
# plt.savefig('model_1_label_dist.png')
