In [None]:
import matplotlib.image as img
import matplotlib.pyplot as plt
%matplotlib inline # This tells the IPython environment to draw the plots immediately after the current cell

import numpy as np
from collections import defaultdict
import collections
from shutil import copy
from shutil import copytree, rmtree
import os
import random
from PIL import Image
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras import regularizers
import tensorflow.keras.backend as K
from tensorflow.keras.applications.inception_v3 import InceptionV3 # The deep learning training architecture used
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D, AveragePooling2D
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.regularizers import l2

In [None]:
# Show the installed TensorFlow version. The notebook expects 1.13.1; nothing is
# enforced here, so compare the printed value by eye before continuing.
print(tf.__version__)

In [None]:
# Helper function to download data and extract
def get_data_extract():
  if "food-101" in os.listdir():
    print("Dataset already exists")
  else:
    print("Downloading the data...")
    !wget http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz
    print("Dataset downloaded!")
    print("Extracting data..")
    !tar xzvf food-101.tar.gz
    print("Extraction done!")

In [None]:
# Download the Food-101 archive and extract it into the working directory
# (skipped when a "food-101" entry is already present there)
get_data_extract()

In [None]:
# List the entries of the image folder (one per food class) in sorted order
data_dir = "food-101/images/"
foods_sorted = os.listdir(data_dir)
foods_sorted.sort()
print(foods_sorted)

In [None]:
# Helper method to split dataset into train and test folders
def prepare_data(filepath, src, dest):
  """Copy the images named in a split file into per-class folders under ``dest``.

  Args:
    filepath: Path to a text file with one ``<class>/<image_id>`` entry per
      line (no file extension). Blank lines are ignored.
    src: Directory containing one sub-folder per class with the ``.jpg`` files.
    dest: Directory that receives the copies, one sub-folder per class.
      Missing folders are created; existing files are overwritten.

  Raises:
    IndexError: If a non-blank line contains no ``/`` separator.
    OSError: If the split file or a listed image cannot be read or written.
  """
  classes_images = defaultdict(list)
  with open(filepath, 'r') as txt:
    for line in txt:
      entry = line.strip()
      if not entry:
        # A stray empty line (e.g. at the end of the file) is not an image entry
        continue
      parts = entry.split('/')
      classes_images[parts[0]].append(parts[1] + '.jpg')

  for food, file_names in classes_images.items():
    print("\nCopying images into ",food)
    # exist_ok makes the call safe to repeat when the notebook is re-run
    os.makedirs(os.path.join(dest, food), exist_ok=True)
    for file_name in file_names:
      copy(os.path.join(src, food, file_name), os.path.join(dest, food, file_name))
  print("Copying Done!")

In [None]:
# Prepare the train split: copy the images listed in food-101/meta/train.txt
# from food-101/images into ./train (one sub-folder per class)
print("Creating train data...")
prepare_data('food-101/meta/train.txt', 'food-101/images', 'train')
print()
# Prepare the test split the same way: the images listed in
# food-101/meta/test.txt are copied from food-101/images into ./test
print("Creating test data...")
prepare_data('food-101/meta/test.txt', 'food-101/images', 'test')

In [None]:
# Helper method to create train_mini and test_mini data samples
def dataset_mini(food_list, src, dest):
  """Rebuild ``dest`` so that it holds copies of only the selected class folders.

  Args:
    food_list: Names of the class sub-folders to copy.
    src: Directory containing the class sub-folders.
    dest: Target directory; removed first if it exists, then recreated.
  """
  # Wipe any earlier result so classes from a previous selection do not linger
  dest_already_there = os.path.exists(dest)
  if dest_already_there:
    rmtree(dest)
  os.makedirs(dest)

  for class_name in food_list:
    class_src = os.path.join(src, class_name)
    class_dest = os.path.join(dest, class_name)
    print("Copying images into", class_name)
    copytree(class_src, class_dest)

In [None]:
# The 4 food classes used for the reduced experiment
food_list = [
    'french_fries',
    'fried_rice',
    'grilled_salmon',
    'steak',
]
# Source folders (full splits) and destination folders (4-class subsets)
src_train, dest_train = 'train', 'train_mini'
src_test, dest_test = 'test', 'test_mini'

In [None]:
# Build the reduced training set: only the folders named in food_list are
# copied from src_train into dest_train (dest_train is recreated from scratch)
print("Creating train data folder with new classes")
dataset_mini(food_list, src_train, dest_train)
print("Done")
print()
# Same for the reduced test set: src_test -> dest_test
print("Creating test data folder with new classes")
dataset_mini(food_list, src_test, dest_test)
print("Done")

In [None]:
def count_files(root):
  """Return the number of files found anywhere below ``root``.

  Directories themselves are not counted. A ``root`` that does not exist
  yields 0 because ``os.walk`` then produces no entries.
  """
  return sum(len(file_names) for _, _, file_names in os.walk(root))

# Pure-Python count instead of `find ... -printf`, which only GNU find supports
print("Total number of samples in train folder")
print(count_files('train_mini'))
print("Total number of samples in test folder")
print(count_files('test_mini'))

In [None]:
# Drop any model left over from an earlier run of this cell, then reset the
# Keras session once so the new graph starts clean.
if 'model' in locals():
    del model
K.clear_session()

# --- Configuration ---
n_classes = 4
img_width, img_height = 299, 299
train_data_dir = 'train_mini'
validation_data_dir = 'test_mini'
nb_train_samples = 3000 #75750
nb_validation_samples = 1000 #25250
batch_size = 16
n_epochs = 20
# Epoch index to continue from when a checkpoint from an earlier run is found.
# NOTE(review): assumes that earlier run completed 18 epochs - confirm.
resume_epoch = 18

model_dir = 'model'
best_model_path = 'model/best_model_4class.hdf5'
history_path = 'model/history_4class.log'
final_model_path = 'model/model_trained_4class.hdf5'

# --- Data generators ---
# Training images are rescaled to [0, 1] and randomly sheared/zoomed/flipped;
# validation images are only rescaled.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

# --- Model ---
# ImageNet-pretrained InceptionV3 without its classifier, followed by a small
# dense head ending in an n_classes-way softmax.
inception = InceptionV3(weights='imagenet', include_top=False)
x = inception.output
x = GlobalAveragePooling2D()(x)
x = Dense(128,activation='relu')(x)
x = Dropout(0.2)(x)

predictions = Dense(n_classes,kernel_regularizer=regularizers.l2(0.005), activation='softmax')(x)

model = Model(inputs=inception.input, outputs=predictions)
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

# --- Resume or start fresh ---
# The callbacks and model.save() below write into model_dir, so it must exist.
os.makedirs(model_dir, exist_ok=True)

# Continue from the saved weights only when a checkpoint is actually there;
# on a fresh kernel/checkout training starts at epoch 0 instead of failing.
resume_training = os.path.exists(best_model_path)
if resume_training:
    model.load_weights(best_model_path)
    initial_epoch = resume_epoch
else:
    initial_epoch = 0

# --- Callbacks ---
checkpointer = ModelCheckpoint(filepath=best_model_path, verbose=1, save_best_only=True)
# Append when resuming so the epochs logged by the earlier run are kept
csv_logger = CSVLogger(history_path, append=resume_training)

# --- Train the model ---
history = model.fit_generator(train_generator,
                    steps_per_epoch = nb_train_samples // batch_size,
                    validation_data=validation_generator,
                    validation_steps=nb_validation_samples // batch_size,
                    epochs=n_epochs,
                    initial_epoch=initial_epoch,
                    verbose=1,
                    callbacks=[csv_logger, checkpointer])

# Save the model as it stands after the last epoch (the checkpoint callback
# separately keeps the best one at best_model_path)
model.save(final_model_path)

In [None]:
# Class-name -> label-index mapping that the training generator derived from
# the sub-folders of the training directory
class_map_4 = train_generator.class_indices
print(class_map_4)

In [None]:
# NOTE: the plotted results show overfitting. This is because the model was
# trained on 4 labels with a batch size of 64 instead of 16 - not important at
# the moment.
# TODO: re-train later to *show* better results.

# Load the training history written by the CSVLogger callback
# (it writes 'model/history_4class.log', so read that same file)
history_4class = pd.read_csv('model/history_4class.log')

# The accuracy column is named 'acc' by older Keras/TensorFlow releases and
# 'accuracy' by newer ones; use whichever this log contains.
acc_col = 'acc' if 'acc' in history_4class.columns else 'accuracy'

# Plot the accuracy
plt.plot(history_4class['epoch'], history_4class[acc_col], label='train_accuracy')
plt.plot(history_4class['epoch'], history_4class['val_' + acc_col], label='validation_accuracy')
plt.title('Accuracy Plot')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Plot the loss
plt.plot(history_4class['epoch'], history_4class['loss'], label='train_loss')
plt.plot(history_4class['epoch'], history_4class['val_loss'], label='validation_loss')
plt.title('Loss Plot')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Load the trained model from disk to make predictions.
# NOTE(review): this path is the file written by model.save() after the last
# epoch, not the ModelCheckpoint file 'model/best_model_4class.hdf5' - confirm
# which of the two is meant by "best".
# compile=False: the model is loaded without being compiled.
K.clear_session()
model_best = load_model('model/model_trained_4class.hdf5',compile = False)

In [None]:
# Class names, filled in manually. predict_class() sorts this list and indexes
# it with the predicted class index, so it has to name the classes the model
# was trained on.
food_list = ['french_fries','fried_rice','grilled_salmon','steak']

In [None]:
def predict_class(model, images, show = True, class_names = None):
  """Predict a food class for each image file and optionally display it.

  Args:
    model: Object with a ``predict`` method that accepts a batch of shape
      (1, 299, 299, 3) scaled to [0, 1] and returns per-class scores.
    images: Iterable of image file paths.
    show: When True, draw every image with its predicted label as the title.
    class_names: Class labels to choose from. Defaults to the notebook-level
      ``food_list``. The labels are sorted before indexing, matching the
      alphanumeric class order used by ``flow_from_directory``.

  Returns:
    List with the predicted label for each entry of ``images``, in order.
  """
  # Sort a copy once instead of sorting the shared list in place on every pass
  labels = sorted(food_list if class_names is None else class_names)
  predicted_labels = []

  for image_path in images:
    # Same preprocessing as the generators: 299x299 input, pixels in [0, 1]
    pixels = image.load_img(image_path, target_size=(299, 299))
    pixels = image.img_to_array(pixels)
    pixels = np.expand_dims(pixels, axis=0)  # add the batch dimension
    pixels /= 255.

    pred = model.predict(pixels)
    pred_value = labels[np.argmax(pred)]
    predicted_labels.append(pred_value)

    if show:
      plt.imshow(pixels[0])
      plt.axis('off')
      plt.title(pred_value)
      plt.show()

  return predicted_labels

In [None]:
# Downloaded sample pictures used to try out the trained model
images = [
    'test_images/ff.jpg',
    'test_images/fr.jpg',
    'test_images/gs.jpg',
    'test_images/s.jpg',
]

predict_class(model_best, images, True)