<a href="https://colab.research.google.com/github/AnandaIlyasa/bangkit-capstone-bahanbaku/blob/3-combined-dataset/notebooks/food_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import modules

In [1]:
from __future__ import absolute_import, division, print_function

import tensorflow as tf

import tensorflow.keras.backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import EfficientNetB7
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import callbacks
from tensorflow.keras.optimizers import Adam

from tensorflow.keras import models
from tensorflow.keras.applications.efficientnet import preprocess_input

import cv2
import os
import random
import collections
from collections import defaultdict

from shutil import copy
from shutil import copytree, rmtree

import numpy as np

import matplotlib.pyplot as plt
import matplotlib.image as img
%matplotlib inline

Download food-101 dataset and extract

In [2]:
def get_data_extract(dest_dir='/content'):
  """Download and extract the Food-101 archive unless it is already present.

  Args:
    dest_dir: Directory that receives ``food-101.tar.gz`` and the extracted
      ``food-101`` folder. Defaults to ``/content``.

  Returns:
    None. A status message is printed in both cases.
  """
  # An absolute path makes get_file() use this directory as-is instead of
  # nesting it under its default cache directory.
  dest_dir = os.path.abspath(dest_dir)

  # Look for the extracted folder in the directory the archive is unpacked
  # into, not in whatever the current working directory happens to be.
  if os.path.isdir(os.path.join(dest_dir, "food-101")):
    print("Dataset already exists")
  else:
    tf.keras.utils.get_file(
        'food-101.tar.gz',
        'http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz',
        cache_subdir=dest_dir,
        extract=True,
        archive_format='tar',
        cache_dir=None
    )
    print("Dataset downloaded and extracted!")

In [3]:
get_data_extract()

Downloading data from http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz
Dataset downloaded and extracted!


Mount drive to save checkpoint and load dataset

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Download additional Indonesian food dataset from kaggle

In [5]:
! pip install kaggle

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [6]:
! mkdir -p ~/.kaggle

The kaggle.json file must be uploaded to the current directory (/content) before running the next cell.
-- kaggle.json contains the API token and can be downloaded from the Kaggle account page

In [7]:
! cp kaggle.json ~/.kaggle/

Restrict the permissions of kaggle.json so that only the owner can read and write it

In [8]:
! chmod 600 ~/.kaggle/kaggle.json

Download local food datasets from kaggle and extract

In [9]:
! kaggle datasets download -d theresalusiana/indonesian-food

Downloading indonesian-food.zip to /content
 99% 666M/673M [00:05<00:00, 103MB/s]
100% 673M/673M [00:05<00:00, 124MB/s]


In [10]:
! kaggle datasets download -d arizbw/traditional-food-knowledge-of-indonesia

Downloading traditional-food-knowledge-of-indonesia.zip to /content
100% 3.10G/3.10G [00:23<00:00, 164MB/s]
100% 3.10G/3.10G [00:23<00:00, 141MB/s]


In [11]:
! unzip -q /content/indonesian-food.zip

Extract the second Kaggle archive (traditional-food-knowledge-of-indonesia)

In [12]:
! unzip -q /content/traditional-food-knowledge-of-indonesia.zip

Create combined_dataset folder to accommodate all data from different datasets

In [13]:
! mkdir -p ./combined_dataset

Prepare the food-tfk-images dataset so that each food class has its own folder

In [14]:
def prepare_food_img_data(filepath, src, dest):
  """Copy the images listed in a CSV file into one folder per food class.

  The first line of the CSV is treated as a header and skipped. For every
  other row, column 0 is used as the image file name (relative to ``src``)
  and column 2 as the class name; the image is copied to
  ``dest/<class name>/<file name>``. Rows with fewer than three columns
  (for example blank lines) are ignored.

  Args:
    filepath: Path of the CSV file describing the images.
    src: Directory that contains the image files.
    dest: Directory in which the class folders are created.

  Returns:
    None. Prints "Copying Done!" when finished.
  """
  import csv  # local import so this cell does not depend on the import cell

  image_classes = defaultdict(list)
  with open(filepath, 'r', newline='') as csv_file:
    rows = csv.reader(csv_file)
    next(rows, None)  # skip the header line (no error on an empty file)
    for row in rows:
      if len(row) < 3:
        # Blank or truncated row: there is no class column to read.
        continue
      image_classes[row[2].strip()].append(row[0].strip())

  os.makedirs(dest, exist_ok=True)

  for food, file_names in image_classes.items():
    class_dir = os.path.join(dest, food)
    os.makedirs(class_dir, exist_ok=True)
    for file_name in file_names:
      copy(os.path.join(src, file_name), os.path.join(class_dir, file_name))
  print("Copying Done!")

Copy food-tfk-images dataset to combined_dataset folder

In [15]:
print("Preparing food-tfk-images datasets...")
# All three split files describe images in the same source folder and are
# merged into the same combined dataset folder.
for split_csv in ('/content/train.csv', '/content/dev.csv', '/content/test.csv'):
  prepare_food_img_data(split_csv, '/content/food-tfk-images', '/content/combined_dataset')

Preparing food-tfk-images datasets...
Copying Done!
Copying Done!
Copying Done!


Copy indonesian-food dataset to combined_dataset folder

Firstly, rename gado to gado-gado to standardize

In [16]:
! mv /content/dataset/train/gado /content/dataset/train/gado-gado

In [17]:
! mv /content/dataset/test/gado /content/dataset/test/gado-gado

In [18]:
! mv /content/dataset/valid/gado /content/dataset/valid/gado-gado

In [19]:
! cp -a /content/dataset/train/gado-gado/. /content/combined_dataset/gado-gado

In [20]:
! cp -a /content/dataset/train/gudeg/. /content/combined_dataset/gudeg

In [21]:
! cp -a /content/dataset/train/rendang/. /content/combined_dataset/rendang

In [22]:
! cp -a /content/dataset/test/gado-gado/. /content/combined_dataset/gado-gado

In [23]:
! cp -a /content/dataset/test/gudeg/. /content/combined_dataset/gudeg

In [24]:
! cp -a /content/dataset/test/rendang/. /content/combined_dataset/rendang

In [25]:
! cp -a /content/dataset/valid/gado-gado/. /content/combined_dataset/gado-gado

In [26]:
! cp -a /content/dataset/valid/gudeg/. /content/combined_dataset/gudeg

In [27]:
! cp -a /content/dataset/valid/rendang/. /content/combined_dataset/rendang

In [28]:
! cp -a /content/dataset/train/. /content/combined_dataset

In [29]:
! cp -a /content/dataset/test/. /content/combined_dataset

Copy the validation split as well, then check how many bakso images are now in the combined_dataset folder

In [30]:
! cp -a /content/dataset/valid/. /content/combined_dataset

In [31]:
!ls -l /content/combined_dataset/bakso | wc -l

2155


Copy subset of food-101 dataset to combined_dataset folder

In [32]:
def copy_subset_dataset_to_combined(food_list, src, dest):
  """Copy the class folders named in ``food_list`` from ``src`` into ``dest``.

  A class folder that does not exist in ``dest`` yet is copied as a whole
  tree. If it already exists, the entries of the source folder are copied
  into it one by one, leaving files that are already there in place.

  Args:
    food_list: Iterable of class folder names.
    src: Directory containing one folder per class.
    dest: Directory receiving the class folders.
  """
  for food_item in food_list:
    source_dir = os.path.join(src, food_item)
    target_dir = os.path.join(dest, food_item)
    print("Copying images into ",food_item)

    if os.path.exists(target_dir):
      # Merge into the existing class folder file by file.
      for file_name in os.listdir(source_dir):
        copy(os.path.join(source_dir, file_name), os.path.join(target_dir, file_name))
      continue

    copytree(source_dir, target_dir)

In [33]:
# Food-101 classes that are merged into the combined dataset.
food_101_subset = [
    'apple_pie',
    'bibimbap',
    'bread_pudding',
    'cheesecake',
    'chicken_curry',
    'chicken_wings',
    'chocolate_cake',
    'french_fries',
    'garlic_bread',
    'gnocchi',
    'hamburger',
    'omelette',
    'pizza',
    'samosa',
    'shrimp_and_grits',
    'strawberry_shortcake',
    'tacos',
    'tiramisu',
    'tuna_tartare',
    'waffles',
]
copy_subset_dataset_to_combined(
    food_101_subset,
    '/content/food-101/images',
    '/content/combined_dataset',
)

Copying images into  apple_pie
Copying images into  bibimbap
Copying images into  bread_pudding
Copying images into  cheesecake
Copying images into  chicken_curry
Copying images into  chicken_wings
Copying images into  chocolate_cake
Copying images into  french_fries
Copying images into  garlic_bread
Copying images into  gnocchi
Copying images into  hamburger
Copying images into  omelette
Copying images into  pizza
Copying images into  samosa
Copying images into  shrimp_and_grits
Copying images into  strawberry_shortcake
Copying images into  tacos
Copying images into  tiramisu
Copying images into  tuna_tartare
Copying images into  waffles


Get food labels

In [34]:
data_dir = "/content/combined_dataset"
# Keep only sub-directories: flow_from_directory() treats each sub-directory
# as one class, so a stray file here would otherwise inflate the class count
# that is later used to size the output layer.
all_foods_sorted = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
for food in all_foods_sorted:
  print(f'\"{food}', end="\", ")
print()
print(len(all_foods_sorted))

"apple_pie", "asinan-jakarta", "ayam-betutu", "ayam-bumbu-rujak", "ayam-goreng-lengkuas", "bakso", "bibimbap", "bika-ambon", "bir-pletok", "bread_pudding", "bubur-manado", "cendol", "cheesecake", "chicken_curry", "chicken_wings", "chocolate_cake", "es-dawet", "french_fries", "gado-gado", "garlic_bread", "gnocchi", "gudeg", "gulai-ikan-mas", "hamburger", "keladi", "kerak-telor", "klappertart", "kolak", "kue-lumpur", "kunyit-asam", "laksa-bogor", "lumpia-semarang", "mie-aceh", "nagasari", "omelette", "papeda", "pempek-palembang", "pizza", "rawon-surabaya", "rendang", "rujak-cingur", "samosa", "sate", "sate-ayam-madura", "sate-lilit", "sate-maranggi", "shrimp_and_grits", "soerabi", "soto-ayam-lamongan", "soto-banjar", "strawberry_shortcake", "tacos", "tahu-telur", "tiramisu", "tuna_tartare", "waffles", 
56


Preparing Data (Transform and Load data from directory)

In [42]:
def prepare_data_input_pipeline(data_dir='./combined_dataset',
                                img_size=(150, 150),
                                batch_size=30,
                                validation_split=0.06):
  """Build the training and validation image generators.

  Both generators read from the same directory (one sub-folder per class)
  and apply the EfficientNet ``preprocess_input`` function. Only the
  training generator applies random shear, zoom and horizontal flips.

  Args:
    data_dir: Directory containing one sub-folder per class.
    img_size: ``(height, width)`` that every image is resized to.
    batch_size: Number of images per batch.
    validation_split: Fraction of the images reserved for validation.

  Returns:
    A ``(train_generator, validation_generator)`` tuple.
  """
  # Resets the Keras backend session, as the original pipeline did.
  K.clear_session()

  img_height, img_width = img_size

  train_datagen = ImageDataGenerator(
      preprocessing_function=preprocess_input,
      shear_range=0.2,
      validation_split=validation_split,
      zoom_range=0.2,
      horizontal_flip=True)

  # Validation images must not be randomly distorted, otherwise the reported
  # validation metrics vary between epochs for reasons unrelated to the model.
  # The same validation_split keeps the training/validation partition aligned
  # between the two generators.
  validation_datagen = ImageDataGenerator(
      preprocessing_function=preprocess_input,
      validation_split=validation_split)

  train_generator = train_datagen.flow_from_directory(
      data_dir,
      target_size=(img_height, img_width),
      batch_size=batch_size,
      subset='training',
      class_mode='categorical')

  validation_generator = validation_datagen.flow_from_directory(
      data_dir,
      target_size=(img_height, img_width),
      batch_size=batch_size,
      subset='validation',
      class_mode='categorical')

  return train_generator, validation_generator

Load pre-trained model, take some layers from it, and define additional layer on top of it

In [37]:
def prepare_model(n_classes, learning_rate=0.001, dropout_rate=0.3):
  """Build and compile the classifier on top of a frozen EfficientNetB7.

  The ImageNet-pretrained network is cut at the output of layer
  ``block6a_project_conv``. Global average pooling, a 1024-unit ReLU layer,
  dropout and a softmax output layer are stacked on top of that output.

  Args:
    n_classes: Number of output classes (size of the softmax layer).
    learning_rate: Learning rate of the Adam optimizer.
    dropout_rate: Dropout rate applied before the output layer.

  Returns:
    The compiled Keras model (categorical cross-entropy loss, accuracy metric).
  """
  # `classes` is only meaningful together with include_top=True, so it is not
  # passed here; the number of classes is set by the Dense output layer below.
  efficient_net = EfficientNetB7(weights='imagenet', include_top=False)

  # Freeze the pretrained layers so that only the new head is trained.
  for layer in efficient_net.layers:
    layer.trainable = False

  last_output = efficient_net.get_layer('block6a_project_conv').output

  x = GlobalAveragePooling2D()(last_output)
  x = Dense(1024, activation='relu')(x)
  x = Dropout(dropout_rate)(x)
  x = Dense(n_classes, activation='softmax')(x)
  model = Model(efficient_net.input, x)

  model.compile(optimizer=Adam(learning_rate=learning_rate),
                loss='categorical_crossentropy',
                metrics=['accuracy'])

  return model

Build the data generators and the model, and create a checkpoint callback so that training can be continued later

In [43]:
# One output unit per class folder found in the combined dataset.
n_classes=len(all_foods_sorted) # 56 in the recorded run

# Generators for the training and validation subsets of the combined dataset.
train_generator, validation_generator = prepare_data_input_pipeline()

model = prepare_model(n_classes)

# Saves the model to Google Drive; with save_best_only=True only the best epoch
# so far is kept (the monitored quantity is left at the Keras default).
checkpoint = callbacks.ModelCheckpoint('/content/drive/MyDrive/Google_Bangkit/capstone/checkpoint_'+str(n_classes)+'_classes', save_best_only=True)

# Uncomment to print the layer-by-layer model summary.
# model.summary()

Found 28907 images belonging to 56 classes.
Found 1825 images belonging to 56 classes.


Train the model; the checkpoint callback saves the best epoch to Google Drive

In [44]:
# First training run: 5 epochs, evaluated on the validation generator after
# each epoch; the checkpoint callback writes the model to Google Drive.
history = model.fit(train_generator,
                    validation_data=validation_generator,
                    epochs=5,
                    verbose=1,
                    callbacks=[checkpoint])

# Optional extras, currently disabled: save only the weights, and keep the
# class-name -> index mapping used by the generator.
# model.save_weights('weights')
# class_map = train_generator.class_indices

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


Continue training from the checkpoint

In [None]:
# Continue training: rebuild the generators and the checkpoint callback, load
# the model saved by the previous run and train it for 5 more epochs.
n_classes=len(all_foods_sorted) # 56 in the recorded run
train_generator, validation_generator = prepare_data_input_pipeline()
# NOTE(review): this is a new callback object, so it presumably does not know
# the best score reached in the earlier run - confirm whether the first epoch
# here can overwrite a better checkpoint.
checkpoint = callbacks.ModelCheckpoint('/content/drive/MyDrive/Google_Bangkit/capstone/checkpoint_'+str(n_classes)+'_classes', save_best_only=True)
loaded_model = load_model('/content/drive/MyDrive/Google_Bangkit/capstone/checkpoint_'+str(n_classes)+'_classes') # checkpoint folder on Google Drive
new_history = loaded_model.fit(train_generator,
                    validation_data=validation_generator,
                    epochs=5,
                    verbose=1,
                    callbacks=[checkpoint])

Found 28907 images belonging to 56 classes.
Found 1825 images belonging to 56 classes.
Epoch 1/5
Epoch 2/5

# Visualize the accuracy and loss plots

In [None]:
def _plot_training_curves(history, title, metric_keys, ylabel, legend_labels):
    """Plot the given entries of ``history.history`` against the epoch index."""
    plt.title(title)
    for key in metric_keys:
        plt.plot(history.history[key])
    plt.ylabel(ylabel)
    plt.xlabel('epoch')
    plt.legend(legend_labels, loc='best')
    plt.show()


def plot_accuracy(history,title):
    """Plot training and validation accuracy per epoch.

    Reads the 'accuracy' and 'val_accuracy' entries of ``history.history``.
    """
    _plot_training_curves(
        history,
        title,
        ('accuracy', 'val_accuracy'),
        'accuracy',
        ['train_accuracy', 'validation_accuracy'],
    )


def plot_loss(history,title):
    """Plot training and validation loss per epoch.

    Reads the 'loss' and 'val_loss' entries of ``history.history``.
    """
    _plot_training_curves(
        history,
        title,
        ('loss', 'val_loss'),
        'loss',
        ['train_loss', 'validation_loss'],
    )

plot_accuracy(new_history,'accuracy')
plot_loss(new_history,'loss')