<a href="https://colab.research.google.com/github/AnandaIlyasa/bangkit-capstone-bahanbaku/blob/main/food_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from __future__ import absolute_import, division, print_function

import tensorflow as tf

import tensorflow.keras.backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import EfficientNetB7
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import callbacks
from tensorflow.keras.optimizers import Adam

from tensorflow.keras import models
from tensorflow.keras.applications.efficientnet import preprocess_input

import cv2
import os
import random
import collections
from collections import defaultdict

from shutil import copy
from shutil import copytree, rmtree

import numpy as np

import matplotlib.pyplot as plt
import matplotlib.image as img
%matplotlib inline

In [None]:
# Clone the tensorflow/examples repository, which ships sample images that can be
# used to evaluate the trained model.
# NOTE(review): no later cell in this notebook reads from the cloned `examples/`
# directory - confirm the clone is still needed.
!git clone https://github.com/tensorflow/examples.git

Cloning into 'examples'...
remote: Enumerating objects: 21431, done.[K
remote: Counting objects: 100% (92/92), done.[K
remote: Compressing objects: 100% (74/74), done.[K
remote: Total 21431 (delta 37), reused 50 (delta 16), pack-reused 21339[K
Receiving objects: 100% (21431/21431), 35.60 MiB | 13.68 MiB/s, done.
Resolving deltas: 100% (11793/11793), done.


In [None]:
def get_data_extract():
  """Download and extract the Food-101 archive unless it is already present.

  The dataset counts as present when the current working directory contains
  an entry named "food-101". Otherwise the archive is fetched with
  tf.keras.utils.get_file and extracted. A status line is printed either way.
  """
  already_present = "food-101" in os.listdir()
  if already_present:
    print("Dataset already exists")
    return

  archive_name = 'food-101.tar.gz'
  archive_url = 'http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz'
  tf.keras.utils.get_file(
      archive_name,
      archive_url,
      cache_subdir='/content',  # absolute path, so the download goes to /content
      extract=True,
      archive_format='tar',
      cache_dir=None)
  print("Dataset downloaded and extracted!")

In [None]:
# Download and unpack Food-101; this is skipped when a "food-101" entry already
# exists in the working directory.
get_data_extract()

Downloading data from http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz
Dataset downloaded and extracted!


In [None]:
# Folder that holds one sub-directory of images per food class.
data_dir = "food-101/images/"

# Directory entries under data_dir, in sorted order.
foods_sorted = os.listdir(data_dir)
foods_sorted.sort()

In [None]:
def prepare_data(filepath, src, dest):
  """Copy the images listed in a split file into per-class folders.

  Args:
    filepath: Text file with one "<class>/<image_id>" entry per line
      (no file extension). Blank lines are ignored.
    src: Directory containing "<class>/<image_id>.jpg" for every entry.
    dest: Output directory; "dest/<class>/" is created when missing.

  NOTE(review): a later cell in this notebook defines a zero-argument
  function that is also called prepare_data. Once that cell has run, this
  helper is shadowed and the split-copy cells can no longer be re-run.
  """
  classes_images = defaultdict(list)
  with open(filepath, 'r') as txt:
    for line in txt:
      entry = line.strip()
      if not entry:
        # A trailing or blank line would otherwise raise IndexError below.
        continue
      parts = entry.split('/')
      classes_images[parts[0]].append(parts[1] + '.jpg')

  for food, file_names in classes_images.items():
    class_dir = os.path.join(dest, food)
    # exist_ok makes re-runs safe without a separate existence check.
    os.makedirs(class_dir, exist_ok=True)
    for file_name in file_names:
      copy(os.path.join(src, food, file_name), os.path.join(class_dir, file_name))
  print("Copying Done!")

In [None]:
# Copy the images listed in food-101/meta/train.txt from food-101/images into
# food-101/train/<class>/.
print("Creating train data...")
prepare_data('food-101/meta/train.txt', 'food-101/images', 'food-101/train')

Creating train data...
Copying Done!


In [None]:
# Copy the images listed in food-101/meta/test.txt from food-101/images into
# food-101/test/<class>/.
print("Creating test data...")
prepare_data('food-101/meta/test.txt', 'food-101/images', 'food-101/test')

Creating test data...
Copying Done!


In [None]:
# train_files = sum([len(files) for i, j, files in os.walk("food-101/train")])
# print("Total number of samples in train folder")
# print(train_files)

In [None]:
# test_files = sum([len(files) for i, j, files in os.walk("food-101/test")])
# print("Total number of samples in test folder")
# print(test_files)

In [None]:
def dataset_mini(food_list, src, dest):
  """Rebuild dest so it contains copies of only the selected class folders.

  Args:
    food_list: Class folder names to copy.
    src: Directory that contains one sub-directory per class.
    dest: Output directory. Any existing content is deleted first, so only
      the requested classes remain afterwards.
  """
  # Start from an empty destination on every call.
  stale_copy_present = os.path.exists(dest)
  if stale_copy_present:
    rmtree(dest)
  os.makedirs(dest)

  for class_name in food_list:
    source_dir = os.path.join(src, class_name)
    target_dir = os.path.join(dest, class_name)
    print("Copying images into", class_name)
    copytree(source_dir, target_dir)

In [None]:
# Classes used for the reduced ("mini") dataset. The order is kept as written
# because dataset_mini() copies and logs the classes in this order.
food_list = [
    "apple_pie",
    "bakso",
    "bibimbap",
    "bread_pudding",
    "cheesecake",
    "chicken_curry",
    "chicken_wings",
    "chocolate_cake",
    "french_fries",
    "donat",
    "garlic_bread",
    "gnocchi",
    "gudeg",
    "hamburger",
    "omelette",
    "pizza",
    "cakwe",
    "samosa",
    "batagor",
    "shrimp_and_grits",
    "strawberry_shortcake",
    "tacos",
    "tiramisu",
    "tuna_tartare",
    "waffles",
]
# Earlier class selection, kept for reference:
# food_list = ["apple_pie","baklava","bibimbap","bread_pudding","cheesecake","chicken_curry","chicken_wings","chocolate_cake","donuts","french_fries","fried_rice","garlic_bread","gnocchi","hamburger","hot_dog","lasagna","nachos","omelette","pancakes","panna_cotta","pizza","ramen","ravioli","red_velvet_cake","risotto","samosa","sashimi","shrimp_and_grits","steak","strawberry_shortcake","sushi","tacos","tiramisu","tuna_tartare","waffles"]

# Full per-class splits and the reduced copies built from them.
src_train, dest_train = 'food-101/train', 'food-101/train_mini'
src_test, dest_test = 'food-101/test', 'food-101/test_mini'

In [None]:
# food_list.sort()
# print("\"",end="")
# for food in food_list:
#   print(food, end="\",\"")
# print()
# print(len(food_list))

In [None]:
# Install the Kaggle command-line client used by the dataset download cell below.
# NOTE(review): the version is not pinned, so a re-run may install a different release.
! pip install kaggle

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
! mkdir ~/.kaggle

In [None]:
# Copy the Kaggle API token into ~/.kaggle/. kaggle.json must already be in the
# working directory (e.g. uploaded to the Colab session); keep it out of version control.
! cp kaggle.json ~/.kaggle/

In [None]:
# Restrict the token file to owner read/write only (mode 600).
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the robertusbagaskara/indonesian-food-image dataset archive with the
# Kaggle CLI (the recorded run saved indonesian-food-image.zip to /content).
! kaggle datasets download -d robertusbagaskara/indonesian-food-image

Downloading indonesian-food-image.zip to /content
 99% 235M/237M [00:10<00:00, 23.9MB/s]
100% 237M/237M [00:10<00:00, 24.0MB/s]


In [None]:
! unzip /content/indonesian-food-image.zip

Archive:  /content/indonesian-food-image.zip
  inflating: dataset - formated/classes.names  
  inflating: dataset - formated/classes.txt  
  inflating: dataset - formated/test/air/air_test (1).jpg  
  inflating: dataset - formated/test/air/air_test (1).txt  
  inflating: dataset - formated/test/air/air_test (2).jpg  
  inflating: dataset - formated/test/air/air_test (2).txt  
  inflating: dataset - formated/test/air/air_test (3).jpg  
  inflating: dataset - formated/test/air/air_test (3).txt  
  inflating: dataset - formated/test/air/air_test (4).jpg  
  inflating: dataset - formated/test/air/air_test (4).txt  
  inflating: dataset - formated/test/air/air_test (5).jpg  
  inflating: dataset - formated/test/air/air_test (5).txt  
  inflating: dataset - formated/test/air/air_test (6).jpg  
  inflating: dataset - formated/test/air/air_test (6).txt  
  inflating: dataset - formated/test/air/air_test (7).jpg  
  inflating: dataset - formated/test/air/air_test (7).txt  
  inflating: dataset 

In [None]:
# Merge the Indonesian dataset's training class folders into food-101/train so
# dataset_mini() can pick classes from both sources. The trailing "/." copies
# the contents of the directory rather than the directory itself.
# NOTE(review): the unzip log lists .txt files next to the test images; presumably
# the train folders contain them too, in which case they are copied as well.
! cp -a /content/dataset\ -\ formated/train/. /content/food-101/train

In [None]:
# ! cp -a /content/dataset/valid/. /content/food-101/train

In [None]:
# Merge the Indonesian dataset's test class folders into food-101/test. The
# trailing "/." copies the contents of the directory rather than the directory itself.
# NOTE(review): the unzip log shows a .txt file next to every test image; those
# files are copied too and are included in the file counts printed later.
! cp -a /content/dataset\ -\ formated/test/. /content/food-101/test

In [None]:
# !ls -l /content/food-101/images/bakso | wc -l

In [None]:
# Build the reduced training set, then count every file below it.
dataset_mini(food_list, src_train, dest_train)
train_files = sum(
    len(file_names)
    for _root, _dirs, file_names in os.walk("food-101/train_mini")
)
print(train_files)

Copying images into apple_pie
Copying images into bakso
Copying images into bibimbap
Copying images into bread_pudding
Copying images into cheesecake
Copying images into chicken_curry
Copying images into chicken_wings
Copying images into chocolate_cake
Copying images into french_fries
Copying images into donat
Copying images into garlic_bread
Copying images into gnocchi
Copying images into gudeg
Copying images into hamburger
Copying images into omelette
Copying images into pizza
Copying images into cakwe
Copying images into samosa
Copying images into batagor
Copying images into shrimp_and_grits
Copying images into strawberry_shortcake
Copying images into tacos
Copying images into tiramisu
Copying images into tuna_tartare
Copying images into waffles
15234


In [None]:
# Build the reduced test set, then count every file below it.
dataset_mini(food_list, src_test, dest_test)
test_files = sum(
    len(file_names)
    for _root, _dirs, file_names in os.walk("food-101/test_mini")
)
print(test_files)

Copying images into apple_pie
Copying images into bakso
Copying images into bibimbap
Copying images into bread_pudding
Copying images into cheesecake
Copying images into chicken_curry
Copying images into chicken_wings
Copying images into chocolate_cake
Copying images into french_fries
Copying images into donat
Copying images into garlic_bread
Copying images into gnocchi
Copying images into gudeg
Copying images into hamburger
Copying images into omelette
Copying images into pizza
Copying images into cakwe
Copying images into samosa
Copying images into batagor
Copying images into shrimp_and_grits
Copying images into strawberry_shortcake
Copying images into tacos
Copying images into tiramisu
Copying images into tuna_tartare
Copying images into waffles
5056


In [None]:
# n_classes,num_epochs, nb_train_samples,nb_validation_samples = 25, 2, train_files, test_files
def prepare_data(train_data_dir='food-101/train_mini',
                 validation_data_dir='food-101/test_mini',
                 img_size=(300, 300),
                 batch_size=10,
                 holdout_fraction=0.85):
  """Build the Keras training and validation image generators.

  Both generators are created with validation_split=holdout_fraction and
  subset='training'. Each one therefore yields only the share of its
  directory that is not held out: with the default 0.85 that is roughly 15%
  of the images (the recorded run found 2278 training and 766 validation
  images). This subsamples the data to shorten an epoch; lower
  holdout_fraction to train on more of it.

  Args:
    train_data_dir: Directory with one sub-folder per class, used for training.
    validation_data_dir: Directory with one sub-folder per class, used for
      validation.
    img_size: (height, width) that images are resized to.
    batch_size: Number of images per batch for both generators.
    holdout_fraction: Fraction of each directory that is left unused.

  Returns:
    (train_generator, validation_generator)

  NOTE(review): this definition reuses the name of the earlier
  prepare_data(filepath, src, dest) copy helper and replaces it once this
  cell runs, so the split-copy cells cannot be re-run afterwards.
  """
  K.clear_session()

  img_height, img_width = img_size

  # Augmentation is applied to the training images only.
  train_datagen = ImageDataGenerator(
      preprocessing_function=preprocess_input,
      shear_range=0.2,
      validation_split=holdout_fraction,
      zoom_range=0.2,
      horizontal_flip=True)

  test_datagen = ImageDataGenerator(
      preprocessing_function=preprocess_input,
      validation_split=holdout_fraction)

  train_generator = train_datagen.flow_from_directory(
      train_data_dir,
      target_size=(img_height, img_width),
      batch_size=batch_size,
      subset='training',
      class_mode='categorical')

  validation_generator = test_datagen.flow_from_directory(
      validation_data_dir,
      target_size=(img_height, img_width),
      batch_size=batch_size,
      subset='training',
      class_mode='categorical')

  return train_generator, validation_generator

In [None]:
# for layer in inception.layers:
#   layer.trainable = False
# last_layer = inception.get_layer('mixed7')
# last_output = last_layer.output
def prepare_model(n_classes, nb_train_samples=None, nb_validation_samples=None):
  """Build and compile the EfficientNetB7 transfer-learning classifier.

  The ImageNet-pretrained backbone is frozen and cut at the output of layer
  'block6a_project_conv'. A small trainable head (global average pooling,
  two dense layers, dropout, softmax) is attached to that output.

  Args:
    n_classes: Number of units in the final softmax layer.
    nb_train_samples: Unused; accepted so existing callers keep working.
    nb_validation_samples: Unused; accepted so existing callers keep working.

  Returns:
    The compiled Keras model (Adam, categorical cross-entropy, accuracy).
  """
  # `classes` is not passed: it only applies when include_top=True, and the
  # number of outputs is set by the softmax layer below.
  efficient_net = EfficientNetB7(weights='imagenet', include_top=False)
  for layer in efficient_net.layers:
    layer.trainable = False  # train only the new head

  # Use an intermediate feature map rather than the backbone's final output.
  features = efficient_net.get_layer('block6a_project_conv').output

  x = GlobalAveragePooling2D()(features)
  x = Dense(64, activation='relu')(x)
  x = Dense(128, activation='relu')(x)
  x = Dropout(0.3)(x)
  x = Dense(n_classes, activation='softmax')(x)
  model = Model(efficient_net.input, x)

  optimizer = Adam(learning_rate=0.001)
  model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

  return model

In [None]:
# Size of the softmax output; food_list has 25 entries.
n_classes = 25

# Data generators and the transfer-learning model.
train_generator, validation_generator = prepare_data()
model = prepare_model(n_classes, train_files, test_files)

# Keep only the best model seen so far, saved under "checkpoint_<n_classes>".
checkpoint = callbacks.ModelCheckpoint(
    'checkpoint_' + str(n_classes),
    save_best_only=True,
)

history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=2,
    verbose=1,
    callbacks=[checkpoint],
)

# model.save_weights('weights')
# class_map = train_generator.class_indices

Found 2278 images belonging to 25 classes.
Found 766 images belonging to 25 classes.
Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb7_notop.h5
Epoch 1/2
Epoch 2/2


In [None]:
# Continue training from the model previously saved to Google Drive.
# Google Drive must be mounted at /content/drive before this cell runs, otherwise
# load_model cannot find the path.
loaded_model = load_model('/content/drive/MyDrive/Colab Notebooks/25_classes')

# Train the model that was just loaded. The previous code called model.fit,
# which ignored loaded_model and kept training the in-memory model instead.
new_history = loaded_model.fit(train_generator,
                    validation_data=validation_generator,
                    epochs=1,
                    verbose=1,
                    callbacks=[checkpoint])

OSError: ignored

In [None]:
# Mount Google Drive at /content/drive so the cells that read or write
# /content/drive/MyDrive/... can reach it.
# NOTE(review): the "continue training" cell above already loads a model from
# Drive, so this mount needs to run before that cell on a fresh kernel.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# model.save('/content/drive/MyDrive/Colab Notebooks/25_classes')

In [None]:
# Copy the checkpoint_25 directory from the working directory to the
# "Colab Notebooks" folder on the mounted Drive.
!cp -r $(pwd)/checkpoint_25 /content/drive/MyDrive/Colab\ Notebooks

# Visualize the accuracy and loss plots

In [None]:
def plot_accuracy(history,title):
    """Plot training and validation accuracy per epoch.

    Args:
        history: Object whose `.history` dict has 'accuracy' and
            'val_accuracy' entries (as returned by model.fit).
        title: Title shown above the plot.
    """
    metrics = history.history
    plt.title(title)
    for key in ('accuracy', 'val_accuracy'):
        plt.plot(metrics[key])
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    curve_labels = ['train_accuracy', 'validation_accuracy']
    plt.legend(curve_labels, loc='best')
    plt.show()


def plot_loss(history,title):
    """Plot training and validation loss per epoch.

    Args:
        history: Object whose `.history` dict has 'loss' and 'val_loss'
            entries (as returned by model.fit).
        title: Title shown above the plot.
    """
    metrics = history.history
    plt.title(title)
    for key in ('loss', 'val_loss'):
        plt.plot(metrics[key])
    plt.ylabel('loss')
    plt.xlabel('epoch')
    curve_labels = ['train_loss', 'validation_loss']
    plt.legend(curve_labels, loc='best')
    plt.show()

# The titles name the backbone built by prepare_model (EfficientNetB7); the
# previous "Inceptionv3" label did not match the trained model.
plot_accuracy(history,'FOOD101-EfficientNetB7')
plot_loss(history,'FOOD101-EfficientNetB7')

# Predicting classes for new images from the internet using the best trained model

In [None]:
%%time
# Load a previously saved model from Google Drive to make predictions.
# compile=False is passed; presumably because the model is only used for predict().
# NOTE(review): the file name says "20class" while food_list holds 25 labels -
# confirm this model matches the label list used by predict_class.

K.clear_session()
model_best = load_model('/content/drive/MyDrive/Colab Notebooks/trainedmodel_20class.hdf5',compile = False)

In [None]:
# Copy trainedmodel_20class.hdf5 from the working directory to the "Colab Notebooks"
# folder on Drive (relative path, so the working directory must contain drive/).
# NOTE(review): no visible cell writes this file, and the cell above already loads
# it from Drive - confirm where it comes from and whether this copy is still needed.
!cp trainedmodel_20class.hdf5 drive/MyDrive/Colab\ Notebooks

In [None]:
def predict_class(model, images, show = True, target_size=(299, 299)):
  """Predict and optionally display the food class of each image file.

  Labels are looked up in a sorted copy of the module-level food_list. This
  assumes the model's output indices follow the alphabetical class-folder
  order that flow_from_directory assigns - TODO confirm for the loaded model.

  Args:
    model: Object with a predict(batch) method that returns class scores.
    images: Iterable of image file paths.
    show: When True, display each image with its predicted label as title.
    target_size: (height, width) that images are resized to before
      prediction. The default keeps the previous 299x299; note that the
      training generators in this notebook use 300x300.

  Returns:
    List of predicted class names, one per input path, in input order.
  """
  # Sort a copy once: the previous in-loop food_list.sort() reordered the
  # shared module-level list as a side effect of predicting.
  class_names = sorted(food_list)
  predicted_labels = []

  for path in images:
    pixels = image.load_img(path, target_size=target_size)
    pixels = image.img_to_array(pixels)
    batch = np.expand_dims(pixels, axis=0)  # add the batch dimension
    batch = preprocess_input(batch)

    pred = model.predict(batch)
    print(pred)
    pred_value = class_names[int(np.argmax(pred))]
    predicted_labels.append(pred_value)

    if show:
      # Cast to uint8 for display; assumes preprocess_input leaves pixel
      # values in the 0-255 range - TODO confirm.
      plt.imshow(batch[0].astype(np.uint8))
      plt.axis('off')
      plt.title(pred_value)
      plt.show()

  return predicted_labels
  

In [None]:
# Sample images used to try out the loaded model.
images = [
    '/content/food-101/images/apple_pie/1038694.jpg',
    '/content/food-101/images/cheesecake/102854.jpg',
    '/content/food-101/images/chicken_wings/1021138.jpg',
]
predict_class(model_best, images, True)