In [1]:
# Mount Google Drive into the Colab filesystem; later cells read their inputs
# from paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Imports

In [40]:
import os
import cv2
import csv
import random
import string
import shutil
import zipfile
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from shutil import copyfile, move
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img

# Utility Functions

In [3]:
def split_data(SOURCE_DIR, TRAINING_DIR, VALIDATION_DIR, SPLIT_SIZE):
  """Randomly copy the files of one directory into training and validation folders.

  Zero-length files are reported and excluded *before* the split threshold is
  computed, so the resulting train/validation ratio does not depend on where
  the empty files happen to land in the shuffled order.

  Args:
    SOURCE_DIR: Directory whose files are split. The files are copied, so
      this directory is left untouched.
    TRAINING_DIR: Existing directory that receives the training share.
    VALIDATION_DIR: Existing directory that receives all remaining files.
    SPLIT_SIZE: Fraction (0..1) of the usable files copied to TRAINING_DIR.
      A fractional file count is rounded up.
  """
  usable_files = []
  # Sort the listing so that a seeded `random` produces the same split every run
  # (os.listdir returns entries in arbitrary order).
  for filename in sorted(os.listdir(SOURCE_DIR)):
    if os.path.getsize(os.path.join(SOURCE_DIR, filename)) <= 0:
      print("{} is zero length, so ignoring.".format(filename))
      continue
    usable_files.append(filename)

  shuffled_files = random.sample(usable_files, len(usable_files))
  train_size = len(usable_files) * SPLIT_SIZE
  for position, filename in enumerate(shuffled_files):
    # Comparing the integer position with the float threshold sends
    # ceil(train_size) files to training and the rest to validation.
    target_dir = TRAINING_DIR if position < train_size else VALIDATION_DIR
    copyfile(os.path.join(SOURCE_DIR, filename), os.path.join(target_dir, filename))

In [35]:
def split_val(VALIDATION_DIR, TEST_DIR, SPLIT_SIZE=0.5):
  """Move a random share of the validation files into the test directory.

  Zero-length files are reported and left in place; they are excluded *before*
  the number of files to move is computed, so the outcome does not depend on
  where they land in the shuffled order.

  Args:
    VALIDATION_DIR: Directory whose files are partly moved out.
    TEST_DIR: Existing directory that receives the moved files.
    SPLIT_SIZE: Fraction (0..1) of the usable files moved to TEST_DIR.
      Defaults to 0.5. A fractional file count is rounded up.
  """
  usable_files = []
  # Sort the listing so that a seeded `random` produces the same split every run
  # (os.listdir returns entries in arbitrary order).
  for filename in sorted(os.listdir(VALIDATION_DIR)):
    if os.path.getsize(os.path.join(VALIDATION_DIR, filename)) <= 0:
      print("{} is zero length, so ignoring.".format(filename))
      continue
    usable_files.append(filename)

  shuffled_files = random.sample(usable_files, len(usable_files))
  test_size = len(usable_files) * SPLIT_SIZE
  for position, filename in enumerate(shuffled_files):
    if position >= test_size:
      # Everything from here on stays in the validation directory.
      break
    move(os.path.join(VALIDATION_DIR, filename), os.path.join(TEST_DIR, filename))

# Extract Data

In [None]:
# Load the reviews CSV from the mounted Drive into a DataFrame.
# NOTE(review): in the cells visible here `df` is only inspected (info/head) and
# is not used by the image pipeline — confirm whether it is still needed.
df = pd.read_csv(r"/content/drive/MyDrive/Snacktify/Reviews.csv")

In [None]:
# Show column dtypes, non-null counts and memory usage of the reviews table.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 568454 entries, 0 to 568453
Data columns (total 10 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   Id                      568454 non-null  int64 
 1   ProductId               568454 non-null  object
 2   UserId                  568454 non-null  object
 3   ProfileName             568438 non-null  object
 4   HelpfulnessNumerator    568454 non-null  int64 
 5   HelpfulnessDenominator  568454 non-null  int64 
 6   Score                   568454 non-null  int64 
 7   Time                    568454 non-null  int64 
 8   Summary                 568427 non-null  object
 9   Text                    568454 non-null  object
dtypes: int64(5), object(5)
memory usage: 43.4+ MB


In [None]:
# Preview the first rows of the reviews table.
df.head()

Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text
0,1,B001E4KFG0,A3SGXH7AUHU8GW,delmartian,1,1,5,1303862400,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,2,B00813GRG4,A1D87F6ZCVE5NK,dll pa,0,0,1,1346976000,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,3,B000LQOCH0,ABXLMWJIXXAIN,"Natalia Corres ""Natalia Corres""",1,1,4,1219017600,"""Delight"" says it all",This is a confection that has been around a fe...
3,4,B000UA0QIQ,A395BORC6FGVXV,Karl,3,3,2,1307923200,Cough Medicine,If you are looking for the secret ingredient i...
4,5,B006K2ZZ7K,A1UQRSCLF8GW1T,"Michael D. Bigham ""M. Wassir""",0,0,5,1350777600,Great taffy,Great taffy at a great price. There was a wid...


In [5]:
# Unpack the image dataset archive from Drive into ./data/.
# The context manager closes the archive even if extraction raises.
local_zip = '/content/drive/MyDrive/Snacktify/dataset.zip'
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
  zip_ref.extractall('./data/')

# Exploratory Data Analysis

In [6]:
# Directory layout under ./data/:
#   dataset_dir    - extracted source images, one sub-folder per class
#   train_dir      - training split
#   validation_dir - validation split
#   test_dir       - test split
dataset_dir = './data/dataset'
train_dir = './data/train'
validation_dir = './data/validation'
test_dir = './data/test'

In [None]:
# Report how many entries each class folder of the extracted dataset contains.
for class_name in os.listdir(dataset_dir):
  class_path = os.path.join(dataset_dir, class_name)
  image_count = len(os.listdir(class_path))
  print("Total {} images: {}".format(class_name, image_count))

Total serabi solo images: 300
Total putu ayu images: 300
Total lanting images: 300
Total wajik images: 300
Total lumpia images: 300
Total grontol images: 300


# Data Preprocessing

In [41]:
# Remove any previous split so re-running the cells below never mixes old and
# new files. The existence check lets this cell run on a fresh runtime, where
# the folders have not been created yet and a bare rmtree would raise.
for split_dir in ('./data/train', './data/validation', './data/test'):
  if os.path.isdir(split_dir):
    shutil.rmtree(split_dir)

In [42]:
# Create one sub-folder per class inside each split directory.
# exist_ok=True keeps the cell re-runnable when the folders already exist.
for cls in os.listdir(dataset_dir):
  for split_dir in (train_dir, validation_dir, test_dir):
    os.makedirs(os.path.join(split_dir, cls), exist_ok=True)

In [43]:
# Seed Python's RNG so the shuffles inside split_data/split_val are repeatable
# for a given directory listing order.
random.seed(42)

# Per class: 70% of the images are copied to train, the remainder to validation;
# half of that validation share is then moved to test (roughly 70/15/15).
for cls in os.listdir(dataset_dir):
  class_source_dir = os.path.join(dataset_dir, cls)
  class_train_dir = os.path.join(train_dir, cls)
  class_validation_dir = os.path.join(validation_dir, cls)
  class_test_dir = os.path.join(test_dir, cls)
  split_data(class_source_dir, class_train_dir, class_validation_dir, 0.7)
  split_val(class_validation_dir, class_test_dir)

In [56]:
# Input settings shared by all three generators.
# TARGET_SIZE must agree with the input_shape of the model's first layer.
BATCH_SIZE = 8
TARGET_SIZE = (150, 150)

# Training images are rescaled to [0, 1] and randomly augmented
# (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(rescale=1.0/255,
                                   rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   fill_mode='nearest')
train_generator = train_datagen.flow_from_directory(directory=train_dir,
                                                    batch_size=BATCH_SIZE,
                                                    class_mode='sparse',
                                                    target_size=TARGET_SIZE)

# Validation and test images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1.0/255.)
validation_generator = validation_datagen.flow_from_directory(directory=validation_dir,
                                                              batch_size=BATCH_SIZE,
                                                              class_mode='sparse',
                                                              target_size=TARGET_SIZE)

test_datagen = ImageDataGenerator(rescale=1.0/255.)
# Build the test pipeline from its own test_datagen so it stays independent of
# any later change to the validation settings. shuffle=False keeps a fixed file
# order so predictions can be matched against test_generator.classes.
test_generator = test_datagen.flow_from_directory(directory=test_dir,
                                                  batch_size=BATCH_SIZE,
                                                  class_mode='sparse',
                                                  target_size=TARGET_SIZE,
                                                  shuffle=False)

Found 1260 images belonging to 6 classes.
Found 270 images belonging to 6 classes.
Found 270 images belonging to 6 classes.


# Model Training

In [112]:
# Small CNN for 150x150 RGB inputs: four Conv2D + MaxPooling2D stages, followed
# by a dense classifier head with one softmax output per class (6 classes).
model = tf.keras.models.Sequential()

# Convolutional feature extractor
model.add(tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(150, 150, 3)))
model.add(tf.keras.layers.MaxPooling2D(2,2))
model.add(tf.keras.layers.Conv2D(32, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2,2))
model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2,2))
model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2,2))

# Classifier head
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dense(6, activation='softmax'))

In [113]:
# Sparse categorical cross-entropy takes integer class labels, matching the
# generators created with class_mode='sparse'; accuracy is tracked as a metric.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy']) 

In [114]:
# Print each layer's output shape and parameter count.
model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_36 (Conv2D)          (None, 148, 148, 16)      448       
                                                                 
 max_pooling2d_36 (MaxPoolin  (None, 74, 74, 16)       0         
 g2D)                                                            
                                                                 
 conv2d_37 (Conv2D)          (None, 72, 72, 32)        4640      
                                                                 
 max_pooling2d_37 (MaxPoolin  (None, 36, 36, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_38 (Conv2D)          (None, 34, 34, 64)        18496     
                                                                 
 max_pooling2d_38 (MaxPoolin  (None, 17, 17, 64)     

In [115]:
# Stop training once val_loss has not improved for 5 consecutive epochs.
# restore_best_weights=True rolls the model back to the best epoch; without it
# the model keeps the weights from the last epoch, i.e. after `patience` epochs
# without improvement, and those would be evaluated and exported.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                  mode='min',
                                                  verbose=1,
                                                  patience=5,
                                                  restore_best_weights=True)

In [116]:
# Train for up to 50 epochs, validating after each one; the early_stopping
# callback may end training sooner.
model.fit(train_generator, epochs=50, validation_data=validation_generator, callbacks=[early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 42: early stopping


<keras.callbacks.History at 0x7fb6110eb310>

In [117]:
# Evaluate on the held-out test split. evaluate() returns the loss followed by
# the compiled metrics (here: accuracy); the bare `acc` shows it as cell output.
loss, acc = model.evaluate(test_generator)
acc



0.8518518805503845

# Deploy Model

In [132]:
# Class names in label-index order. flow_from_directory assigns indices to the
# class sub-folders in alphanumeric order, so this list must stay sorted the
# same way (compare with train_generator.class_indices).
labels = ['grontol', 'lanting', 'lumpia', 'putu ayu', 'serabi solo', 'wajik']

test_img_path = "/content/drive/MyDrive/Snacktify/test.jpg"
test_img = cv2.imread(test_img_path)
if test_img is None:
  # cv2.imread returns None instead of raising when the file is missing or unreadable.
  raise FileNotFoundError("Could not read image: {}".format(test_img_path))

# Reproduce the training-time preprocessing: the Keras generators feed RGB
# images rescaled by 1/255, whereas cv2.imread yields BGR uint8 in 0..255.
test_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)
test_img = cv2.resize(test_img, (150, 150))
test_img = test_img.astype(np.float32) / 255.0
# Add the leading batch dimension expected by model.predict.
test_img = (np.expand_dims(test_img, 0))
print(test_img.shape)

(1, 150, 150, 3)


In [133]:
# Run the model on the single-image batch and map the most probable class
# index back to its label; the bare expression shows it as the cell output.
class_probabilities = model.predict(test_img)
predicted_index = int(np.argmax(class_probabilities, axis=1)[0])
labels[predicted_index]



'putu ayu'

In [134]:
# Export the trained model in SavedModel format to snackscan/1.
# NOTE(review): the <name>/<version> folder layout looks intended for
# TensorFlow Serving — confirm with the deployment target.
model_version = "1"
model_save = "snackscan"
model_path = os.path.join(model_save, model_version)
tf.saved_model.save(model, model_path)



In [136]:
# Zip the exported snackscan/ directory (relative to the current directory)
# into model.zip so it can be downloaded from the runtime.
shutil.make_archive('model', 'zip', '.', 'snackscan')

'/content/model.zip'