In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Import

In [50]:
import os
import cv2
import csv
import random
import string
import zipfile
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from shutil import copyfile
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img

# Util Function

In [25]:
def split_data(SOURCE_DIR, TRAINING_DIR, VALIDATION_DIR, SPLIT_SIZE):
  files = os.listdir(SOURCE_DIR)
  quantity = len(files)
  rand_files = random.sample(files, quantity)
  train_size = quantity * SPLIT_SIZE
  train_count = 0
  zero_len_count = 0
  for i, filename in enumerate(rand_files):
    if os.path.getsize(os.path.join(SOURCE_DIR, filename)) <= 0:
      print("{} is zero length, so ignoring.".format(filename))
      zero_len_count = zero_len_count + 1
      train_size = (quantity-zero_len_count) * SPLIT_SIZE
      continue
    if train_count < train_size:
      copyfile(os.path.join(SOURCE_DIR, filename), os.path.join(TRAINING_DIR, filename))
      train_count = train_count + 1
    else:
      copyfile(os.path.join(SOURCE_DIR, filename), os.path.join(VALIDATION_DIR, filename))

In [31]:
def split_val(VALIDATION_DIR, TEST_DIR):
  files = os.listdir(VALIDATION_DIR)
  quantity = len(files)
  rand_files = random.sample(files, quantity)
  train_size = quantity * 0.5
  train_count = 0
  zero_len_count = 0
  for i, filename in enumerate(rand_files):
    if os.path.getsize(os.path.join(VALIDATION_DIR, filename)) <= 0:
      print("{} is zero length, so ignoring.".format(filename))
      zero_len_count = zero_len_count + 1
      train_size = (quantity-zero_len_count) * 0.5
      continue
    if train_count < train_size:
      copyfile(os.path.join(VALIDATION_DIR, filename), os.path.join(TEST_DIR, filename))
      train_count = train_count + 1
    else:
      break

# Extract Data

In [4]:
df = pd.read_csv(r"/content/drive/MyDrive/Snacktify/Reviews.csv")

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 568454 entries, 0 to 568453
Data columns (total 10 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   Id                      568454 non-null  int64 
 1   ProductId               568454 non-null  object
 2   UserId                  568454 non-null  object
 3   ProfileName             568438 non-null  object
 4   HelpfulnessNumerator    568454 non-null  int64 
 5   HelpfulnessDenominator  568454 non-null  int64 
 6   Score                   568454 non-null  int64 
 7   Time                    568454 non-null  int64 
 8   Summary                 568427 non-null  object
 9   Text                    568454 non-null  object
dtypes: int64(5), object(5)
memory usage: 43.4+ MB


In [6]:
df.head()

Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text
0,1,B001E4KFG0,A3SGXH7AUHU8GW,delmartian,1,1,5,1303862400,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,2,B00813GRG4,A1D87F6ZCVE5NK,dll pa,0,0,1,1346976000,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,3,B000LQOCH0,ABXLMWJIXXAIN,"Natalia Corres ""Natalia Corres""",1,1,4,1219017600,"""Delight"" says it all",This is a confection that has been around a fe...
3,4,B000UA0QIQ,A395BORC6FGVXV,Karl,3,3,2,1307923200,Cough Medicine,If you are looking for the secret ingredient i...
4,5,B006K2ZZ7K,A1UQRSCLF8GW1T,"Michael D. Bigham ""M. Wassir""",0,0,5,1350777600,Great taffy,Great taffy at a great price. There was a wid...


In [9]:
local_zip = '/content/drive/MyDrive/Snacktify/dataset.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('./data/')
zip_ref.close()

# Exploratory Data Analysis

In [32]:
dataset_dir = './data/dataset'
train_dir = './data/train'
validation_dir = './data/validation'
test_dir = './data/test'

In [16]:
for cls in os.listdir(dataset_dir):
  print("Total {} images: {}".format(cls, len(os.listdir(os.path.join(dataset_dir, cls)))))

Total serabi solo images: 300
Total putu ayu images: 300
Total lanting images: 300
Total wajik images: 300
Total lumpia images: 300
Total grontol images: 300


# Data Preprocessing

In [28]:
import shutil

shutil.rmtree('./data/train')
shutil.rmtree('./data/validation')

In [29]:
for cls in os.listdir(dataset_dir):
  os.makedirs(os.path.join(train_dir, cls))
  os.makedirs(os.path.join(validation_dir, cls))

In [30]:
for cls in os.listdir(dataset_dir):
  split_data(os.path.join(dataset_dir, cls), os.path.join(train_dir, cls), os.path.join(validation_dir, cls), 0.7)

In [39]:
train_datagen = ImageDataGenerator(rescale=1.0/255.)
train_generator = train_datagen.flow_from_directory(directory=train_dir,
                                                    batch_size=8,
                                                    class_mode='categorical',
                                                    target_size=(150, 150))

validation_datagen = ImageDataGenerator(rescale=1.0/255.)
validation_generator = validation_datagen.flow_from_directory(directory=validation_dir,
                                                              batch_size=8,
                                                              class_mode='categorical',
                                                              target_size=(150, 150))

Found 1260 images belonging to 6 classes.
Found 540 images belonging to 6 classes.


# Modelling

In [45]:
model = tf.keras.models.Sequential([ 
  tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(150, 150, 3)),
  tf.keras.layers.MaxPooling2D(2,2),
  tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
  tf.keras.layers.MaxPooling2D(2,2), 
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(216, activation='relu'), 
  tf.keras.layers.Dense(6, activation='softmax')  
])

In [46]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy']) 

In [47]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 148, 148, 16)      448       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 74, 74, 16)       0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 72, 72, 32)        4640      
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 36, 36, 32)       0         
 2D)                                                             
                                                                 
 conv2d_8 (Conv2D)           (None, 34, 34, 64)        18496     
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 17, 17, 64)      

In [48]:
model.fit(train_generator, epochs=15, validation_data=validation_generator)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f085f86f7f0>

In [65]:
test_img = cv2.imread("/content/drive/MyDrive/Snacktify/test2.jpg")
test_img = cv2.resize(test_img, (150, 150))
test_img = (np.expand_dims(test_img, 0))
print(test_img.shape)

(1, 150, 150, 3)


In [64]:
model.predict(test_img)



array([[0., 1., 0., 0., 0., 0.]], dtype=float32)

In [49]:
model_version = "1"
model_save = "models"
model_path = os.path.join(model_save, model_version)
tf.saved_model.save(model, model_path)

