# Data Pre Processing


In [1]:
import numpy as np
import tensorflow as tf

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import os

dataset_path = os.listdir('dataset')

print (dataset_path)  #what kinds of classes are in this dataset

print("Types of classes labels found: ", len(dataset_path))

['Donuts', 'Fried_rice', 'Pizza']
Types of classes labels found:  3


In [2]:
class_labels = []

for item in dataset_path:
 # Get all the file names
 all_classes = os.listdir('dataset' + '/' +item)

 # Add them to the list
 for room in all_classes:
    class_labels.append((item, str('dataset_path' + '/' +item) + '/' + room))

In [3]:
# Build a dataframe        
df = pd.DataFrame(data=class_labels, columns=['Labels', 'image'])
print(df.head())
print(df.tail())

   Labels                                 image
0  Donuts  dataset_path/Donuts/00FAM7JH1SMS.jpg
1  Donuts  dataset_path/Donuts/058EB4MR27V3.jpg
2  Donuts  dataset_path/Donuts/05R77196O1EI.jpg
3  Donuts  dataset_path/Donuts/05UMHXBXD57X.jpg
4  Donuts  dataset_path/Donuts/071B44KIEMZY.jpg
     Labels                                image
1495  Pizza  dataset_path/Pizza/8F7S081L4I8U.jpg
1496  Pizza  dataset_path/Pizza/8FFS9B1MNBM1.jpg
1497  Pizza  dataset_path/Pizza/8FIMCWVEPGDJ.jpg
1498  Pizza  dataset_path/Pizza/8FM9XBJN9EEB.jpg
1499  Pizza  dataset_path/Pizza/8G2AJFO8DVQP.jpg


In [4]:
# Let's check how many samples for each category are present
print("Total number of images in the dataset: ", len(df))

label_count = df['Labels'].value_counts()
print(label_count)

Total number of images in the dataset:  1500
Donuts        500
Fried_rice    500
Pizza         500
Name: Labels, dtype: int64


In [5]:
import cv2
path = 'dataset/'
dataset_path = os.listdir('dataset')

im_size = 260

images = []
labels = []

for i in dataset_path:
    data_path = path + str(i)  
    filenames = [i for i in os.listdir(data_path) ]
   
    for f in filenames:
        img = cv2.imread(data_path + '/' + f)
        img = cv2.resize(img, (im_size, im_size))
        images.append(img)
        labels.append(i)

In [6]:
#This model takes input images of shape (260, 260, 3), and the input data should range [0, 255]. 

images = np.array(images)

images = images.astype('float32') / 255.0
images.shape

(1500, 260, 260, 3)

In [7]:
y=df['Labels'].values
print(y)

['Donuts' 'Donuts' 'Donuts' ... 'Pizza' 'Pizza' 'Pizza']


In [8]:
y=y.reshape(-1,1)
print(y)

[['Donuts']
 ['Donuts']
 ['Donuts']
 ...
 ['Pizza']
 ['Pizza']
 ['Pizza']]


In [9]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

ct = ColumnTransformer([('my_ohe', OneHotEncoder(), [0])], remainder='passthrough')
Y = ct.fit_transform(y) 
print(Y)

[[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 ...
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]]


In [10]:
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split


images, Y = shuffle(images, Y, random_state=1)


train_x, test_x, train_y, test_y = train_test_split(images, Y, test_size=0.25, random_state=415)

#inspect the shape of the training and testing.
print(train_x.shape)
print(train_y.shape)
print(test_x.shape)
print(test_y.shape)

(1125, 260, 260, 3)
(1125, 3)
(375, 260, 260, 3)
(375, 3)


# EfficientNetB2 Implementation :

In [11]:
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB2

NUM_CLASSES = 3
IMG_SIZE = 260

inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))


# Using model without transfer learning

outputs = EfficientNetB2(include_top=True, weights=None, classes=NUM_CLASSES)(inputs)

In [None]:
model = tf.keras.Model(inputs, outputs)

model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"] )

model.summary()

hist = model.fit(train_x, train_y, epochs=30, verbose=2)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 260, 260, 3)]     0         
                                                                 
 efficientnetb2 (Functional)  (None, 3)                7772796   
                                                                 
Total params: 7,772,796
Trainable params: 7,705,221
Non-trainable params: 67,575
_________________________________________________________________
Epoch 1/30
36/36 - 822s - loss: 3.3421 - accuracy: 0.3538 - 822s/epoch - 23s/step
Epoch 2/30
36/36 - 570s - loss: 2.5428 - accuracy: 0.3396 - 570s/epoch - 16s/step
Epoch 3/30
36/36 - 666s - loss: 2.0459 - accuracy: 0.3582 - 666s/epoch - 18s/step
Epoch 4/30
36/36 - 602s - loss: 2.1764 - accuracy: 0.3449 - 602s/epoch - 17s/step
Epoch 5/30
