In [None]:
import tensorflow as tf

import os
import pathlib
import numpy as np
import matplotlib.pyplot as plt
import re

In [None]:
# Sentinel handed to tf.data (as `num_parallel_calls` / prefetch `buffer_size`)
# further down in the notebook.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Location of the zipped fruit image dataset.
URL='https://github.com/Horea94/Fruit-Images-Dataset/archive/master.zip'

# Download the archive (saved as 'master.zip') and extract it.
path_to_zip = tf.keras.utils.get_file(
    fname='master.zip',
    origin=URL,
    extract=True,
)

In [None]:
# Root of the dataset: looked up beside the downloaded zip file.
download_root = os.path.dirname(path_to_zip)
PATH = os.path.join(download_root, 'Fruit-Images-Dataset-master')

# Folders holding the training images and the test images respectively.
train_dir, validation_dir = (
    os.path.join(PATH, split_name) for split_name in ('Training', 'Test')
)

In [None]:
# Select the classes we are interested in.
# The path object is deliberately NOT called `dir`: that name would shadow the
# built-in dir() for every later cell of the notebook.
validation_path = pathlib.Path(validation_dir)

# One entry per class folder of the test set. Only directories are kept, and
# the names are sorted so the class order is the same on every run (glob order
# is not guaranteed).
CLASS_NAMES = np.array(sorted(
    item.name for item in validation_path.glob('*') if item.is_dir()
))

# A class is kept when its folder name starts with one of these fruit names.
reg=re.compile('^(Apple|Banana|Plum|Pepper|Cherry|Grape|Tomato|Potato|Pear|Peach).*')

CLASS_NAMES_MOST_USED = [c for c in CLASS_NAMES if reg.match(c)]

In [None]:
# Number of examples per training batch.
BATCH_SIZE = 50

# Every image is resized to IMG_HEIGHT x IMG_WIDTH pixels after decoding.
IMG_HEIGHT, IMG_WIDTH = 32, 32

In [None]:
def _list_class_files(base_dir, class_names):
  """Build a single dataset of file paths covering the selected class folders.

  Args:
    base_dir: directory containing one sub-directory per class.
    class_names: names of the class sub-directories to include.

  Returns:
    A `tf.data.Dataset` of file paths: the per-class file listings
    concatenated in the order of `class_names`.

  Raises:
    ValueError: if `class_names` is empty.
  """
  if len(class_names) == 0:
    raise ValueError('No class selected, cannot list files under ' + str(base_dir))

  combined = None
  for name in class_names:
    class_dir = pathlib.Path(os.path.join(base_dir, name))
    files = tf.data.Dataset.list_files(str(class_dir/'*'))
    # Start from the first listing, then append the others one by one, so that
    # any number of classes (including a single one) is supported.
    combined = files if combined is None else combined.concatenate(files)
  return combined


def _count_examples(ds):
  """Return the number of elements of `ds`, found by iterating over it once."""
  return sum(1 for _ in ds.as_numpy_iterator())


# Create a dataset containing the training set we are interested in,
# and count the number of examples in it.
train_ds = _list_class_files(train_dir, CLASS_NAMES_MOST_USED)
TRAINING_NUMBER = _count_examples(train_ds)

# Create a dataset containing the test set we are interested in,
# and count the number of examples in it.
test_ds = _list_class_files(validation_dir, CLASS_NAMES_MOST_USED)
TEST_NUMBER = _count_examples(test_ds)

In [None]:
# Fruit-name prefixes in label order: the position of a prefix is the integer
# label given to every class folder whose name starts with that prefix.
_LABEL_PREFIXES = ('Apple', 'Banana', 'Plum', 'Pepper', 'Cherry',
                   'Grape', 'Tomato', 'Potato', 'Pear', 'Peach')


def decode_label(parts):
  """Return the integer label of the image stored at the given file path.

  Only the name of the folder that directly contains the file is inspected,
  and a fruit name has to appear at the START of that name. Matching the
  whole path instead would let any parent folder (for instance a home
  directory whose name contains 'Apple') decide the label, and a class folder
  such as a hypothetical 'Tomato Cherry' would be labelled as Cherry because
  that keyword is tested first.

  Args:
    parts: scalar string tensor holding the path of an image file, laid out
      as `<...>/<class folder>/<file name>`.

  Returns:
    A scalar int32 tensor: the index in `_LABEL_PREFIXES` of the prefix that
    matches the class folder name, or -1 when no prefix matches.
  """
  # The second-to-last path component is the class folder.
  class_dir = tf.strings.split(parts, os.path.sep)[-2]

  # No prefix is a prefix of another one, so at most one entry is True.
  matches = tf.stack([
      tf.strings.regex_full_match(class_dir, prefix + '.*')
      for prefix in _LABEL_PREFIXES
  ])
  label = tf.argmax(tf.cast(matches, tf.int32), output_type=tf.int32)

  # argmax of an all-False vector is meaningless: report "unknown" as -1.
  return tf.where(tf.reduce_any(matches), label, tf.constant(-1))

def decode_img(img):
  """Decode JPEG bytes into a float image of the configured size.

  Args:
    img: string tensor holding the compressed JPEG data.

  Returns:
    The image decoded with 3 channels, converted to float32 (values in the
    [0,1] range) and resized to [IMG_HEIGHT, IMG_WIDTH].
  """
  target_size = [IMG_HEIGHT, IMG_WIDTH]
  # compressed string -> 3D uint8 tensor
  pixels_uint8 = tf.image.decode_jpeg(img, channels=3)
  # uint8 -> float32 in the [0,1] range
  pixels_float = tf.image.convert_image_dtype(pixels_uint8, tf.float32)
  return tf.image.resize(pixels_float, target_size)

def process_path(file_path):
  """Load one example from disk.

  Args:
    file_path: path of the image file.

  Returns:
    An `(image, label)` pair: the decoded image produced by `decode_img` and
    the label that `decode_label` derives from the path.
  """
  # read the raw file contents as a string, then decode them
  raw_bytes = tf.io.read_file(file_path)
  return decode_img(raw_bytes), decode_label(file_path)

In [None]:
# Turn every file path into an (image, label) pair.
training_ds = train_ds.map(process_path, num_parallel_calls=AUTOTUNE)
testing_ds = test_ds.map(process_path, num_parallel_calls=AUTOTUNE)

# Training pipeline: shuffle with a buffer as large as the whole training set,
# then cut it into batches of BATCH_SIZE examples.
ds_for_training = (
    training_ds
    .shuffle(TRAINING_NUMBER)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)

# Test pipeline: no shuffling, one single batch holding all TEST_NUMBER examples.
ds_for_test = (
    testing_ds
    .batch(TEST_NUMBER)
    .prefetch(buffer_size=AUTOTUNE)
)

In [None]:
def zero_one_loss(model,ds):
  """Compute the error of a predictor on a dataset using the zero-one loss.

  Every batch of `ds` is evaluated, so the result covers the whole dataset
  whatever its batch size. The predicted class is the argmax of the scores
  returned by `model.predict`; no softmax is applied because softmax does
  not change which score is the largest.

  Args:
    model: predictor exposing `predict(images)`, returning one row of class
      scores per image.
    ds: iterable of `(images, labels)` batches, with integer class labels.

  Returns:
    The fraction of misclassified examples, as a float between 0 and 1.

  Raises:
    ValueError: if `ds` does not contain any example.
  """
  mistakes = 0
  total = 0
  for images, labels in ds:
    scores = model.predict(images)
    predicted = np.argmax(scores, axis=-1)
    # Flatten the labels so they line up with the 1-D vector of predictions.
    expected = np.asarray(labels).reshape(-1)
    mistakes += int(np.count_nonzero(predicted != expected))
    total += len(predicted)

  if total == 0:
    raise ValueError('zero_one_loss needs a dataset with at least one example')

  return mistakes / total

In [None]:
# Architecture "512-1": three convolution / max-pooling stages followed by a
# single 512-unit sigmoid hidden layer and a 10-output logits layer.
# NOTE: despite its name, acc512 stores the test ERROR (zero-one loss)
# measured after each epoch, not an accuracy.
acc512=[]

model = tf.keras.models.Sequential([
    # The input shape follows the size the images are resized to, so the model
    # stays consistent with the input pipeline if the constants change.
    tf.keras.layers.Conv2D(16, 3, padding='same', activation='sigmoid',
                           input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(32, 3, padding='same', activation='sigmoid'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, 3, padding='same', activation='sigmoid'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='sigmoid'),
    # Raw logits: the loss below is configured with from_logits=True.
    tf.keras.layers.Dense(10)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Train one epoch at a time so the test error can be recorded after each epoch.
for epoch in range(1, 6):
  model.fit(x=ds_for_training,
            epochs=1)

  test_loss=zero_one_loss(model,ds_for_test)

  acc512.append(test_loss)
  print('epoch: ',epoch)
  print(test_loss)

In [None]:
# Architecture "256-2": three convolution / max-pooling stages followed by
# two 256-unit sigmoid hidden layers and a 10-output logits layer.
# NOTE: despite its name, acc256 stores the test ERROR (zero-one loss)
# measured after each epoch, not an accuracy.
acc256=[]

model = tf.keras.models.Sequential([
    # The input shape follows the size the images are resized to, so the model
    # stays consistent with the input pipeline if the constants change.
    tf.keras.layers.Conv2D(16, 3, padding='same', activation='sigmoid',
                           input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(32, 3, padding='same', activation='sigmoid'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, 3, padding='same', activation='sigmoid'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    # Two identical hidden layers.
    *[tf.keras.layers.Dense(256, activation='sigmoid') for _ in range(2)],
    # Raw logits: the loss below is configured with from_logits=True.
    tf.keras.layers.Dense(10)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Train one epoch at a time so the test error can be recorded after each epoch.
for epoch in range(1, 6):
  model.fit(x=ds_for_training,
            epochs=1)

  test_loss=zero_one_loss(model,ds_for_test)

  acc256.append(test_loss)
  print('epoch: ',epoch)
  print(test_loss)

In [None]:
# Architecture "128-4": three convolution / max-pooling stages followed by
# four 128-unit sigmoid hidden layers and a 10-output logits layer.
# NOTE: despite its name, acc128 stores the test ERROR (zero-one loss)
# measured after each epoch, not an accuracy.
acc128=[]

model = tf.keras.models.Sequential([
    # The input shape follows the size the images are resized to, so the model
    # stays consistent with the input pipeline if the constants change.
    tf.keras.layers.Conv2D(16, 3, padding='same', activation='sigmoid',
                           input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(32, 3, padding='same', activation='sigmoid'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, 3, padding='same', activation='sigmoid'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    # Four identical hidden layers.
    *[tf.keras.layers.Dense(128, activation='sigmoid') for _ in range(4)],
    # Raw logits: the loss below is configured with from_logits=True.
    tf.keras.layers.Dense(10)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Train one epoch at a time so the test error can be recorded after each epoch.
for epoch in range(1, 6):
  model.fit(x=ds_for_training,
            epochs=1)

  test_loss=zero_one_loss(model,ds_for_test)

  acc128.append(test_loss)
  print('epoch: ',epoch)
  print(test_loss)

In [None]:
# Architecture "64-8": three convolution / max-pooling stages followed by
# eight 64-unit sigmoid hidden layers and a 10-output logits layer.
# NOTE: despite its name, acc64 stores the test ERROR (zero-one loss)
# measured after each epoch, not an accuracy.
acc64=[]

model = tf.keras.models.Sequential([
    # The input shape follows the size the images are resized to, so the model
    # stays consistent with the input pipeline if the constants change.
    tf.keras.layers.Conv2D(16, 3, padding='same', activation='sigmoid',
                           input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(32, 3, padding='same', activation='sigmoid'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, 3, padding='same', activation='sigmoid'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    # Eight identical hidden layers.
    *[tf.keras.layers.Dense(64, activation='sigmoid') for _ in range(8)],
    # Raw logits: the loss below is configured with from_logits=True.
    tf.keras.layers.Dense(10)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Train one epoch at a time so the test error can be recorded after each epoch.
for epoch in range(1, 6):
  model.fit(x=ds_for_training,
            epochs=1)

  test_loss=zero_one_loss(model,ds_for_test)

  acc64.append(test_loss)
  print('epoch: ',epoch)
  print(test_loss)

In [None]:
# Plot the test error recorded after each epoch, one curve per architecture.
epochs_range = range(1,6)

plt.figure(figsize=(4, 2))
for curve, curve_label in (
    (acc512, 'Test Error 512-1'),
    (acc256, 'Test Error 256-2'),
    (acc128, 'Test Error 128-4'),
    (acc64, 'Test Error 64-8'),
):
  plt.plot(epochs_range, curve, label=curve_label)
plt.legend(loc='lower left', prop={'size': 6})
plt.title('Test error')
plt.xlabel('Epoch')
plt.ylabel('Test error')
plt.show()