<a href="https://colab.research.google.com/github/Jamnic98/blue-cheese-classifier/blob/main/blue_cheese_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [58]:
# mount the google drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [59]:
%cd drive/MyDrive/

[Errno 2] No such file or directory: 'drive/MyDrive/'
/content/drive/MyDrive


In [60]:
import os
import numpy as np
import shutil
import random

class_names = ['blueCheese', 'notBlueCheese']
root_dir = '/content/drive/MyDrive/'
val_ratio = 0.15
test_ratio = 0.05

all_cheese_image_dirs = {class_names[0]: [], class_names[1]: []}

for class_name in class_names:
  train_URI = root_dir +'data/train/' + class_name
  if not os.path.isdir(train_URI):
    os.makedirs(train_URI)
  
  test_URI = root_dir +'data/test/' + class_name
  if not os.path.isdir(test_URI):
    os.makedirs(test_URI)
  
  val_URI = root_dir +'data/val/' + class_name
  if not os.path.isdir(val_URI):
    os.makedirs(val_URI)

  cheeses = os.listdir(root_dir + class_name)

  for cheese_name in cheeses:
    cheese_dir = root_dir + class_name + '/' + cheese_name

    all_cheese_image_dirs[class_name].extend(
      [cheese_dir + '/'+ name for name in os.listdir(cheese_dir)]
      )  

In [61]:
for class_name in class_names:
  cheese_image_dirs = all_cheese_image_dirs[class_name]

  np.random.shuffle(cheese_image_dirs)

  train_FileNames, val_FileNames, test_FileNames = np.split(
    np.array(cheese_image_dirs),
    [int(len(cheese_image_dirs)* (1 - (val_ratio + test_ratio))), 
    int(len(cheese_image_dirs)* (1 - test_ratio))]
  )

  print('In folder :', class_name)
  print('Total images: ', len(cheese_image_dirs))
  print('Training: ', len(train_FileNames))
  print('Validation: ', len(val_FileNames))
  print('Testing: ', len(test_FileNames))
  print('\n')

  # # Copy-pasting images
  # for train_FileName in train_FileNames.tolist():
  #   shutil.copy(train_FileName, root_dir +'data/train/' + class_name)

  # for val_FileName in val_FileNames.tolist():
  #   shutil.copy(val_FileName, root_dir +'data/val/' + class_name)

  # for test_FileName in test_FileNames.tolist():
  #   shutil.copy(test_FileName, root_dir +'data/test/' + class_name)

In folder : blueCheese
Total images:  1896
Training:  1516
Validation:  285
Testing:  95


In folder : notBlueCheese
Total images:  1848
Training:  1478
Validation:  277
Testing:  93




In [62]:
# use ImageDataGenerator to preprocess the data
from keras.preprocessing.image import ImageDataGenerator

augmented_images_dir=root_dir + 'augmented_images/'
if not os.path.isdir(augmented_images_dir):
  os.mkdir(augmented_images_dir)

# augment the data that we have
train_datagen = ImageDataGenerator(rescale=1./255,
                                   shear_range=0.1,
                                   zoom_range=0.1,
                                   rotation_range=15,
                                   horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale = 1./255)

# prepare training data
augmented_train_images_dir = augmented_images_dir + 'train'
if not os.path.isdir(augmented_train_images_dir):
  os.mkdir(augmented_train_images_dir)
training_data = train_datagen.flow_from_directory('data/train',
                                                  #save_to_dir=augmented_train_images_dir,
                                                  target_size=(64, 64),
                                                  batch_size=32,
                                                  class_mode='categorical'
                                                  )

# prepare test data
augmented_test_images_dir = augmented_images_dir + 'test'
if not os.path.isdir(augmented_test_images_dir):
  os.mkdir(augmented_test_images_dir)
test_data = test_datagen.flow_from_directory('data/test',
                                              #save_to_dir=augmented_test_images_dir,
                                              target_size = (64, 64),
                                              batch_size = 32,
                                              class_mode = 'categorical'
                                              )

# prepare validation data
augmented_val_images_dir = augmented_images_dir + 'val'
if not os.path.isdir(augmented_val_images_dir):
  os.mkdir(augmented_val_images_dir)
validation_data = test_datagen.flow_from_directory('data/val',
                                                    #save_to_dir=augmented_val_images_dir,
                                                    target_size = (64, 64),
                                                    batch_size = 32,
                                                    class_mode = 'categorical'
                                                    )

Found 2994 images belonging to 2 classes.
Found 188 images belonging to 2 classes.
Found 562 images belonging to 2 classes.


In [63]:
### Build the model
# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense

# Initialising the CNN
classifier = Sequential()

# Step 1 - Convolution
# make 32 feature detectors with a size of 3x3
# choose the input-image's format to be 64x64 with 3 channels
classifier.add(Conv2D(32, (3, 3), input_shape=(64, 64, 3), activation="relu"))

# Step 2 - Pooling
classifier.add(MaxPooling2D(pool_size = (2, 2)))

# Adding a second convolutional layer
classifier.add(Conv2D(32, (3, 3), activation = 'relu'))
classifier.add(MaxPooling2D(pool_size = (2, 2)))

# Step 3 - Flattening
classifier.add(Flatten())

# Step 4 - Full connection
classifier.add(Dense(activation="relu", units=128))
classifier.add(Dense(activation="softmax", units=2))

# Compiling the CNN
classifier.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

### **Change code below this message**

In [64]:
classifier.fit(training_data,
               # steps_per_epoch = (100),
               epochs = 15,
               validation_data = validation_data,
               validation_steps = 10)

Epoch 1/15
13/94 [===>..........................] - ETA: 1:14 - loss: 0.8402 - accuracy: 0.5018

  "Palette images with Transparency expressed in bytes should be "




  " Skipping tag %s" % (size, len(data), tag)


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f325b141050>

In [65]:
generator= train_datagen.flow_from_directory("data/train", batch_size=32)
label_map = (generator.class_indices)
label_map

Found 2994 images belonging to 2 classes.


{'blueCheese': 0, 'notBlueCheese': 1}

In [66]:
from IPython.display import Image
Image('data/test/blueCheese/stilton92.jpg')

## make predictions
import numpy as np
from keras.preprocessing import image
# we have to resize our image if we haven't already
test_image = image.load_img('data/test/blueCheese/stilton92.jpg', target_size = (64, 64))
test_image = image.img_to_array(test_image)
test_image = np.expand_dims(test_image, axis = 0)
result = classifier.predict(test_image)
print('Predicted class: ', int(result[0][0]))

predictions = classifier.predict(test_data)
# Get most likely class
predicted_classes = np.argmax(predictions, axis=1)

true_classes = test_data.classes
class_labels = list(test_data.class_indices.keys())


import sklearn.metrics as metrics
report = metrics.classification_report(true_classes, predicted_classes, target_names=class_labels)
print(report)

confusion_matrix = metrics.confusion_matrix(y_true=true_classes, y_pred=predicted_classes)
confusion_matrix

Predicted class:  1


  "Palette images with Transparency expressed in bytes should be "


               precision    recall  f1-score   support

   blueCheese       0.53      0.41      0.46        95
notBlueCheese       0.51      0.63      0.57        93

     accuracy                           0.52       188
    macro avg       0.52      0.52      0.52       188
 weighted avg       0.52      0.52      0.52       188



array([[39, 56],
       [34, 59]])