In [None]:
#Import general libraries
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import random

#Import ML libraries
import tensorflow as tf
import tensorflow.keras.layers as layers
import tensorflow.keras.optimizers as optimizers
import tensorflow.keras.datasets as datasets
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
#Project root folder - change this to match the local environment
path = "/content/drive/MyDrive/Colab Notebooks/Good Boy Classifier"
#Every image folder used below lives inside the Data directory
data_path = path + "/Data"

#Reduced class set: one training folder per dog (Wills is short for Willoughby)
dog_classes = [
    'Logan',
    'Wills',
]
#Validation folders, listed in the same order as dog_classes
validation_classes = [
    'Logan Validation',
    'Wills Validation',
]

In [None]:
#Define a function to add noise to training images in the generator
def addNoise(img, strength = 10):
    """Return a noisy copy of ``img`` using zero-mean Gaussian noise.

    The standard deviation of the noise is drawn uniformly from
    ``[0, strength)`` on every call, so each image gets a different amount.

    Args:
        img: Array-like image with pixel values on the 0-255 scale.
        strength: Upper bound for the noise standard deviation
            (ideal noise strength is between 0 and 20).

    Returns:
        A new float array with the same shape as ``img``, clipped to 0-255.
        Float inputs keep their dtype; any other dtype is returned as float32.
    """
    pixels = np.asarray(img)
    #Integer images cannot hold fractional noise, so fall back to float32
    if np.issubdtype(pixels.dtype, np.floating):
        out_dtype = pixels.dtype
    else:
        out_dtype = np.float32
    deviation = strength*random.random()
    noise = np.random.normal(0, deviation, pixels.shape)
    #Build a new array instead of using += so the caller's image is never mutated
    return np.clip(pixels + noise, 0, 255).astype(out_dtype)

#Augmentation pipeline for the training images
#The dataset is small, so extensive augmentation is required
training_datagen = ImageDataGenerator(
    preprocessing_function=addNoise,
    rescale=1.0/255, #Images are stored as ints, so a rescale is required
    horizontal_flip=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=[0.5,1.0],
    brightness_range=[0.8,1.0],
)

#Stream augmented training batches from the training class folders
training_generator = training_datagen.flow_from_directory(
    data_path,
    classes=dog_classes,
    class_mode='categorical',
    target_size=(224,224),
    interpolation='lanczos', #Improves image quality when downsizing
    batch_size=8, #Small batches appear to work better for the limited dataset
)

#Validation images are not augmented, only converted from int to float
validation_datagen = ImageDataGenerator(rescale=1.0/255)

#Stream validation batches from the validation class folders
validation_generator = validation_datagen.flow_from_directory(
    data_path,
    classes=validation_classes,
    class_mode='categorical',
    target_size=(224,224),
    interpolation='lanczos', #Improves image quality when downsizing
    batch_size=16, #Max batch size without clipping is 16
)

Found 40 images belonging to 2 classes.
Found 16 images belonging to 2 classes.


In [None]:
#Generic image grid function for visualisations
def plotImageGrid(images, labels, title = '', width = 3):
  """Draw the first width*width images in a square grid, one label per image.

  Args:
    images: Indexable collection of images accepted by plt.imshow.
    labels: Indexable collection of titles, one per image.
    title: Heading drawn above the whole grid.
    width: Number of rows and columns in the grid.

  Raises:
    ValueError: If fewer than width*width images or labels are supplied.
  """
  cell_count = width**2
  #Refuse to draw a partially filled grid
  if len(images) < cell_count or len(labels) < cell_count:
    raise ValueError("Insufficient images or labels for gridsize")

  #Subplot positions are 1-based while the collections are 0-based
  for cell in range(1, cell_count + 1):
    plt.subplot(width, width, cell)
    plt.axis('off')
    plt.title(labels[cell - 1])
    plt.imshow(images[cell - 1], cmap='gray_r')

  plt.subplots_adjust(hspace=1)
  plt.suptitle(title)
  plt.tight_layout()
  #Leave headroom at the top of the figure for the heading
  plt.subplots_adjust(top=0.88)
  plt.show()

In [None]:
#Photo used to demonstrate the augmentation pipeline (lives in the Logan training folder)
sample_image_location = "%s/Data/Logan/20150917_113732(0).jpg" % path


#Visualise a grid of augmentations for image at chosen path
def visualiseTransformation(image_path, grid_width = 3):
  """Show an image followed by a grid of random augmentations of it.

  The image is loaded at its native size, displayed once unmodified and then
  passed grid_width*grid_width times through training_datagen.random_transform.

  Args:
    image_path: Location of the image file to load.
    grid_width: Rows/columns of the augmentation grid (default 3, i.e. 9 images).
  """
  original_image = tf.keras.preprocessing.image.load_img(image_path)
  original_image_array = tf.keras.preprocessing.image.img_to_array(original_image)
  print(original_image_array.shape)
  print(np.max(original_image_array))

  #Pixels are on the 0-255 scale; imshow expects floats in 0-1
  plt.title("Original Image")
  plt.axis('off')
  plt.imshow(original_image_array/255.)
  plt.show()

  #Perform augmentations and pass to generic image grid function
  #Stacking the results (rather than filling a fixed-size buffer) means any
  #image resolution works, not just one particular size
  num_augmentations = grid_width**2
  augmented_images = np.stack([
      training_datagen.random_transform(original_image_array)
      for _ in range(num_augmentations)
  ])
  #Random transform does not perform rescale
  augmented_images = augmented_images/255.0
  #The grid function needs one label per image; augmentations are unlabelled
  augmented_images_labels = ['']*num_augmentations
  plotImageGrid(augmented_images,augmented_images_labels,"Augmented Images",grid_width)

In [None]:
#Visualise predictions as image grid with labels
def visualisePredictions(model):
  """Predict one validation batch and show the first 9 images with captions.

  Each caption holds the predicted dog, the confidence of that prediction
  (rounded to 2 decimal places) and the true dog.

  Args:
    model: Object exposing predict(images) that returns one score vector
      per image.
  """
  #The builtin next() works with any iterator, whereas the .next() method is
  #not available on every Keras iterator version
  validation_batch = next(validation_generator)
  predictions = model.predict(validation_batch[0])
  titles = []
  #Define the images and labels, then call generic image grid function
  #Class indices are looked up in dog_classes, so this relies on
  #validation_classes being listed in the same order
  for prediction, truth in zip(predictions, validation_batch[1]):
    guess = dog_classes[np.argmax(prediction)]
    certainty = round(np.max(prediction),2)
    answer = dog_classes[np.argmax(truth)]
    titles.append(("Guess: %s (%s), Answer: %s" % (guess,certainty,answer)))
  print(titles)
  plotImageGrid(validation_batch[0],titles,"PupNet Predictions",3)

In [None]:
#Visualise predictions with breakdown by certainty
def analysePrediction(model, num_displayed = 3):
  """Show validation images next to a bar chart of the per-class scores.

  One row is drawn per image: the image on the left and a horizontal bar
  chart of the model output for every entry of dog_classes on the right.

  Args:
    model: Object exposing predict(images) that returns one score vector
      per image.
    num_displayed: Number of images (rows) to draw.

  Raises:
    ValueError: If the validation batch holds fewer than num_displayed images.
  """
  #The builtin next() works with any iterator, whereas the .next() method is
  #not available on every Keras iterator version
  validation_batch = next(validation_generator)
  predictions = model.predict(validation_batch[0])
  if len(predictions) < num_displayed:
    raise ValueError("Insufficient images or labels for gridsize")

  for i in range(num_displayed):
    #Display Image (left column, odd subplot positions):
    plt.subplot(num_displayed,2,1+i*2)
    plt.axis('off')
    plt.imshow(validation_batch[0][i], cmap='gray_r')
    #Display Predictions (right column, even subplot positions):
    plt.subplot(num_displayed,2,2+i*2)
    plt.barh(dog_classes, predictions[i], align='center', height=0.5)

  plt.subplots_adjust(hspace=1)
  plt.subplots_adjust(top=0.88)
  plt.show()

In [None]:
def analysePredictionRevised(model, num_displayed = 3):
  """Show validation images with their true label and a score bar chart.

  One row is drawn per image: the image (titled with the correct answer) on
  the left and a horizontal bar chart of the model output for every entry of
  dog_classes on the right. All bar charts share the same x-axis limit.

  Args:
    model: Object exposing predict(images) that returns one score vector
      per image.
    num_displayed: Number of images (rows) to draw.

  Raises:
    ValueError: If the validation batch holds fewer than num_displayed images.
  """
  #The builtin next() works with any iterator, whereas the .next() method is
  #not available on every Keras iterator version
  validation_batch = next(validation_generator)
  predictions = model.predict(validation_batch[0])
  if len(predictions) < num_displayed:
    raise ValueError("Insufficient images or labels for gridsize")

  #Shared axis limit with a little headroom beyond the largest score shown
  max_prediction = np.max(predictions[:num_displayed])+0.1
  #squeeze=False keeps axs two-dimensional even when only one row is drawn,
  #so the axs[i,col] indexing below works for num_displayed == 1
  fig, axs = plt.subplots(num_displayed, 2, squeeze=False,
                          gridspec_kw={'width_ratios':[1,3]})
  for i in range(num_displayed):
    axs[i,0].imshow(validation_batch[0][i], cmap='gray_r')
    axs[i,0].axis('off')
    #Class indices are looked up in dog_classes, so this relies on
    #validation_classes being listed in the same order
    answer = "Answer: " + dog_classes[np.argmax(validation_batch[1][i])]
    axs[i,0].set_title(answer)
    axs[i,1].barh(dog_classes, predictions[i], align='center', height=0.5)
    axs[i,1].set_xlim(0,max_prediction)
    axs[i,1].set_title("Prediction Confidence")
  plt.show()


In [None]:
#Plot training performance
def plotTrainingHistory(training_hist,moving_average=20):
  """Plot per-epoch loss with a moving average on a logarithmic y-axis.

  Training loss is drawn in blue and, when present, validation loss in red.
  Raw values are faint dots; the moving average is a solid line that starts
  at the first epoch for which a full window of values is available.

  Args:
    training_hist: Object whose .history dict holds a 'loss' list and
      optionally a 'val_loss' list with one value per epoch.
    moving_average: Window length in epochs. It is reduced to the number of
      epochs when the run is shorter than the window.

  Raises:
    ValueError: If the history contains no epochs.
  """
  history = training_hist.history
  epochs = len(history['loss'])
  if epochs == 0:
    raise ValueError("Training history contains no epochs to plot")

  #A window longer than the run makes np.convolve return more points than
  #there are x positions left to plot them at, so clamp it to the run length
  window = max(1, min(int(moving_average), epochs))
  x = np.arange(epochs)

  series = [('Training', history['loss'], 'b')]
  #Runs fitted without validation data have no val_loss entry
  if 'val_loss' in history:
    series.append(('Validation', history['val_loss'], 'r'))

  legend_entries = []
  for name, values, colour in series:
    smoothed = np.convolve(values, np.ones(window), 'valid') / window
    plt.plot(x,values,'o'+colour,alpha=0.2)
    plt.plot(x[window-1:],smoothed,'-'+colour)
    legend_entries += [name,'Moving Average']

  plt.yscale('log')
  plt.xlabel('Epoch')
  plt.ylabel('Loss')
  plt.legend(legend_entries)
  plt.show()

#plotTrainingHistory(hist)

In [None]:
#Define the base classifier
def defineGBClassifier(lr = 5e-4):
  """Build and compile the dog classifier: frozen ResNet50 body plus dense head.

  The head is Flatten followed by three BatchNormalization -> Dense(relu) ->
  Dropout(0.5) stages of 256, 128 and 64 units, then a final
  BatchNormalization and a softmax layer with one output per entry of
  dog_classes.

  Args:
    lr: Learning rate passed to the Adam optimiser.

  Returns:
    The compiled tf.keras.Sequential model (categorical cross-entropy loss,
    accuracy metric).
  """
  #Use resnet 50 body for feature extraction and disable training
  #NOTE(review): the generators in this notebook rescale pixels to 0-1, while
  #Keras documents resnet50.preprocess_input for the ImageNet weights - confirm
  #that the mismatch is intended
  feature_extractor = ResNet50(weights='imagenet',
                              input_shape=(224, 224, 3),
                              include_top=False)
  feature_extractor.trainable = False

  num_classes = len(dog_classes)

  model = tf.keras.Sequential()
  model.add(feature_extractor)

  #Define a simple dense head for dog categorisation
  model.add(layers.Flatten())
  for units in (256, 128, 64):
    model.add(layers.BatchNormalization())
    model.add(layers.Dense(units, activation='relu'))
    model.add(layers.Dropout(0.5))
  model.add(layers.BatchNormalization())
  model.add(layers.Dense(num_classes, activation='softmax'))

  #Lr set via experimentation, transfer learning favours lower rate
  #metrics is passed as a list, the documented form for Model.compile
  model.compile(loss='categorical_crossentropy',
                optimizer=optimizers.Adam(learning_rate=lr),
                metrics=['accuracy'])

  return model