<a href="https://colab.research.google.com/github/Adam-Rao/Cats-and-Dogs/blob/model/CatsAndDogs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!unzip "/content/drive/My Drive/kagglecatsanddogs_3367a.zip" -d "/content/drive/My Drive/Cats and Dogs Project"

In [1]:
import _io
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle as pkl
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from tensorflow.keras import Sequential
from typing import List, Tuple

In [20]:
# Alias for a list of (file path, label) pairs
DATA_LIST_TYPE = List[Tuple[str, str]]

# Input pipeline settings
BATCH_SIZE = 32  # number of images per batch
IMG_SIZE = 224  # images are resized to IMG_SIZE x IMG_SIZE
CONV_INPUT_SIZE = [IMG_SIZE, IMG_SIZE, 3]  # height, width, channels of the first conv layer's input

# Activation functions
DOMINANT_ACTIVATION_FUNCTION = "relu"  # used by every hidden layer
FINAL_LAYER_ACTIVATION_FUNCTION = "sigmoid"  # used by the output layer only

# Filter counts of the convolutional layers
LAYER_ONE_CHANNELS = 64
LAYER_TWO_CHANNELS = 128
LAYER_THREE_CHANNELS = 256
LAYER_FOUR_FIVE_CHANNELS = 512

# Unit counts of the fully connected layers
FC_1_2_NODES = 4096
FC_3_NODES = 1000
FC_4_NODES = 2  # output layer: one unit per class

In [None]:
def create_data_list(data_path: str , label: str) -> DATA_LIST_TYPE:
  """
  Pair every ".jpg" file found directly inside a folder with a label.

  data_path: folder whose entries are listed (sub-folders are not walked)
  label: label attached to every matching file

  Returns a list of (file path, label) tuples in os.listdir order. The
  extension check is case-sensitive, so e.g. ".JPG" files are skipped.
  """
  return [
      (f"{data_path}/{entry}", label)
      for entry in os.listdir(data_path)
      if os.path.splitext(entry)[1] == ".jpg"
  ]

In [None]:
# Build the (file path, label) list for each class folder on Drive.
cat_data_list = create_data_list(
    "/content/drive/My Drive/Cats and Dogs Project/PetImages/Cat",
    "CAT",
)
dog_data_list = create_data_list(
    "/content/drive/My Drive/Cats and Dogs Project/PetImages/Dog",
    "DOG",
)

# Combined list: all cat entries followed by all dog entries.
general_data_list = [*cat_data_list, *dog_data_list]

In [None]:
def shuffle_data_list(data_list: DATA_LIST_TYPE) -> DATA_LIST_TYPE:
  """
  Shuffle a data list with sklearn.utils.shuffle.

  data_list: list of (file path, label) tuples

  Returns the shuffled list. No random_state is passed, so the order
  differs from run to run.
  """
  shuffled = shuffle(data_list)
  return shuffled

In [None]:
# Shuffle the combined list, which was built as all cats followed by all dogs.
shuffled_data_list = shuffle_data_list(general_data_list)

In [None]:
def save_data_list(data_list: DATA_LIST_TYPE, file_path: str) -> None:
  """
  Pickle a data list to disk.

  data_list: list of (file path, label) tuples to be saved
  file_path: path of the file the list is written to (overwritten if it exists)

  Returns None.
  """
  # The context manager closes (and therefore flushes) the file even if
  # pickling fails; the handle was previously left open.
  with open(file_path, "wb") as save_file:
    pkl.dump(data_list, save_file)

In [3]:
def load_data_file(file_path: str) -> _io.BufferedReader:
  """
  Open a saved data file for binary reading.

  file_path: path to the file containing saved data

  Returns the open file object; the caller is responsible for closing it
  (for example by using it in a `with` statement).
  """
  handle = open(file_path, mode="rb")
  return handle

In [None]:
# Pickle the shuffled (file path, label) list to Drive; the next cell reads it back from this file.
save_data_list(shuffled_data_list, "/content/drive/My Drive/Cats and Dogs Project/Extracted Data/data.pkl")

In [4]:
# Read the pickled (file path, label) list back from Drive.
# NOTE: pickle.load can execute arbitrary code while unpickling, so only load
# files you created yourself (here: the file written by save_data_list).
with load_data_file("/content/drive/My Drive/Cats and Dogs Project/Extracted Data/data.pkl") as f:
  shuffled_data = pkl.load(f)

In [5]:
# Second element of every (file path, label) entry, in dataset order.
labels = [entry[1] for entry in shuffled_data]

# Sorted array of the distinct label values; an index into it identifies a class.
unique_labels = np.unique(labels)

In [6]:
# One boolean array per sample, True at the position of the sample's label in
# unique_labels (a one-hot encoding).
boolean_labels = [label == unique_labels for label in labels] # will form our y values

In [7]:
def create_train_test_datasets(X, y, TEST_SIZE=0.2, RANDOM_STATE=42):
  """
  Split features and labels into two parts with sklearn's train_test_split.

  X: Feature values
  y: Label values
  TEST_SIZE: Fraction of the data placed in the second (test) part. Default is 0.2
  RANDOM_STATE: Seed forwarded to train_test_split. Default is 42

  Returns (X_train, X_test, y_train, y_test).
  The same call can be used to carve a validation set out of training data.
  """
  split_parts = train_test_split(
      X,
      y,
      test_size=TEST_SIZE,
      random_state=RANDOM_STATE,
  )

  # train_test_split yields [X_train, X_test, y_train, y_test] for two inputs
  return tuple(split_parts)

In [8]:
def preprocess_image(file_path: str):
  """
  Read a JPEG file and turn it into a model-ready image tensor.

  file_path: Path to the image file

  Returns a float32 tensor of shape (IMG_SIZE, IMG_SIZE, 3).
  """

  raw_bytes = tf.io.read_file(file_path)
  # channels=3 forces RGB output. Without it grayscale or CMYK JPEGs decode to
  # 1 or 4 channels, which cannot be batched with RGB images nor fed to the
  # model's 3-channel input, and the channel dimension stays unknown inside
  # Dataset.map.
  image = tf.image.decode_jpeg(raw_bytes, channels=3)
  # Converting uint8 -> float32 also rescales pixel values to the [0, 1] range
  image = tf.image.convert_image_dtype(image, tf.float32)
  image = tf.image.resize(image, [IMG_SIZE, IMG_SIZE])

  return image

In [9]:
def return_image_label(file_path: str, label: str):
  """
  Preprocess one image and pair it with its label.

  file_path: Path to image
  label: Image label, passed through unchanged

  Returns a (preprocessed image, label) tuple.
  """
  return preprocess_image(file_path), label

In [10]:
def create_data_batches(
    X,
    y=None,
    test_data=False,
    valid_data=False
  ):
  """
  Turn image paths (and labels) into batched tf.data datasets.

  X: Feature values (image file paths)
  y: Label values; required unless test_data is True
  test_data: If True, build unlabelled batches of images only (y is ignored)
  valid_data: If True (and test_data is False), the batches are announced as
    validation data; they are built exactly like training batches

  Returns a tf.data.Dataset batched with BATCH_SIZE. Test batches yield images,
  training/validation batches yield (images, labels). The data is not shuffled here.

  Raises ValueError if labelled batches are requested without y.
  """

  if test_data:
    print("Creating test data batches...")
    data = tf.data.Dataset.from_tensor_slices(tf.constant(X))
    return data.map(preprocess_image).batch(BATCH_SIZE)

  if y is None:
    # Fail early with a clear message instead of the opaque error that
    # tf.constant(None) would raise further down.
    raise ValueError("y is required to create training or validation data batches")

  # Training and validation batches are built identically; only the
  # progress message differs.
  if valid_data:
    print("Creating validation data batches...")
  else:
    print("Creating training data batches...")

  data = tf.data.Dataset.from_tensor_slices(
      (
          tf.constant(X),
          tf.constant(y)
      )
  )
  return data.map(return_image_label).batch(BATCH_SIZE)

In [11]:
def get_prediction_label(prediction_probabilities):
  """
  Map one prediction vector to its label.

  prediction_probabilities: Array of predictions from model for one sample

  Returns the entry of unique_labels at the position of the highest value.
  """
  best_index = np.argmax(prediction_probabilities)
  return unique_labels[best_index]

In [12]:
def plot_image_prediction_true_value(prediction_probabilities, X, y, index):
  """
  Plot one image titled with its predicted label, true label and confidence.

  prediction_probabilities: Array of predictions from model, one row per sample
  X: Images, indexable by position
  y: True labels, indexable by position
  index: Position of image, prediction and true label you wish to show

  Draws on the current matplotlib axes; returns None.
  """
  image = X[index]
  true_label = y[index]
  # Fixed: the original indexed a misspelt name (NameError on every call)
  sample_probabilities = prediction_probabilities[index]
  predicted_label = get_prediction_label(sample_probabilities)

  plt.imshow(image)
  plt.yticks([])
  plt.xticks([])
  # np.max is the probability of the predicted class; np.argmax (used
  # before) is only its index.
  plt.title(
      f"Predicted Label: {predicted_label}  True Label: {true_label}\n"
      f"Probability image is predicted label: {np.max(sample_probabilities) * 100:.2f}")


In [13]:
def unbatchify(batched_dataset):
  """
  Flatten a batched (image, label) dataset back into plain Python lists.

  batched_dataset: batched tf.data dataset yielding (images, labels)

  Returns (images, labels): the images as numpy arrays and, for each one,
  the entry of unique_labels at the argmax of its label vector.
  """
  samples = list(batched_dataset.unbatch().as_numpy_iterator())

  images_ = [image for image, _ in samples]
  labels_ = [unique_labels[np.argmax(label)] for _, label in samples]

  return images_, labels_

In [23]:
def create_model():
  """
  Build, compile and return the custom CNN.

  The layout is based on the VGG-16 architecture -> https://arxiv.org/pdf/1409.1556.pdf
  Convolution/max-pool stages are followed by global average pooling and
  four dense layers. The model is compiled with Adam, binary cross-entropy
  and the accuracy metric, and its summary is printed before it is returned.
  """
  def conv(channels, **extra):
    # 3x3 convolution using the shared hidden-layer activation
    return Conv2D(
        channels,
        (3, 3),
        activation=DOMINANT_ACTIVATION_FUNCTION,
        **extra)

  def dense(units, activation=DOMINANT_ACTIVATION_FUNCTION):
    # Fully connected layer; hidden-layer activation unless overridden
    return Dense(units, activation=activation)

  feature_layers = [
      conv(LAYER_ONE_CHANNELS, input_shape=CONV_INPUT_SIZE),
      MaxPool2D(),
      conv(LAYER_TWO_CHANNELS),
      MaxPool2D(),
      conv(LAYER_THREE_CHANNELS),
      MaxPool2D(),
      conv(LAYER_FOUR_FIVE_CHANNELS),
      conv(LAYER_FOUR_FIVE_CHANNELS),
      MaxPool2D(),
      conv(LAYER_FOUR_FIVE_CHANNELS),
      conv(LAYER_FOUR_FIVE_CHANNELS),
      MaxPool2D(),
  ]

  classifier_layers = [
      tf.keras.layers.GlobalAveragePooling2D(),
      dense(FC_1_2_NODES),
      dense(FC_1_2_NODES),
      dense(FC_3_NODES),
      dense(FC_4_NODES, activation=FINAL_LAYER_ACTIVATION_FUNCTION),
  ]

  model = Sequential(feature_layers + classifier_layers)

  model.compile(
      optimizer='adam',
      loss=tf.keras.losses.BinaryCrossentropy(),
      metrics=['accuracy']
  )
  model.summary()

  return model

In [None]:
# Callback that stops training once validation accuracy has gone
# 3 epochs without improving.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=3)

In [None]:
def save_model(model, suffix=None):
  """
  Save a model as a timestamped .h5 file in the project's models folder.

  model: object exposing save(path), e.g. a Keras model
  suffix: optional text appended to the file name after a "-"

  Returns the path the model was saved to.
  """
  # Local import: the file's import cell does not bring in datetime.
  import datetime

  # %S (seconds) replaces the original %s, a non-portable epoch-seconds
  # extension that is not supported on every platform.
  timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
  modeldir = os.path.join(
      'drive/My Drive/Cats and Dogs Project/models',
      timestamp
  )
  # The default suffix=None used to crash on string concatenation; it now
  # simply means "no suffix".
  if suffix is None:
    model_path = f"{modeldir}.h5"
  else:
    model_path = f"{modeldir}-{suffix}.h5"
  print(f"Saving model to: {model_path}...")
  model.save(model_path)
  return model_path

In [None]:
def load_model(model_path):
  """
  Load a previously saved Keras model.

  model_path: path to the saved model file

  Returns the model produced by tf.keras.models.load_model.
  """
  print(f"Loading saved model from: {model_path}")
  return tf.keras.models.load_model(model_path)

In [24]:
# Spin forever so the cell never finishes (intended to keep the Colab session
# from disconnecting). Interrupt the cell to stop it.
while True: pass #keeps Colab from disconnecting

KeyboardInterrupt: ignored