<a href="https://colab.research.google.com/github/Adam-Rao/Cats-and-Dogs/blob/model/CatsAndDogs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [38]:
import _io
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle as pkl
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from tensorflow.keras import Sequential
from typing import List, Tuple

In [25]:
# Data and saved models paths
# Directories that hold the raw cat and dog image files
CAT_DATA_PATH = "/content/drive/My Drive/Cats and Dogs/PetImages/Cat"
DOG_DATA_PATH = "/content/drive/My Drive/Cats and Dogs/PetImages/Dog"
# Directory where the pickled list of (file path, label) tuples is stored ("data.pkl")
GENERAL_DATA_PATH = "/content/drive/My Drive/CAD PROJECT/Data"

# Neural Net Constants
# Activation used by every convolutional and hidden dense layer
ACTIVATION_FUNCTION = "relu"
# Activation used by the final dense layer
OUTPUT_ACTIVATION_FUNCTION = "sigmoid"
# Number of samples per batch produced by create_data_batches
BATCH_SIZE = 32
# Unit counts of the dense layers; DENSE_UNITS_THREE is the output layer width
DENSE_UNITS_ONE = 4096
DENSE_UNITS_TWO = 1000
DENSE_UNITS_THREE = 2
# Despite the name, these are the number of filters passed to each Conv2D layer
# (the kernel size is fixed at (3, 3) inside create_model)
FILTER_SIZE_ONE = 64
FILTER_SIZE_TWO = 128
FILTER_SIZE_THREE = 256
FILTER_SIZE_FOUR = 512
# Images are resized to IMG_DIM x IMG_DIM pixels
IMG_DIM = 224
# Model input shape: height, width, 3 colour channels
INPUT_SHAPE = [IMG_DIM, IMG_DIM, 3]

# Type aliases: a single (file path, label) record and a list of such records
DATA_TUPLE_TYPE = Tuple[str, str]
DATA_LIST_TYPE = List[DATA_TUPLE_TYPE]

In [3]:
def create_data_list(label: str, path: str) -> DATA_LIST_TYPE:
  """
  Pair every entry of a directory with one label.

  Args:
    label: label attached to each entry found in `path`.
    path: directory whose entries are listed with `os.listdir`.

  Returns:
    A list of (file path, label) tuples, one per directory entry, in the
    order `os.listdir` reports them. File paths are built as "<path>/<entry>".
  """
  return [(f"{path}/{file}", label) for file in os.listdir(path)]

In [None]:
def shuffle_data_list(data_list: DATA_LIST_TYPE) -> DATA_LIST_TYPE:
  """
  Shuffle the list of (file path, label) tuples.

  Thin wrapper that passes `data_list` to `sklearn.utils.shuffle` and returns
  its result. No `random_state` is supplied here, so the ordering is not
  fixed by this function.

  Args:
    data_list: list of (file path, label) tuples to shuffle.

  Returns:
    The shuffled data as returned by `sklearn.utils.shuffle`.
  """
  return shuffle(data_list)

In [None]:
def save_data(data_list: DATA_LIST_TYPE, save_path: str) -> None:
  """
  Pickle the data list to disk.

  Args:
    data_list: list of (file path, label) tuples to serialise.
    save_path: destination file; it is created or overwritten.

  Raises:
    OSError: if `save_path` cannot be opened for writing.
  """
  # The context manager closes the file even if pickling fails, so the data is
  # flushed to disk and the handle is not leaked.
  with open(save_path, "wb") as file:
    pkl.dump(data_list, file)

In [5]:
def load_data(data_path: str) -> _io.BufferedReader:
  """
  Open a file for binary reading and return the open handle.

  The file is NOT read or closed here; the caller owns the returned handle
  and is responsible for closing it.

  Args:
    data_path: path of the file to open.

  Returns:
    The binary file object opened in "rb" mode.
  """
  reader = open(data_path, mode="rb")
  return reader

In [None]:
# cat_data_list = create_data_list("Cat", CAT_DATA_PATH)
# dog_data_list = create_data_list("Dog", DOG_DATA_PATH)
# general_data_list = cat_data_list + dog_data_list
# shuffled_general_data = shuffle_data_list(general_data_list)
# save_data(shuffled_general_data, os.path.join(GENERAL_DATA_PATH, "data.pkl"))

In [6]:
# Load the pickled data list (expected to be the (file path, label) tuples
# written by save_data). The `with` block closes the handle as soon as
# unpickling finishes instead of leaving it open for the whole session.
# NOTE: pickle can execute arbitrary code while loading - only load files you
# created yourself.
with load_data(os.path.join(GENERAL_DATA_PATH, "data.pkl")) as data_file:
  data = pkl.load(data_file)

In [7]:
def process_image(img_path: str):
  """
  Read an image file and resize it to IMG_DIM x IMG_DIM.

  Args:
    img_path: path of the image file to read.

  Returns:
    The decoded 3-channel image resized to [IMG_DIM, IMG_DIM].
  """
  raw_bytes = tf.io.read_file(img_path)
  # NOTE(review): decoding goes through decode_png - confirm that every file
  # in the dataset is in a format this op accepts.
  decoded = tf.image.decode_png(raw_bytes, channels=3)
  return tf.image.resize(decoded, [IMG_DIM, IMG_DIM])

In [8]:
def image_label(img_path: str, label: str) -> Tuple:
  """
  Load the image at `img_path` and pair it with its label.

  Args:
    img_path: path of the image file, forwarded to `process_image`.
    label: label belonging to that image; returned unchanged.

  Returns:
    A (processed image, label) tuple.
  """
  return process_image(img_path), label

In [9]:
# Gather the label (second element) of every record in `data`,
# then reduce them to the distinct label values
labels = np.array([record[1] for record in data])
unique_labels = np.unique(labels)

In [10]:
# X values: the file path (first element) of every record in `data`
images = [record[0] for record in data]
# y labels: one boolean array per sample, True where the sample's label
# matches the corresponding entry of unique_labels
boolean_labels = [sample_label == unique_labels for sample_label in labels]

In [11]:
def create_data_batches(X, y=None, valid_data=False, test_data=False):
  """
  Build a batched `tf.data.Dataset` from image paths and, optionally, labels.

  Args:
    X: image file paths; each one is loaded through `process_image`.
    y: labels aligned with `X`. Required unless `test_data` is True.
    valid_data: if True, build validation batches (not shuffled).
    test_data: if True, build unlabelled test batches (not shuffled);
      takes precedence over `valid_data`, and `y` is ignored.

  Returns:
    A dataset yielding batches of BATCH_SIZE items: images only for test
    data, (image, label) pairs otherwise. Training data (neither flag set)
    is shuffled before the images are loaded.

  Raises:
    ValueError: if `y` is None while building training or validation batches.
  """
  if test_data:
    print("Creating test data batches...")
    dataset = tf.data.Dataset.from_tensor_slices(tf.constant(X))
    # Test data has no labels, so only the image itself is processed.
    return dataset.map(process_image).batch(BATCH_SIZE)

  if y is None:
    raise ValueError("y is required to create training or validation batches")

  print("Creating validation data batches" if valid_data else "Creating train data batches")
  dataset = tf.data.Dataset.from_tensor_slices((tf.constant(X), tf.constant(y)))
  if not valid_data:
    # Shuffle the (path, label) pairs before mapping so only the lightweight
    # elements are held in the shuffle buffer, not decoded images.
    dataset = dataset.shuffle(buffer_size=len(X))
  return dataset.map(image_label).batch(BATCH_SIZE)

In [31]:
def create_model():
  """
  Build and compile the convolutional classifier.

  The network stacks 3x3 convolutions (64 -> 128 -> 256 -> 512 -> 512 filters)
  separated by max-pooling layers, followed by three ReLU dense layers and a
  DENSE_UNITS_THREE-unit output layer using OUTPUT_ACTIVATION_FUNCTION.

  Returns:
    A compiled `Sequential` model using the Adam optimizer, binary
    cross-entropy loss and the accuracy metric.
  """
  model = Sequential([
    Conv2D(FILTER_SIZE_ONE, (3, 3), activation=ACTIVATION_FUNCTION, input_shape=INPUT_SHAPE),
    MaxPool2D(),
    Conv2D(FILTER_SIZE_TWO, (3, 3), activation=ACTIVATION_FUNCTION),
    MaxPool2D(),
    Conv2D(FILTER_SIZE_THREE, (3, 3), activation=ACTIVATION_FUNCTION),
    Conv2D(FILTER_SIZE_THREE, (3, 3), activation=ACTIVATION_FUNCTION),
    MaxPool2D(),
    Conv2D(FILTER_SIZE_FOUR, (3, 3), activation=ACTIVATION_FUNCTION),
    Conv2D(FILTER_SIZE_FOUR, (3, 3), activation=ACTIVATION_FUNCTION),
    MaxPool2D(),
    Conv2D(FILTER_SIZE_FOUR, (3, 3), activation=ACTIVATION_FUNCTION),
    Conv2D(FILTER_SIZE_FOUR, (3, 3), activation=ACTIVATION_FUNCTION),
    MaxPool2D(),
    Flatten(),
    Dense(units=DENSE_UNITS_ONE, activation=ACTIVATION_FUNCTION),
    Dense(units=DENSE_UNITS_ONE, activation=ACTIVATION_FUNCTION),
    Dense(units=DENSE_UNITS_TWO, activation=ACTIVATION_FUNCTION),
    # NOTE(review): one sigmoid unit per class - presumably trained against
    # per-class boolean labels; confirm against the labels passed to fit().
    Dense(units=DENSE_UNITS_THREE, activation=OUTPUT_ACTIVATION_FUNCTION)
  ])

  model.compile(
      optimizer=tf.keras.optimizers.Adam(),
      loss=tf.keras.losses.BinaryCrossentropy(),
      # Keras documents `metrics` as a list; a bare string is rejected by
      # newer Keras versions.
      metrics=["accuracy"]
  )

  return model

In [34]:
def return_predicted_label(prediction_probabilities):
  """
  Map a prediction vector to its label.

  Args:
    prediction_probabilities: array of scores, one per entry of the
      module-level `unique_labels`.

  Returns:
    The entry of `unique_labels` at the position of the highest score.
  """
  best_index = np.argmax(prediction_probabilities)
  return unique_labels[best_index]

In [36]:
def debatch(dataset):
  """
  Split a batched dataset of (image, label) pairs back into two lists.

  Args:
    dataset: batched dataset whose elements are (image, label) pairs; it must
      provide `unbatch()` and `as_numpy_iterator()`.

  Returns:
    A tuple (images, labels) of two lists holding every image and every
    label in iteration order.
  """
  images = []
  labels = []

  # Iterate over the individual samples; the original `with` statement here
  # never looped and failed at runtime.
  for image, label in dataset.unbatch().as_numpy_iterator():
    images.append(image)
    labels.append(label)

  return images, labels

In [40]:
def visualize_prediction(
    predictions,
    true_labels,
    true_images,
    index=1
):
  """
  Plot one sample with its predicted and true label in the title.

  Args:
    predictions: sequence of prediction arrays, one per sample.
    true_labels: sequence of true labels aligned with `predictions`.
    true_images: sequence of images aligned with `predictions`.
    index: position of the sample to display (defaults to 1).

  The image is drawn with matplotlib without axis ticks; the title shows the
  label from `return_predicted_label`, the true label and the highest
  prediction value as a percentage.
  """
  prediction = predictions[index]
  label = true_labels[index]
  image = true_images[index]
  predicted_label = return_predicted_label(prediction)

  plt.imshow(image)
  # Hide the tick marks on both axes
  plt.xticks([])
  plt.yticks([])
  title = f"Predicted Label: {predicted_label}, True Label: {label}, Probability image is correct: {np.max(prediction)*100:2.2f}"
  plt.title(title)

In [39]:
# Keeps the Colab session from disconnecting by keeping this cell running.
# Sleeping inside the loop avoids pinning a CPU core the way a bare
# `while True: pass` busy-wait does; interrupt the cell to stop it.
import time

while True:
  time.sleep(60)

KeyboardInterrupt: ignored