<a href="https://colab.research.google.com/github/Adam-Rao/Cats-and-Dogs/blob/model/CatsAndDogs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!unzip "/content/drive/My Drive/kagglecatsanddogs_3367a.zip" -d "/content/drive/My Drive/Cats and Dogs Project"

In [None]:
import _io
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle as pkl
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from tensorflow.keras import Sequential
from typing import List, Tuple

In [None]:
# Number of samples per batch.
# NOTE(review): not referenced in the visible part of the notebook; presumably
# used when batching the dataset later on -- confirm.
BATCH_SIZE = 32
# Alias for a list of (file path, label) string pairs, as built by create_data_list.
DATA_LIST_TYPE = List[Tuple[str, str]]
# Side length in pixels; preprocess_image resizes every image to IMG_SIZE x IMG_SIZE.
IMG_SIZE = 224

In [None]:
def create_data_list(data_path: str, label: str) -> DATA_LIST_TYPE:
  """
  Builds a list of (file path, label) tuples for the JPEG files in a folder.

  data_path: path to folder containing desired files
  label: string argument for the desired label

  Only the direct entries of data_path are examined (no recursion). An entry
  is kept when its extension is ".jpg", compared case-insensitively so that
  files named e.g. "1.JPG" are not silently skipped. Entries are visited in
  sorted name order because os.listdir returns them in arbitrary order, which
  would otherwise make the resulting list differ between runs.

  Returns the list of tuples; it is empty when the folder has no matching file.
  Raises whatever os.listdir raises (e.g. FileNotFoundError) for a bad path.
  """
  return [
      (f"{data_path}/{file_name}", label)
      for file_name in sorted(os.listdir(data_path))
      if os.path.splitext(file_name)[1].lower() == ".jpg"
  ]

In [None]:
# Gather (file path, label) pairs for each class from its own image folder.
cat_data_list = create_data_list(
    "/content/drive/My Drive/Cats and Dogs Project/PetImages/Cat",
    "CAT",
)
dog_data_list = create_data_list(
    "/content/drive/My Drive/Cats and Dogs Project/PetImages/Dog",
    "DOG",
)

# Single combined list: every cat entry first, followed by every dog entry.
general_data_list = [*cat_data_list, *dog_data_list]

In [None]:
def shuffle_data_list(data_list: DATA_LIST_TYPE) -> DATA_LIST_TYPE:
  """
  Produces a randomly reordered version of the given data list.

  data_list: List of tuples holding data

  The work is delegated to sklearn.utils.shuffle; no random_state is passed,
  so the resulting order is not fixed between calls.
  Returns shuffled list of tuples
  """
  reordered = shuffle(data_list)
  return reordered

In [None]:
# Mix the classes together: general_data_list holds all cat entries first and
# all dog entries after them, so it is shuffled before being saved and split.
shuffled_data_list = shuffle_data_list(general_data_list)

In [None]:
def save_data_list(data_list: DATA_LIST_TYPE, file_path: str) -> None:
  """
  Saves data list to specified file using pickle.

  data_list: List of tuples to be saved
  file_path: Path to file where list is to be saved; an existing file is
    overwritten because it is opened in "wb" mode

  Returns nothing. Raises whatever open() or pickle raise (e.g.
  FileNotFoundError when the parent folder does not exist).
  """
  # The with-block guarantees the handle is flushed and closed, even when
  # pickling fails, so the file is complete on disk before anyone reads it back.
  with open(file_path, "wb") as save_file:
    pkl.dump(data_list, save_file)

In [None]:
def load_data_file(file_path: str) -> _io.BufferedReader:
  """
  Opens a saved data file for reading in binary mode.

  file_path: Path to file containing saved data

  Returns the open file handle without reading from it. Closing it is left to
  the caller, for example by using the result in a with statement.
  """
  reader = open(file_path, mode="rb")
  return reader

In [None]:
# Persist the shuffled (file path, label) list to Drive as a pickle file so the
# same ordering can be loaded back instead of being rebuilt.
save_data_list(shuffled_data_list, "/content/drive/My Drive/Cats and Dogs Project/Extracted Data/data.pkl")

In [None]:
# Read the pickled (file path, label) list back from Drive; the with-block
# closes the handle returned by load_data_file once unpickling is done.
with load_data_file("/content/drive/My Drive/Cats and Dogs Project/Extracted Data/data.pkl") as data_file:
  shuffled_data = pkl.load(data_file)

In [None]:
# The label is the second element of every (file path, label) entry.
labels = [entry[1] for entry in shuffled_data]

# Distinct label values found in the data.
unique_labels = np.unique(labels)

In [None]:
# One boolean array per sample, comparing its label against every entry of
# unique_labels; these arrays will form our y values.
boolean_labels = [unique_labels == label for label in labels]

In [None]:
def create_train_test_datasets(X, y, TEST_SIZE=0.2, RANDOM_STATE=42):
  """
  Splits X and y into a training part and a held-out part.

  X: Feature values
  y: Label values
  TEST_SIZE: Passed to train_test_split as test_size. Default is 0.2,
    i.e. 20% of the original data goes to the held-out part
  RANDOM_STATE: Passed to train_test_split as random_state. Default is 42

  The held-out part can serve as a test set or as a validation set.
  Returns the tuple (X_train, X_test, y_train, y_test).
  """
  split_options = {"test_size": TEST_SIZE, "random_state": RANDOM_STATE}
  train_features, held_out_features, train_targets, held_out_targets = train_test_split(
      X, y, **split_options
  )

  return train_features, held_out_features, train_targets, held_out_targets

In [None]:
def preprocess_image(file_path: str):
  """
  Reads a JPEG file and turns it into a fixed-size float32 RGB tensor.

  file_path: Path to the image file

  Returns preprocessed image: a tensor resized to IMG_SIZE x IMG_SIZE with
  three colour channels.
  """

  image = tf.io.read_file(file_path)
  # Force three channels: without it the decoder keeps the channel count
  # stored in the file, so a grayscale JPEG would yield a one-channel tensor
  # that cannot be batched together with the RGB images.
  image = tf.image.decode_jpeg(image, channels=3)
  # Convert to float32 before resizing.
  image = tf.image.convert_image_dtype(image, tf.float32)
  image = tf.image.resize(image, [IMG_SIZE, IMG_SIZE])

  return image

In [None]:
def return_image_label(file_path: str, label: str):
  """
  Pairs the preprocessed image at a path with its label.

  file_path: Path to image
  label: Image label

  The image is produced by preprocess_image; the label is passed through
  unchanged.
  Returns the tuple (preprocessed image, label)
  """
  return preprocess_image(file_path), label

In [None]:
import time

# Keeps Colab from disconnecting: the cell runs until it is interrupted by
# hand. Sleeping between iterations avoids the busy-wait of an empty loop,
# which would keep a CPU core fully loaded for as long as the cell runs.
while True:
  time.sleep(60)

KeyboardInterrupt: ignored