<a href="https://colab.research.google.com/github/Adam-Rao/Cats-and-Dogs/blob/model/CatsAndDogs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
import _io
import os
import pickle as pkl
import time
from typing import List, Tuple

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

In [68]:
# --- Dataset locations (paths under the Google Drive mount) ---
CAT_DATA_PATH = "/content/drive/My Drive/Cats and Dogs/PetImages/Cat"
DOG_DATA_PATH = "/content/drive/My Drive/Cats and Dogs/PetImages/Dog"
GENERAL_DATA_PATH = "/content/drive/My Drive/CAD PROJECT/Data"

# --- Input pipeline settings ---
BATCH_SIZE = 32  # number of samples per dataset batch
IMG_DIM = 224  # images are resized to IMG_DIM x IMG_DIM
INPUT_SIZE = [IMG_DIM, IMG_DIM, 3]  # presumably the model input shape (H, W, channels) -- TODO confirm

# --- Type aliases: one (file path, label) sample, and a list of such samples ---
DATA_TUPLE_TYPE = Tuple[str, str]
DATA_LIST_TYPE = List[DATA_TUPLE_TYPE]

In [10]:
def create_data_list(label: str, path: str) -> DATA_LIST_TYPE:
  """
  Build a list of (file path, label) tuples for every entry in a directory.

  Args:
    label: Label attached to every entry found in `path`.
    path: Directory whose entries are listed (not recursive).

  Returns:
    One (full path, label) tuple per directory entry, ordered by entry name.
    An empty list if the directory is empty.
  """
  # os.listdir returns entries in arbitrary order; sorting makes the result
  # reproducible between runs. os.path.join avoids a doubled separator when
  # `path` already ends with one.
  return [(os.path.join(path, name), label) for name in sorted(os.listdir(path))]

In [11]:
def shuffle_data_list(data_list: DATA_LIST_TYPE, random_state=None) -> DATA_LIST_TYPE:
  """
  Return a shuffled copy of the data list.

  Args:
    data_list: List of (file path, label) tuples to shuffle.
    random_state: Optional seed (or RandomState) forwarded to
      sklearn.utils.shuffle so the order can be reproduced. The default
      None keeps the previous unseeded behaviour.

  Returns:
    A new list holding the same tuples in shuffled order; per the
    scikit-learn documentation the input is not modified.
  """
  return shuffle(data_list, random_state=random_state)

In [None]:
def save_data(data_list: DATA_LIST_TYPE, save_path: str) -> None:
  """
  Pickle the data list to a file.

  Args:
    data_list: List of (file path, label) tuples to store.
    save_path: Destination file; it is created or overwritten.
  """
  # The with-block guarantees the file is flushed and closed, even if
  # pickling raises.
  with open(save_path, "wb") as file:
    pkl.dump(data_list, file)

In [12]:
def load_data(data_path: str) -> _io.BufferedReader:
  """
  Open the file at `data_path` for binary reading and return the handle.

  The file is left open: the caller is responsible for closing it.
  """
  reader = open(data_path, mode="rb")
  return reader

In [3]:
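# One-time dataset build, kept commented out: lists the cat and dog images,
# shuffles them and pickles the result to data.pkl, which the next cell loads.
# Uncomment and run once if data.pkl does not exist yet.
# cat_data_list = create_data_list("Cat", CAT_DATA_PATH)
# dog_data_list = create_data_list("Dog", DOG_DATA_PATH)
# general_data_list = cat_data_list + dog_data_list
# shuffled_general_data = shuffle_data_list(general_data_list)
# save_data(shuffled_general_data, os.path.join(GENERAL_DATA_PATH, "data.pkl"))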

In [None]:
# Load the pickled list of (file path, label) tuples.
# NOTE: pickle.load can execute arbitrary code -- only load a data.pkl that
# you created yourself.
# The with-block closes the file handle as soon as the list has been read.
with load_data(os.path.join(GENERAL_DATA_PATH, "data.pkl")) as data_file:
  data = pkl.load(data_file)

In [18]:
def process_image(img_path: str):
  """
  Read an image file and resize it to IMG_DIM x IMG_DIM.

  Args:
    img_path: Path of the image file to read.

  Returns:
    The decoded 3-channel image, resized to [IMG_DIM, IMG_DIM].
    No pixel-value rescaling is applied here.
  """
  raw_bytes = tf.io.read_file(img_path)
  decoded = tf.image.decode_png(raw_bytes, channels=3)
  return tf.image.resize(decoded, [IMG_DIM, IMG_DIM])

In [24]:
def image_label(img_path: str, label: str) -> Tuple:
  """
  Pair a processed image with its label.

  Args:
    img_path: Path of the image file, handed to process_image.
    label: Label for the image; returned unchanged.

  Returns:
    A (processed image, label) tuple.
  """
  return process_image(img_path), label

In [65]:
# Gather the label of every sample (second element of each data tuple),
# then reduce them to the sorted set of distinct class names.
labels = np.array([sample[1] for sample in data])
unique_labels = np.unique(labels)

In [66]:
# X values: the image file path of every sample (first element of each tuple)
images = [sample[0] for sample in data]
# y values: one boolean array per sample, True where the sample's label
# matches the entry in unique_labels
boolean_labels = [current == unique_labels for current in labels]

In [71]:
def create_data_batches(X, y=None, valid_data=False, test_data=False):
  """
  Build a batched tf.data dataset from X and, when given, y.

  Args:
    X: Image file paths (each one is passed to process_image).
    y: Labels aligned with X. Required unless test_data is True.
    valid_data: Build a validation dataset (labelled, not shuffled).
    test_data: Build a test dataset (images only, no labels). Takes
      precedence over valid_data.

  Returns:
    A dataset batched into groups of BATCH_SIZE. Training data (neither
    flag set) is shuffled before the images are loaded.

  Raises:
    ValueError: If y is missing for training or validation data.
  """
  if test_data:
    print("Creating test data batches...")
    dataset = tf.data.Dataset.from_tensor_slices(tf.constant(X))
    # The image loader is process_image; the previous name `process_images`
    # was undefined and raised NameError on this path.
    return dataset.map(process_image).batch(BATCH_SIZE)

  if y is None:
    raise ValueError("y is required unless test_data=True")

  if valid_data:
    print("Creating validation data batches")
  else:
    print("Creating train data batches")

  dataset = tf.data.Dataset.from_tensor_slices(
      (tf.constant(X), tf.constant(y))
  )
  if not valid_data:
    # Shuffle the (path, label) pairs before mapping, so only the
    # lightweight entries are shuffled rather than decoded images.
    dataset = dataset.shuffle(buffer_size=len(X))
  return dataset.map(image_label).batch(BATCH_SIZE)

Creating train data batches


<BatchDataset shapes: ((None, 224, 224, 3), (None, 2)), types: (tf.float32, tf.bool)>

In [67]:
# Intended to keep the Colab session from disconnecting: the cell never
# finishes on its own. Sleeping between iterations avoids pegging a CPU
# core the way a bare `pass` loop does. Interrupt the cell to stop it.
while True:
  time.sleep(60)

KeyboardInterrupt: ignored