In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from zipfile import ZipFile
import cv2
from tqdm import tqdm
import os
import matplotlib.pyplot as plt

In [2]:
# Extract the Kaggle training archive into the current working directory.
# Skip this cell if you work locally and have already unzipped the file.
train_zip_path = '../input/dogs-vs-cats-redux-kernels-edition/train.zip'

# The handle is called `archive` so the built-in zip() is not shadowed, and the
# result of extractall() is not stored because that method returns None.
with ZipFile(train_zip_path, 'r') as archive:
    archive.extractall()

In [3]:
# Folder that holds the extracted training images; replace this with the path
# to your own train folder (the unzipped one, not the archive).
train_path = './train'

In [4]:
filename = os.listdir(train_path)

# Preview up to six training images on a 3 x 3 grid. Slicing the listing starts
# at the very first file and also works when the folder holds fewer than seven
# files, where indexing positions 1..6 would raise IndexError.
plt.figure(figsize=(10,10)) #creates the figure that holds the previews
for position, name in enumerate(filename[:6], start=1):
    img_array = cv2.imread(os.path.join(train_path, name)) #pixel data in BGR channel order
    if img_array is None:
        #cv2.imread returns None (it does not raise) when a file cannot be decoded
        continue
    resize_image = cv2.resize(img_array, (100, 100)) #resizes image to 100 x 100
    image = cv2.cvtColor(resize_image, cv2.COLOR_BGR2RGB) #matplotlib expects RGB, OpenCV loads BGR
    plt.subplot(3, 3, position) #subplot positions are 1-based
    plt.axis('off')
    plt.imshow(image)
plt.show()

In [5]:
def label_img(img):
    """Return the one-hot label for a training image file name.

    The text before the first dot of the name selects the class, so names
    such as ``'cat.0.jpg'`` or ``'dog.12.jpg'`` are recognised.

    Args:
        img: File name of the image (a string, not the pixel data).

    Returns:
        ``[0, 1]`` for a cat and ``[1, 0]`` for a dog; the label reads as
        ``[dogness, catness]``. A new list is returned on every call.

    Raises:
        ValueError: If the name starts with neither ``cat`` nor ``dog``.
            Failing here avoids silently returning ``None``, which would
            otherwise end up inside the label array.
    """
    animal = img.split('.')[0] #takes only the first part of the file name
    if animal == 'cat':
        return [0, 1]
    if animal == 'dog':
        return [1, 0]
    raise ValueError(f"cannot derive a cat/dog label from file name {img!r}")

In [6]:
def preprocess_train(limit=20000, image_size=(100, 100)):
    """Load training images as grayscale arrays together with their labels.

    Reads entries of ``train_path`` in the order ``os.listdir`` returns them,
    loads each one as a grayscale image, resizes it and pairs it with the
    label produced by ``label_img``.

    Args:
        limit: Maximum number of directory entries to consider. The default
            of 20000 matches the original hard-coded value; ``None`` uses
            every entry.
        image_size: ``(width, height)`` passed to ``cv2.resize``. Defaults to
            ``(100, 100)``.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` stacks the resized images and
        ``y`` holds the matching labels.
    """
    X = []
    y = []
    skipped = 0

    for name in tqdm(os.listdir(train_path)[:limit]):
        path = os.path.join(train_path, name) #creates a path specific to the image
        pixels = cv2.imread(path, cv2.IMREAD_GRAYSCALE) #loads the file as a single-channel image
        if pixels is None:
            #cv2.imread returns None for files it cannot decode; passing that
            #to cv2.resize would abort the whole run, so skip and count them
            skipped += 1
            continue
        X.append(cv2.resize(pixels, image_size))
        y.append(label_img(name))

    if skipped:
        print(f"Skipped {skipped} unreadable file(s) in {train_path}")
    return np.array(X), np.array(y)

In [7]:
# Load the images (X) and their one-hot labels (y) as NumPy arrays.
X, y = preprocess_train()

In [8]:
# Show the shapes of the X and y arrays (the notebook displays the cell's last expression).
X.shape, y.shape

In [9]:
# Add an explicit trailing channel axis so each image is (100, 100, 1), which is
# the input shape the model is built with. The channel count depends on the
# image type (1 for grayscale, 3 for RGB, etc.); these images were loaded as grayscale.
X = X.reshape(-1, 100, 100, 1)

In [10]:
# Show the shapes again to confirm the reshape added the channel axis to X.
X.shape, y.shape

In [11]:
from sklearn.model_selection import train_test_split 

In [12]:
# Split X and y into a training part (70%) and a validation part (the remaining 30%).
# The fixed random_state makes the split reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(X, y, train_size = 0.7, random_state = 42)

In [13]:
# Show the shapes of the training arrays only (the validation arrays are not displayed here).
X_train.shape, y_train.shape

In [14]:
from tensorflow.keras import layers #imports the layers module from keras to make model building easier

In [15]:
def build_model(input_shape=(100, 100, 1)):
    """Build and compile the convolutional cat/dog classifier.

    The network stacks three Conv2D + MaxPool2D stages (192, 128 and 64
    filters with 3 x 3 kernels and 3 x 3 pooling), flattens the result and
    ends in a two-unit output layer.

    The labels are one-hot and mutually exclusive, so the output layer uses
    softmax with categorical cross-entropy. With two independent sigmoid
    units and binary cross-entropy, Keras resolves the ``'accuracy'`` metric
    to an element-wise binary accuracy instead of per-image class accuracy.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to ``(100, 100, 1)``, the original hard-coded value.

    Returns:
        A compiled ``tf.keras.Sequential`` model whose two outputs sum to 1.
    """
    model = tf.keras.Sequential() #creates our base model
    model.add(layers.InputLayer(input_shape = input_shape)) #defines the input shape
    model.add(layers.Conv2D(192, (3, 3), activation = 'relu')) #first Conv2D
    model.add(layers.MaxPool2D((3, 3))) #first Max Pool
    model.add(layers.Conv2D(128, (3, 3), activation = 'relu')) #second Conv2D
    model.add(layers.MaxPool2D((3, 3))) #second Max Pool
    model.add(layers.Conv2D(64, (3, 3), activation = 'relu')) #final Conv2D
    model.add(layers.MaxPool2D((3, 3))) #final Max Pool
    model.add(layers.Flatten()) #flattens each sample's feature maps into one vector
    model.add(layers.Dense(2, activation = 'softmax')) #output layer: one probability per class

    #categorical cross-entropy matches the one-hot labels and makes 'accuracy'
    #resolve to categorical (argmax) accuracy
    model.compile(optimizer = 'adam', loss = tf.keras.losses.CategoricalCrossentropy(), metrics = ['accuracy'])

    return model

In [16]:
# Create the compiled network.
model = build_model()

In [17]:
# Print a summary of the model.
model.summary()

In [18]:
# Train for 10 epochs on the training split, passing the validation split as
# validation_data. `history` keeps the metrics that the plotting cells read
# from history.history.
history = model.fit(X_train, y_train, validation_data = (X_val, y_val), epochs = 10)

In [19]:
# Loss per epoch on the training and validation data. The second curve comes
# from 'val_loss', so it is labelled "validation" rather than "test".
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')
plt.title('Loss per epoch')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(loc='upper left')
plt.show()

In [20]:
# Accuracy per epoch on the training and validation data. The second curve
# comes from 'val_accuracy', so it is labelled "validation" rather than "test".
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='validation')
plt.title('Accuracy per epoch')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(loc='upper left')
plt.show()