In [None]:
import os
import cv2
from shutil import copyfile
from random import seed, random
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as imread
from PIL import Image

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout, ReLU
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
base_path = "data/cats-vs-dogs"
cats_path = base_path + "/Cat"
dogs_path = base_path + "/Dog"

# Build the image index: one record per directory entry, holding the file path
# and the label implied by the folder it was found in.
# os.listdir() returns entries in arbitrary order, so each listing is sorted to
# make the row order (and therefore the seeded train/test split that is built
# from it later) reproducible across machines and runs.
data = []
for folder, label in ((cats_path, "cat"), (dogs_path, "dog")):
    for filename in sorted(os.listdir(folder)):
        data.append({"filename": folder + "/" + filename, "label": label})

# Explicit columns keep the CSV header intact even if both folders are empty,
# so the file can still be read back with pd.read_csv.
df = pd.DataFrame(data, columns=["filename", "label"])
df.to_csv("data/cats-vs-dogs.csv", index=False)

In [None]:
# Read the image index (filename, label) back from the CSV written to disk
# and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="data/cats-vs-dogs.csv")
df.head(n=5)

In [None]:
# Open the image stored at index label 231 of the 'filename' column; as the
# cell's last expression, the resulting PIL image is what the cell outputs.
Image.open(df.loc[231, 'filename'])

In [None]:
import tqdm, glob, os

In [None]:
# Integer class id for each label string used in the index.
labels = {'dog': 1, 'cat': 0}

X = []        # one (128, 128, 3) RGB pixel array per successfully loaded image
y = []        # integer class id for the image at the same position in X
skipped = []  # (path, error description) for every row that could not be loaded

size = 128, 128

# Walk the index row by row so every image is paired with the label from its
# own row; this avoids re-scanning the whole dataframe for each image.
for image_path, label_name in zip(df['filename'], df['label']):
    try:
        # The context manager closes the underlying file handle as soon as the
        # converted/resized copy of the pixels has been taken.
        with Image.open(image_path) as img:
            pixels = np.array(img.convert('RGB').resize(size))
        target = labels[label_name]
    except Exception as exc:
        # Loading stays best-effort (unreadable or non-image files are skipped),
        # but the failure is recorded instead of being silently discarded, and
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        skipped.append((image_path, repr(exc)))
        continue

    # Append only after both values exist so X and y can never get out of step.
    X.append(pixels)
    y.append(target)

print(f"loaded {len(X)} images, skipped {len(skipped)}")

In [None]:
# Turn the collected Python lists into ndarrays, then persist both in one
# compressed .npz archive under the keys 'features' and 'labels'.
X_arr, y_arr = np.array(X), np.array(y)

np.savez_compressed(
    'data/dataset.npz',
    features=X_arr,
    labels=y_arr,
)

In [None]:
# np.load() on an .npz archive returns an NpzFile that keeps the file open and
# reads each array only when it is indexed. Pull both arrays out inside a
# `with` block so the file handle is released once they are in memory.
with np.load('data/dataset.npz') as data:
    X = data['features']
    y = data['labels']

In [None]:
# Hold out 20% of the samples as the test partition; random_state is fixed so
# the same input always yields the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Number of samples in each partition.
(len(X_train), len(X_test))

In [None]:
def _conv_block(filters, **conv_kwargs):
    """Return the layers of one Conv(3x3, same) -> BatchNorm -> ReLU -> MaxPool(2x2) block.

    Extra keyword arguments are forwarded to the Conv2D layer (used to declare
    the input shape on the first block).
    """
    return [
        Conv2D(filters, (3, 3), padding='same', **conv_kwargs),
        BatchNormalization(),
        ReLU(),
        MaxPooling2D((2, 2)),
    ]


# Three convolutional blocks with 32, 64 and 128 filters, followed by a dense
# head (256 units, ReLU, 50% dropout) ending in a single sigmoid unit.
model = Sequential([
    *_conv_block(32, input_shape=(128, 128, 3)),
    *_conv_block(64),
    *_conv_block(128),
    Flatten(),
    Dense(256),
    ReLU(),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Configure training: Adam with a 1e-4 learning rate, binary cross-entropy
# loss, and accuracy as the reported metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
# Early stopping: watch val_loss and, when it has not improved for `patience`
# consecutive epochs, stop and restore the best weights seen so far.
# NOTE(review): with epochs=5 below, a patience of 5 can never be exhausted, so
# this callback cannot end the run early as configured. Raise `epochs` or lower
# `patience` if early stopping is meant to take effect.
callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

# The arrays are already in memory, so ImageDataGenerator is used here only to
# rescale pixel values from [0, 255] to [0, 1] while batching via .flow().
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

BATCH_SIZE = 32

train_generator = train_datagen.flow(
    X_train, y_train,
    batch_size=BATCH_SIZE
)

# Validation batches do not need shuffling; a fixed order keeps evaluation
# deterministic.
# NOTE(review): the held-out test split doubles as validation data here, so it
# influences early stopping — carve a separate validation split from X_train if
# X_test is also used for the final evaluation.
val_generator = val_datagen.flow(
    X_test, y_test,
    batch_size=BATCH_SIZE,
    shuffle=False
)

# steps_per_epoch / validation_steps are intentionally omitted: the iterators
# know their own length, so every epoch covers all batches. The previous
# `n // batch_size` values silently dropped the final partial batch, which left
# some validation samples out of the reported metrics.
history = model.fit(
    train_generator,
    epochs=5,
    validation_data=val_generator,
    callbacks=[callback]
)