# Dogs vs. Cats

Type: Binary Classification

Data: https://www.kaggle.com/datasets/erkamk/cat-and-dog-images-dataset

## Imports and Constants

In [None]:
from pathlib import Path
from os import path

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix, matthews_corrcoef, roc_auc_score, classification_report

In [None]:
# Default (width, height) used for every matplotlib figure in this notebook.
FIGSIZE = (12, 10)

plt.rcParams['figure.figsize'] = FIGSIZE
sns.set_style('whitegrid')

## Get the data

In [None]:
# Root folder of the Kaggle dataset; it is searched recursively for .png files below.
dataset_root = '../input/cat-and-dog-images-dataset/Dog and Cat .png'
image_dir = Path(dataset_root)
image_dir

In [None]:
# Collect every .png below image_dir. The paths are sorted because directory
# traversal order depends on the filesystem; a fixed order keeps the seeded
# train/test split further down reproducible between runs and machines.
filepaths = sorted(image_dir.glob(r'**/*.png'))
filepaths[:5]

In [None]:
# Demo: os.path.split breaks a path into a (directory, file name) pair.
sample_file = '../input/cat-and-dog-images-dataset/Dog and Cat .png/Dog/g29.png'
path.split(sample_file)

In [None]:
# Demo: the directory part of the path (the first element of os.path.split).
path.dirname('../input/cat-and-dog-images-dataset/Dog and Cat .png/Dog/g29.png')

In [None]:
# Demo: splitting the directory part again isolates the class folder.
path.split(path.dirname('../input/cat-and-dog-images-dataset/Dog and Cat .png/Dog/g29.png'))

In [None]:
# Demo: the name of the folder that contains the file is the class label.
path.basename(path.dirname('../input/cat-and-dog-images-dataset/Dog and Cat .png/Dog/g29.png'))

In [None]:
# The label of an image is the name of the folder that directly contains it.
# Wrapping in Path() lets this work for both Path objects and plain strings.
labels = [Path(filepath).parent.name for filepath in filepaths]
labels[:5]

In [None]:
# Keras' flow_from_dataframe rejects non-string file names with a TypeError,
# so the Path objects produced by glob are converted to str here.
filepaths = pd.Series(filepaths, name='Filepath').astype(str)
labels = pd.Series(labels, name='Label')

# One row per image: its file path and its class label.
image_df = pd.concat([filepaths, labels], axis=1)
image_df

## Preprocessing

In [None]:
def preprocess_inputs(df, test_size=0.1, random_state=1):
    """Split the image table into a training frame and a held-out test frame.

    Args:
        df: DataFrame to split. A copy is split, so the caller's frame is
            left untouched.
        test_size: Forwarded to ``train_test_split``; defaults to 0.1.
        random_state: Seed forwarded to ``train_test_split``; defaults to 1.

    Returns:
        A ``(train_df, test_df)`` tuple.
    """
    train_df, test_df = train_test_split(
        df.copy(),
        test_size=test_size,
        random_state=random_state,
    )

    return train_df, test_df

In [None]:
# Split once; train_df is later divided into training/validation subsets by Keras.
split_frames = preprocess_inputs(image_df)
train_df, test_df = split_frames
train_df

In [None]:
# Number of training rows per class label.
train_labels = train_df['Label']
train_labels.value_counts()

## Set up Keras image generators

In [None]:
ImageDataGenerator = tf.keras.preprocessing.image.ImageDataGenerator

# Training generator: multiplies pixel values by 1/255, applies random
# horizontal flips and reserves 20% of its data for the 'validation' subset.
train_gen = ImageDataGenerator(
    validation_split=0.2,
    horizontal_flip=True,
    rescale=1./255,
)

# Test generator: rescaling only.
test_gen = ImageDataGenerator(rescale=1./255)

In [None]:
# Column names and label encoding shared by all three iterators.
shared_flow_args = dict(
    x_col='Filepath',
    y_col='Label',
    class_mode='binary',
)

# Training and validation iterators are two subsets of the same generator.
train_images = train_gen.flow_from_dataframe(
    train_df,
    seed=1,
    subset='training',
    **shared_flow_args,
)

val_images = train_gen.flow_from_dataframe(
    train_df,
    seed=1,
    subset='validation',
    **shared_flow_args,
)

# No shuffling, so predictions stay aligned with test_images.labels.
test_images = test_gen.flow_from_dataframe(
    test_df,
    shuffle=False,
    **shared_flow_args,
)

## Model building

Instead of measuring the model's performance with accuracy alone, let's track several metrics.

In [None]:
keras_metrics = tf.keras.metrics

# Metrics passed to every model.compile() call below; the names are the keys
# under which the values appear in history.history.
METRICS = [
    metric_class(name=metric_name)
    for metric_class, metric_name in (
        (keras_metrics.BinaryAccuracy, 'accuracy'),
        (keras_metrics.Precision, 'precision'),
        (keras_metrics.Recall, 'recall'),
        (keras_metrics.AUC, 'auc'),
    )
]

### First Try

In [None]:
# First attempt: two conv/pool stages followed by two wide dense layers.
layers = tf.keras.layers

inputs = tf.keras.Input(shape=(256, 256, 3))

hidden_layers = [
    layers.Conv2D(16, (3, 3), activation='relu'),
    layers.MaxPool2D(),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPool2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(256, activation='relu'),
]

x = inputs
for hidden_layer in hidden_layers:
    x = hidden_layer(x)

# A single sigmoid unit for the binary label.
outputs = layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=METRICS)

# Both callbacks watch the validation loss: training stops after 5 epochs
# without improvement (best weights restored), and the learning rate is
# reduced after 3 such epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3)

history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=10,
    callbacks=[early_stopping, reduce_lr],
)

In [None]:
# Training vs. validation curves for the selected metrics.
# Add 'accuracy', 'precision' or 'recall' to the tuple to compare more curves.
metric_names = ('auc',)

for metric_name in metric_names:
    plt.plot(history.history[metric_name], label=metric_name)

for metric_name in metric_names:
    val_metric_name = f'val_{metric_name}'
    plt.plot(history.history[val_metric_name], label=val_metric_name)

plt.title('model performance')
plt.ylabel('metric')
plt.xlabel('epoch')

plt.legend()

plt.show()

In [None]:
# Training vs. validation loss per epoch. The second curve is 'val_loss',
# i.e. it is measured on the validation subset, not on the test set.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')

plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')

plt.legend(loc='upper left')

plt.show()

### Second Try

In [None]:
# Second attempt: same convolutional stack, but a single small dense layer.
layers = tf.keras.layers

inputs = tf.keras.Input(shape=(256, 256, 3))

hidden_layers = [
    layers.Conv2D(16, (3, 3), activation='relu'),
    layers.MaxPool2D(),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPool2D(),
    layers.Flatten(),
    layers.Dense(16, activation='relu'),
]

x = inputs
for hidden_layer in hidden_layers:
    x = hidden_layer(x)

# A single sigmoid unit for the binary label.
outputs = layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=METRICS)

# Both callbacks watch the validation loss: training stops after 5 epochs
# without improvement (best weights restored), and the learning rate is
# reduced after 3 such epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3)

history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=10,
    callbacks=[early_stopping, reduce_lr],
)

In [None]:
# Training vs. validation curves for the selected metrics.
# Add 'accuracy', 'precision' or 'recall' to the tuple to compare more curves.
metric_names = ('auc',)

for metric_name in metric_names:
    plt.plot(history.history[metric_name], label=metric_name)

for metric_name in metric_names:
    val_metric_name = f'val_{metric_name}'
    plt.plot(history.history[val_metric_name], label=val_metric_name)

plt.title('model performance')
plt.ylabel('metric')
plt.xlabel('epoch')

plt.legend()

plt.show()

In [None]:
# Training vs. validation loss per epoch. The second curve is 'val_loss',
# i.e. it is measured on the validation subset, not on the test set.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')

plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')

plt.legend(loc='upper left')

plt.show()

### Third Try

In [None]:
# Third attempt: fewer filters in the second conv layer and an even smaller dense layer.
layers = tf.keras.layers

inputs = tf.keras.Input(shape=(256, 256, 3))

hidden_layers = [
    layers.Conv2D(16, (3, 3), activation='relu'),
    layers.MaxPool2D(),
    layers.Conv2D(16, (3, 3), activation='relu'),
    layers.MaxPool2D(),
    layers.Flatten(),
    layers.Dense(8, activation='relu'),
]

x = inputs
for hidden_layer in hidden_layers:
    x = hidden_layer(x)

# A single sigmoid unit for the binary label.
outputs = layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=METRICS)

# Both callbacks watch the validation loss: training stops after 5 epochs
# without improvement (best weights restored), and the learning rate is
# reduced after 3 such epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3)

history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=10,
    callbacks=[early_stopping, reduce_lr],
)

In [None]:
# Training vs. validation curves for the selected metrics.
# Add 'accuracy', 'precision' or 'recall' to the tuple to compare more curves.
metric_names = ('auc',)

for metric_name in metric_names:
    plt.plot(history.history[metric_name], label=metric_name)

for metric_name in metric_names:
    val_metric_name = f'val_{metric_name}'
    plt.plot(history.history[val_metric_name], label=val_metric_name)

plt.title('model performance')
plt.ylabel('metric')
plt.xlabel('epoch')

plt.legend()

plt.show()

In [None]:
# Training vs. validation loss per epoch. The second curve is 'val_loss',
# i.e. it is measured on the validation subset, not on the test set.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')

plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')

plt.legend(loc='upper left')

plt.show()

### Fourth Try

In [None]:
# Fourth attempt: as the third, but the conv layers use 'same' padding.
layers = tf.keras.layers

inputs = tf.keras.Input(shape=(256, 256, 3))

hidden_layers = [
    layers.Conv2D(16, (3, 3), padding='same', activation='relu'),
    layers.MaxPool2D(),
    layers.Conv2D(16, (3, 3), padding='same', activation='relu'),
    layers.MaxPool2D(),
    layers.Flatten(),
    layers.Dense(8, activation='relu'),
]

x = inputs
for hidden_layer in hidden_layers:
    x = hidden_layer(x)

# A single sigmoid unit for the binary label.
outputs = layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=METRICS)

# Both callbacks watch the validation loss: training stops after 5 epochs
# without improvement (best weights restored), and the learning rate is
# reduced after 3 such epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3)

history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=10,
    callbacks=[early_stopping, reduce_lr],
)

In [None]:
# Training vs. validation curves for the selected metrics.
# Add 'accuracy', 'precision' or 'recall' to the tuple to compare more curves.
metric_names = ('auc',)

for metric_name in metric_names:
    plt.plot(history.history[metric_name], label=metric_name)

for metric_name in metric_names:
    val_metric_name = f'val_{metric_name}'
    plt.plot(history.history[val_metric_name], label=val_metric_name)

plt.title('model performance')
plt.ylabel('metric')
plt.xlabel('epoch')

plt.legend()

plt.show()

In [None]:
# Training vs. validation loss per epoch. The second curve is 'val_loss',
# i.e. it is measured on the validation subset, not on the test set.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')

plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')

plt.legend(loc='upper left')

plt.show()

### Fifth Try

In [None]:
# Fifth attempt: adds dropout after the first conv layer and after a wider dense layer.
layers = tf.keras.layers

inputs = tf.keras.Input(shape=(256, 256, 3))

hidden_layers = [
    layers.Conv2D(16, (3, 3), padding='same', activation='relu'),
    layers.Dropout(0.2),
    layers.MaxPool2D(),
    layers.Conv2D(16, (3, 3), padding='same', activation='relu'),
    layers.MaxPool2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
]

x = inputs
for hidden_layer in hidden_layers:
    x = hidden_layer(x)

# A single sigmoid unit for the binary label.
outputs = layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=METRICS)

# Both callbacks watch the validation loss: training stops after 5 epochs
# without improvement (best weights restored), and the learning rate is
# reduced after 3 such epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3)

history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=10,
    callbacks=[early_stopping, reduce_lr],
)

In [None]:
# Training vs. validation curves for the selected metrics.
# Add 'accuracy', 'precision' or 'recall' to the tuple to compare more curves.
metric_names = ('auc',)

for metric_name in metric_names:
    plt.plot(history.history[metric_name], label=metric_name)

for metric_name in metric_names:
    val_metric_name = f'val_{metric_name}'
    plt.plot(history.history[val_metric_name], label=val_metric_name)

plt.title('model performance')
plt.ylabel('metric')
plt.xlabel('epoch')

plt.legend()

plt.show()

In [None]:
# Training vs. validation loss per epoch. The second curve is 'val_loss',
# i.e. it is measured on the validation subset, not on the test set.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')

plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')

plt.legend(loc='upper left')

plt.show()

### Sixth Try

In [None]:
# Sixth attempt: two dense+dropout stages, trained for up to 100 epochs
# (early stopping decides when to end).
layers = tf.keras.layers

inputs = tf.keras.Input(shape=(256, 256, 3))

hidden_layers = [
    layers.Conv2D(16, (3, 3), padding='same', activation='relu'),
    layers.MaxPool2D(),
    layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
    layers.MaxPool2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
]

x = inputs
for hidden_layer in hidden_layers:
    x = hidden_layer(x)

# A single sigmoid unit for the binary label.
outputs = layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=METRICS)

# Both callbacks watch the validation loss: training stops after 5 epochs
# without improvement (best weights restored), and the learning rate is
# reduced after 3 such epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3)

history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=100,
    callbacks=[early_stopping, reduce_lr],
)

In [None]:
# Training vs. validation curves for the selected metrics.
# Add 'accuracy', 'precision' or 'recall' to the tuple to compare more curves.
metric_names = ('auc',)

for metric_name in metric_names:
    plt.plot(history.history[metric_name], label=metric_name)

for metric_name in metric_names:
    val_metric_name = f'val_{metric_name}'
    plt.plot(history.history[val_metric_name], label=val_metric_name)

plt.title('model performance')
plt.ylabel('metric')
plt.xlabel('epoch')

plt.legend()

plt.show()

In [None]:
# Training vs. validation loss per epoch. The second curve is 'val_loss',
# i.e. it is measured on the validation subset, not on the test set.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')

plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')

plt.legend(loc='upper left')

plt.show()

In [None]:
# Evaluate the last trained model on the held-out test images.
test_scores = model.evaluate(test_images)
test_scores

In [None]:
# Mapping from class name to the integer label used by the generator.
class_to_index = train_images.class_indices
class_to_index

In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
predictions = (model.predict(test_images) >= 0.5).astype(int)
cm = confusion_matrix(test_images.labels, predictions, labels=[0, 1])

# Class names ordered by their integer index so the ticks line up with labels=[0, 1].
class_names = sorted(test_images.class_indices, key=test_images.class_indices.get)

ax = sns.heatmap(
    cm,
    annot=True,
    fmt='g',
    vmin=0,
    cmap='Blues',
    cbar=False,
    xticklabels=class_names,
    yticklabels=class_names,
)
# confusion_matrix puts the true classes on the rows and the predictions on the columns.
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')

plt.show()

In [None]:
# Take the display names from the generator's own class-to-index mapping
# (ordered by index) instead of assuming a fixed order of the class folders.
class_indices = test_images.class_indices
target_names = sorted(class_indices, key=class_indices.get)

print(classification_report(test_images.labels, predictions, labels=[0, 1], target_names=target_names))

In [None]:
# Flatten the predictions into a 1-D array.
predictions.ravel()

In [None]:
# Matthews correlation coefficient between true labels and hard predictions.
y_true = test_images.labels
y_pred = predictions.reshape(-1)
matthews_corrcoef(y_true, y_pred)

In [None]:
# ROC AUC ranks examples by score, so it must be given the predicted
# probabilities; thresholded 0/1 predictions collapse the curve to one point.
# test_images is not shuffled, so the outputs align with test_images.labels.
probabilities = model.predict(test_images).reshape(-1)
roc_auc_score(test_images.labels, probabilities)

# For Home

Improve your results from last session by creating a CNN.