## Load the Packages

In [5]:

import os, cv2, random
import numpy as np
import pandas as pd
import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_score, recall_score
from tqdm import tqdm
import matplotlib.pyplot as plt

from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import plot_model, model_to_dot
from IPython.display import SVG

from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()
print("Authentication successful!")

os.environ['KAGGLE_CONFIG_DIR'] = os.getcwd()
!kaggle competitions download -c dogs-vs-cats -p ./data

import zipfile

with zipfile.ZipFile('./data/train.zip', 'r') as zip_ref:
    zip_ref.extractall('./data/train')

with zipfile.ZipFile('./data/test.zip', 'r') as zip_ref:
    zip_ref.extractall('./data/test')

import shutil
from sklearn.model_selection import train_test_split
import os

original_train_dir = "data/train/train" 


train_dir = "data/train"
val_dir = "data/val"
categories = ["cat", "dog"]

for folder in [train_dir, val_dir]:
    for category in categories:
        os.makedirs(os.path.join(folder, category), exist_ok=True)

for category in categories:
    files = [f for f in os.listdir(original_train_dir) if f.startswith(category)]
    train_files, val_files = train_test_split(files, test_size=0.5, random_state=42)
    
    for f in train_files:
        shutil.copy(os.path.join(original_train_dir, f), os.path.join(train_dir, category, f))
    for f in val_files:
        shutil.copy(os.path.join(original_train_dir, f), os.path.join(val_dir, category, f))


Authentication successful!
401 Client Error: Unauthorized for url: https://api.kaggle.com/v1/competitions.CompetitionApiService/DownloadDataFiles


FileNotFoundError: [Errno 2] No such file or directory: './data/train.zip'

## Parameters



In [None]:
TEST_SIZE = 0.5
RANDOM_STATE = 2018
BATCH_SIZE = 64
NO_EPOCHS = 5
NUM_CLASSES = 2
IMG_SIZE = 224
TRAIN_DIR = "data/train"
VAL_DIR   = "data/val"
TEST_DIR  = "data/test"

## Read the Data


In [None]:
def load_data_from_folders(folder):
    data = []
    labels = []
    
    for label, class_name in enumerate(["cats", "dogs"]):
        class_path = os.path.join(folder, class_name)
        for img in tqdm(os.listdir(class_path)):
            img_path = os.path.join(class_path, img)
            img_array = cv2.imread(img_path)
            img_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
            data.append(img_array)
            labels.append(label)
    
    return np.array(data), np.array(labels)

X_train, y_train = load_data_from_folders(TRAIN_DIR)
X_val, y_val = load_data_from_folders(VAL_DIR)

X_train = X_train / 255.0
X_val = X_val / 255.0

## Model

### Prepare the train data

### Prepare the model


In [None]:
model = Sequential()
model.add(ResNet50(include_top=False, pooling='max', weights='imagenet'))
model.add(Dense(NUM_CLASSES, activation='softmax'))
model.layers[0].trainable = False

### Compile the model

We compile the model, using a **sigmoid** optimized, the loss function as **categorical crossentropy** and the metric **accuracy**.

In [None]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

### Model summary

We plot the model description. We can see that the **ResNet-50** model represent the 1st layer of our model, of type **Model**.

In [None]:
model.summary()

Let's also show the model graphical representation using **plot_model**.

In [None]:
plot_model(model, to_file='model.png')
SVG(model_to_dot(model).create(prog='dot', format='svg'))

## <a id="52">Train the model</a>

We are now ready to train our model.

In [None]:
train_model = model.fit(X_train, y_train,
                  batch_size=BATCH_SIZE,
                  epochs=NO_EPOCHS,
                  verbose=1,
                  validation_data=(X_val, y_val))

## <a id="53">Validation accuracy and loss</a>

Let's show the train and validation accuracy on the same plot. As well, we will represent the train and validation loss on the same graph.

In [None]:
def plot_accuracy_and_loss(train_model):
    hist = train_model.history
    acc = hist['accuracy']
    val_acc = hist['val_accuracy']
    loss = hist['loss']
    val_loss = hist['val_loss']
    epochs = range(len(acc))
    f, ax = plt.subplots(1,2, figsize=(14,6))
    ax[0].plot(epochs, acc, 'g', label='Training accuracy')
    ax[0].plot(epochs, val_acc, 'r', label='Validation accuracy')
    ax[0].set_title('Training and validation accuracy')
    ax[0].legend()
    ax[1].plot(epochs, loss, 'g', label='Training loss')
    ax[1].plot(epochs, val_loss, 'r', label='Validation loss')
    ax[1].set_title('Training and validation loss')
    ax[1].legend()
    plt.show()
plot_accuracy_and_loss(train_model)

Let's also show the numeric validation accuracy and loss.

In [None]:
score = model.evaluate(X_val, y_val, verbose=0)
print('Validation loss:', score[0])
print('Validation accuracy:', score[1])

## Validation Accuracy

In [None]:
y_pred = np.argmax(model.predict(X_val), axis=1)
y_true = y_val

We create two indices, **correct** and **incorrect**, for the images in the validation set with class predicted correctly and incorrectly, respectively.

In [None]:
predicted_classes = np.argmax(model.predict(X_val), axis=1)
correct = np.nonzero(predicted_classes==y_true)[0]
incorrect = np.nonzero(predicted_classes!=y_true)[0]


We saw what is the number of correctly vs. incorrectly predicted values in the validation set.    

We show here the classification report for the validation set, with the accuracy per class and overall.

In [None]:
target_names = ["Class {}:".format(i) for i in range(NUM_CLASSES)]
print(classification_report(y_true, predicted_classes, target_names=target_names))

## Precision and Recall

In [None]:
y_pred = np.argmax(model.predict(X_val), axis=1)

precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)

print("Precision:", precision)
print("Recall:", recall)

## Show Test Images With Predicted Class



### Test data prediction

In [None]:
num_images = 25
indices = random.sample(range(len(X_val)), num_images)

plt.figure(figsize=(15, 15))
for i, idx in enumerate(indices):
    img = X_val[idx]
    true_label = y_val[idx]

    pred = model.predict(img.reshape(1, IMG_SIZE, IMG_SIZE, 3), verbose=0)
    pred_label = np.argmax(pred)

    plt.subplot(5, 5, i + 1)
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title(f"T: {'Dog' if true_label==1 else 'Cat'} | P: {'Dog' if pred_label==1 else 'Cat'}")

plt.tight_layout()
plt.show()
