# 1: Imports and Setup

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import numpy as np
import pandas as pd
from tensorflow.keras.applications import VGG16
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import KFold

# Number of splits for the K-fold cross-validation cell (passed to KFold as n_splits).
num_folds = 5
# Number of training epochs passed to model.fit inside the K-fold loop.
epochs = 8

# 2: Download and Unzip Dataset

In [None]:
!!wget https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip
!unzip cats_and_dogs_filtered.zip

Archive:  cats_and_dogs_filtered.zip
replace cats_and_dogs_filtered/vectorize.py? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

# 3: Dataset Creation Function

In [None]:
def create_datasets(path):
    """Collect image file paths and binary labels from a class-per-folder tree.

    Every sub-directory of ``path`` is treated as one class. Files inside a
    directory named ``"cats"`` get label 0; files inside any other directory
    get label 1.

    Args:
        path: Root directory that contains one sub-directory per class.

    Returns:
        A tuple ``(image_paths, image_labels)`` of NumPy arrays of equal
        length. Paths are built as ``f'{path}/{class_dir}/{file_name}'``.
    """
    image_paths = []
    image_labels = []

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any seeded split computed from it) reproducible.
    for class_dir in sorted(os.listdir(path)):
        class_path = f'{path}/{class_dir}'

        # Skip stray files in the root and hidden folders such as
        # .ipynb_checkpoints, which would otherwise crash the inner listdir
        # or be silently labelled as class 1.
        if class_dir.startswith('.') or not os.path.isdir(class_path):
            continue

        image_label = 0 if class_dir == "cats" else 1

        for file_name in sorted(os.listdir(class_path)):
            # Hidden files (e.g. .DS_Store) are not images.
            if file_name.startswith('.'):
                continue
            image_paths.append(f'{class_path}/{file_name}')
            image_labels.append(image_label)

    return np.array(image_paths), np.array(image_labels)


# 4: Load Training and Testing Data

In [None]:
# Build the (path, label) arrays for both splits of the extracted archive.
image_train, label_train = create_datasets('/content/cats_and_dogs_filtered/train')
image_test, label_test = create_datasets('/content/cats_and_dogs_filtered/validation')

# Sanity check: the four array shapes, then the first training sample.
print(*(arr.shape for arr in (image_train, label_train, image_test, label_test)))
print(image_train[0], label_train[0])

(2000,) (2000,) (1000,) (1000,)
/content/cats_and_dogs_filtered/train/cats/cat.114.jpg 0


# 5: Image Preprocessing Functions

In [None]:
def get_image_tensor_from_path(image_path, label):
    """Read a JPEG file and return it as a 244x244 RGB float32 tensor.

    Args:
        image_path: Path (string or string tensor) of the JPEG file to load.
        label: Passed through unchanged so the function can be used with
            ``Dataset.map`` on ``(path, label)`` pairs.

    Returns:
        A tuple ``(image, label)`` where ``image`` has shape (244, 244, 3)
        with values scaled to [0, 1].
    """
    image = tf.io.read_file(image_path)
    # channels=3 forces RGB output. Without it a grayscale or CMYK JPEG decodes
    # to 1 or 4 channels, which cannot be batched with RGB images, breaks
    # random_saturation, and does not match the 3-channel model input.
    image = tf.image.decode_jpeg(image, channels=3)
    # uint8 [0, 255] -> float32 [0, 1]
    image = tf.image.convert_image_dtype(image, tf.float32)
    # NOTE(review): 244 looks like a typo for VGG16's usual 224, but the models
    # in this notebook are built with input_shape=(244, 244, 3), so it is kept.
    image = tf.image.resize(image, (244, 244))
    return image, label

def augment_image(image, label):
    """Apply random training-time augmentation to one image.

    The image is randomly flipped vertically and horizontally, its brightness
    is shifted by at most 32/255, its saturation is scaled by a factor drawn
    from [0.5, 1.5], and the result is clipped back into [0, 1]. The label is
    returned untouched.
    """
    flipped = tf.image.random_flip_left_right(
        tf.image.random_flip_up_down(image)
    )
    jittered = tf.image.random_brightness(flipped, max_delta=32.0 / 255.0)
    jittered = tf.image.random_saturation(jittered, lower=0.5, upper=1.5)
    # Brightness/saturation changes can push values outside the valid range.
    return tf.clip_by_value(jittered, 0.0, 1.0), label

# 6: Create TensorFlow Dataset Function

In [None]:
def cd_dataset(x, y, batch_size=32, training=False):
    """Build a shuffled, batched ``tf.data`` pipeline from image paths and labels.

    Args:
        x: Sequence/array of image file paths.
        y: Sequence/array of labels aligned with ``x``.
        batch_size: Number of samples per batch.
        training: When True, ``augment_image`` is applied after loading.

    Returns:
        A ``tf.data.Dataset`` yielding ``(image_batch, label_batch)`` tuples.
    """
    autotune = tf.data.experimental.AUTOTUNE
    data = tf.data.Dataset.from_tensor_slices((x, y))

    # Shuffle the lightweight (path, label) pairs *before* decoding. Shuffling
    # after the map step would buffer decoded 244x244x3 float32 images (about
    # 0.7 MB each, ~1.4 GB for 2000 of them) and delay the first batch until
    # the buffer is filled. A buffer covering the whole input gives a uniform
    # shuffle; max(..., 1) keeps the buffer size valid for empty input.
    data = data.shuffle(max(len(x), 1))

    data = data.map(get_image_tensor_from_path, num_parallel_calls=autotune)

    if training:
        data = data.map(augment_image, num_parallel_calls=autotune)

    data = data.batch(batch_size)
    data = data.prefetch(autotune)

    return data

# The training pipeline gets augmentation; the held-out pipeline does not.
train_dataset = cd_dataset(x=image_train, y=label_train, training=True)
test_dataset = cd_dataset(x=image_test, y=label_test)

# 7: Data Visualization

In [None]:
# Index = numeric label produced by create_datasets (0 for cats, 1 otherwise).
class_names = ['cat', 'dog']

# Show up to 25 images from the first batch in a 5x5 grid.
for x, y in test_dataset.take(1):
  images, labels = x.numpy(), y.numpy()
  # A batch can hold fewer than 25 images (small batch_size or a short final
  # batch); cap the count instead of indexing past the end.
  n_show = min(25, len(images))

  plt.figure(figsize=(10, 10))

  for i in range(n_show):
    plt.subplot(5, 5, i+1)
    plt.imshow(images[i])

    plt.xticks([])
    plt.yticks([])

    # Plain int so the label can index the Python list.
    plt.xlabel(class_names[int(labels[i])])

plt.show()

KeyboardInterrupt: 

# 8: Model Definition and Compilation

In [None]:
# ImageNet-pretrained VGG16 convolutional base, without its classification head.
b_model = VGG16(weights='imagenet', include_top=False, input_shape=(244, 244, 3))
# Freeze the base so only the layers stacked on top of it are trained.
b_model.trainable = False
b_model.summary()

In [None]:
# Classifier: frozen VGG16 features -> flatten -> dense(256) -> dropout -> sigmoid.
model = tf.keras.Sequential()
model.add(b_model)
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=256, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
# A single sigmoid unit: the output is the probability of label 1.
model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# The metric is named 'acc'; the history plot reads the 'acc'/'val_acc' keys.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

# 9: Training with Learning Rate Scheduler

In [None]:
# Multiply the learning rate by 0.1 when val_loss has not improved for
# 3 epochs, never going below 1e-6.
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=1e-6)

# Use the shared `epochs` constant from the setup cell rather than a second
# hard-coded 8, so this run and the K-fold runs always train for the same length.
history = model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=test_dataset,
    callbacks=[lr_scheduler],
)

# 10: Plot Training History

In [None]:
# Read the metrics from the History object returned by model.fit above rather
# than from `model.history`: the name `model` can be rebound by later cells, in
# which case re-running this cell would plot a different training run.
train_history = pd.DataFrame(history.history)
train_history[['loss', 'val_loss']].plot(title="Loss Over Epochs")
# The trailing ';' suppresses the Axes repr under the figure.
train_history[['acc', 'val_acc']].plot(title="Accuracy Over Epochs");

# 11: Model Evaluation on Test Set

In [None]:
# evaluate() returns the loss followed by the compiled metric ('acc').
test_loss, test_accuracy = model.evaluate(x=test_dataset)
print(f'Test Loss: {test_loss}', f'Test Accuracy: {test_accuracy}', sep='\n')

# 12: Prediction Function with Visualization

In [None]:
def predict_and_show(start_index=None, num_samples=5,external_path=None):
    """Predict cat/dog for some images and display them with their predictions.

    Args:
        start_index: Index of the first sample taken from ``image_test`` /
            ``label_test``. ``None`` (the default) starts at 0. Ignored when
            ``external_path`` is given.
        num_samples: How many consecutive test samples to show. Ignored when
            ``external_path`` is given.
        external_path: Optional path to a single image file. When set, only
            that image is shown and no "Actual" label is printed.

    Returns:
        None. Draws a grid with 5 images per row; each title shows the
        predicted class, the actual class (if known) and the confidence in
        the predicted class.
    """
    if external_path:
        image_paths = [external_path]
        actual_labels = [None]
    else:
        # The default start_index is None; None + num_samples would raise.
        first = 0 if start_index is None else start_index
        image_paths = image_test[first:first + num_samples]
        actual_labels = ["Dog" if label == 1 else "Cat"
                         for label in label_test[first:first + num_samples]]

    num_samples = len(image_paths)
    if num_samples == 0:
        # plt.subplots cannot build a grid with zero rows.
        print("No images to show.")
        return

    images = [get_image_tensor_from_path(image_path, label=None)[0]
              for image_path in image_paths]

    # One batched forward pass instead of one predict() call (and one progress
    # bar) per image.
    probabilities = model.predict(tf.stack(images), verbose=0)

    num_rows = (num_samples + 4) // 5  # ceil(num_samples / 5)
    fig, axes = plt.subplots(num_rows, 5, figsize=(15, 3 * num_rows))
    axes = axes.flatten()

    for ax, image, actual_label, row in zip(axes, images, actual_labels, probabilities):
        # The sigmoid output is the probability of label 1 ("Dog").
        dog_probability = float(row[0])
        is_dog = dog_probability > 0.5
        predicted_label = "Dog" if is_dog else "Cat"
        # Report confidence in the *predicted* class; the raw sigmoid value
        # would show e.g. 0.03 for a very confident "Cat".
        confidence = dog_probability if is_dog else 1.0 - dog_probability

        ax.imshow(image)
        title = f"Predicted: {predicted_label}"
        if actual_label is not None:
            title += f"\nActual: {actual_label}"
        title += f"\nConfidence: {confidence:.2f}"
        ax.set_title(title, fontsize=10, pad=10)
        ax.axis("off")

    # Hide the unused cells of the last row.
    for ax in axes[num_samples:]:
        ax.axis("off")

    plt.tight_layout()
    plt.show()

In [None]:
# Show predictions for 11 consecutive test images starting at index 100.
predict_and_show(start_index=100,num_samples=11)


In [None]:
# Predict a single external image.
# NOTE(review): no cell creates this file; presumably it must be uploaded to /content first — confirm.
predict_and_show(external_path='/content/image1.jpeg')

In [None]:
# Predict a second external image.
# NOTE(review): no cell creates this file; presumably it must be uploaded to /content first — confirm.
predict_and_show(external_path='/content/image2.jpg')

# 13: K-Fold Cross-Validation

In [None]:

def _build_fold_model():
    """Return a freshly initialised, compiled classifier (frozen VGG16 base + dense head)."""
    base = VGG16(input_shape=(244, 244, 3), include_top=False, weights='imagenet')
    base.trainable = False

    fold_model = tf.keras.Sequential([
        base,
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(units=256, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(units=1, activation='sigmoid'),
    ])
    fold_model.compile('adam', loss='binary_crossentropy', metrics=['acc'])
    return fold_model


# Seeded, shuffled K-fold split over the training paths only.
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

accuracy_per_fold = []
loss_per_fold = []

# Fold-local names (fold_*) are used on purpose: rebinding the notebook-level
# `model`, `b_model`, `train_dataset` or `test_dataset` here would make every
# cell that runs afterwards (evaluation, prediction, saving) silently operate
# on the last fold's objects instead of the main model and the real test set.
for fold_nb, (train_index, val_index) in enumerate(kf.split(image_train), start=1):
    fold_train_ds = cd_dataset(image_train[train_index], label_train[train_index],
                               batch_size=32, training=True)
    fold_val_ds = cd_dataset(image_train[val_index], label_train[val_index],
                             batch_size=32)

    # A new model per fold so no weights carry over between folds.
    fold_model = _build_fold_model()

    print(f'Fold{fold_nb}.....')
    fold_model.fit(fold_train_ds, epochs=epochs, validation_data=fold_val_ds,
                   callbacks=[lr_scheduler])

    # evaluate() returns [loss, acc] for the compiled loss/metric.
    fold_loss, fold_accuracy = fold_model.evaluate(fold_val_ds)
    accuracy_per_fold.append(fold_accuracy)
    loss_per_fold.append(fold_loss)

print(f"Average accuracy across folds: {np.mean(accuracy_per_fold):.4f}")
print(f"Average loss across folds: {np.mean(loss_per_fold):.4f}")


# 14: Model Save

In [None]:
# Write the model to an HDF5 file in the current working directory.
# NOTE(review): this saves whatever `model` currently refers to; the name is
# assigned in more than one cell, so confirm it is the model you intend to keep.
model.save("cats_dogs_classifier.h5")
