# Cat vs Dog Classifier

In [None]:
%pip install bing-image-downloader

In [None]:
import os
import requests

import mlflow
import mlflow.keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications import MobileNet

from datetime import datetime
import matplotlib.pyplot as plt

from bing_image_downloader import downloader
from pathlib import Path
import imghdr

import numpy as np
from dotenv import load_dotenv
from hydrop_sharepointsync.sync import import_from_sharepoint, export_to_sharepoint

# Report whether TensorFlow can see at least one physical GPU device.
gpu_devices = tf.config.list_physical_devices('GPU')
gpu_status = "available" if gpu_devices else "NOT AVAILABLE"
print("GPU is", gpu_status)

## Data Preparation

In [None]:
# Root folder for the image data: the cells below pass it to the downloader as
# the output directory and to Keras as the dataset directory.
image_path = "./data/cat_dogs/"

In [None]:
def download_images(query, limit, output_dir):
    """Download Bing image-search results for ``query`` into ``output_dir``.

    Thin wrapper around ``downloader.download`` that pins the remaining
    options: ``adult_filter_off=True``, ``force_replace=False`` and
    ``timeout=60`` (presumably seconds -- confirm against the library docs).

    Args:
        query: Search term handed to the downloader, e.g. ``"cat"``.
        limit: Forwarded as the downloader's ``limit`` option.
        output_dir: Forwarded as the downloader's ``output_dir`` option.
    """
    fixed_options = {
        "adult_filter_off": True,
        "force_replace": False,
        "timeout": 60,
    }
    downloader.download(
        query,
        limit=limit,
        output_dir=output_dir,
        **fixed_options,
    )

# Request images for both classes with the same limit and target folder.
for search_term in ("cat", "dog"):
    download_images(search_term, 100, image_path)

### Check the downloaded images

In [None]:
# Sanity-check the downloaded files before they are handed to Keras.
#
# Check every extension that keras' image_dataset_from_directory loads
# (.bmp, .gif, .jpeg, .jpg, .png). Checking only ".png"/".jpg" would let a
# broken ".jpeg", ".bmp" or ".gif" file slip through unnoticed.
image_extensions = [".bmp", ".gif", ".jpeg", ".jpg", ".png"]

# Image types (as reported by imghdr) that TensorFlow is able to decode.
img_type_accepted_by_tf = ["bmp", "gif", "jpeg", "png"]

# NOTE: imghdr is deprecated since Python 3.11 and removed in Python 3.13.
for category in ["cat", "dog"]:
    data_dir = os.path.join(image_path, category)
    for filepath in Path(data_dir).rglob("*"):
        if filepath.suffix.lower() not in image_extensions:
            continue  # directories and non-image files are ignored

        # imghdr inspects the file header, so a wrong extension does not fool it.
        img_type = imghdr.what(filepath)
        if img_type is None:
            print(f"{filepath} is not an image")
        elif img_type not in img_type_accepted_by_tf:
            print(f"{filepath} is a {img_type}, not accepted by TensorFlow")

**Delete any invalid images reported by the check above before loading the dataset.**

## Set up the SharePoint sync paths

In [None]:
# Read the .env file so its entries become visible as environment variables.
load_dotenv()

# Look up the four sync locations; each name is None when its variable is unset.
(
    sharepoint_source_path,
    local_destination_path,
    local_source_path,
    sharepoint_destination_path,
) = (
    os.getenv(variable)
    for variable in (
        'SHAREPOINT_SOURCE_PATH',
        'LOCAL_DESTINATION_PATH',
        'LOCAL_SOURCE_PATH',
        'SHAREPOINT_DESTINATION_PATH',
    )
)

## Model Training

### Define parameters

In [None]:
# Experiment settings shared by the cells below.
expno = 5                    # experiment number, embedded in the run name
input_shape = (224, 224, 3)  # the first two entries are used as the image size
batch_size = 32
num_epochs = 10
learning_rate = 0.02         # handed to the Adam optimizer

In [None]:
# Names used for the TensorBoard log directory and for the MLflow experiment/run.
experiment_name = "cat-dog-classifier"

# The timestamp suffix keeps repeated runs of the same experiment number apart.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
run_name = f"Experiment_{expno}_{timestamp}"

### Read Data

In [None]:
# Training subset of the images under image_path (validation_split=0.2 holds
# out the rest). The validation cell uses the same seed and split fraction.
train_split_options = dict(
    validation_split=0.2,
    subset="training",
    seed=1337,
    image_size=input_shape[:2],
    batch_size=batch_size,
)
train_dataset = keras.preprocessing.image_dataset_from_directory(
    image_path, **train_split_options
)

In [None]:
# Validation subset of the images under image_path, selected with the same
# seed and split fraction as the training subset.
val_split_options = dict(
    validation_split=0.2,
    subset="validation",
    seed=1337,
    image_size=input_shape[:2],
    batch_size=batch_size,
)
val_dataset = keras.preprocessing.image_dataset_from_directory(
    image_path, **val_split_options
)

Look at some sample images from the datasets

In [None]:
# Preview up to nine images from the first training batch in a 3x3 grid,
# each titled with its integer class label.
plt.figure(figsize=(10, 10))
for images, labels in train_dataset.take(1):
    # A batch may hold fewer than nine images (small dataset or small
    # batch_size); indexing past its end would raise.
    preview_count = min(9, int(images.shape[0]))
    for i in range(preview_count):
        plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(int(labels[i]))
        plt.axis("off")

In [None]:
# Preview up to nine images from the first validation batch in a 3x3 grid,
# each titled with its integer class label.
plt.figure(figsize=(10, 10))
for images, labels in val_dataset.take(1):
    # A batch may hold fewer than nine images (small dataset or small
    # batch_size); indexing past its end would raise.
    preview_count = min(9, int(images.shape[0]))
    for i in range(preview_count):
        plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(int(labels[i]))
        plt.axis("off")

### Data Augmentation

In [None]:
# Augmentation pipeline: random horizontal flips followed by random
# rotations (RandomRotation factor 0.1).
data_augmentation = keras.Sequential()
data_augmentation.add(keras.layers.RandomFlip("horizontal"))
data_augmentation.add(keras.layers.RandomRotation(0.1))

In [None]:
# Draw nine separately augmented versions of the first image of one
# training batch in a 3x3 grid.
plt.figure(figsize=(10, 10))
for images, _ in train_dataset.take(1):
    for position in range(1, 10):
        # The whole batch is augmented on every pass; only image 0 is drawn.
        augmented_images = data_augmentation(images, training=True)
        first_image = augmented_images[0].numpy().astype("uint8")
        ax = plt.subplot(3, 3, position)
        plt.imshow(first_image)
        plt.axis("off")

In [None]:
def _augment_batch(x, y):
    """Apply the random augmentations to the images; labels pass through."""
    return data_augmentation(x, training=True), y


# Training data with augmentation applied to every element.
augmented_train_dataset = train_dataset.map(_augment_batch)

### Define Model and Train

In [None]:
# Transfer-learning model: a frozen ImageNet-pretrained MobileNet feature
# extractor with a small trainable classification head on top.
base_model = MobileNet(input_shape=input_shape, include_top=False, weights="imagenet")
base_model.trainable = False  # keep the pretrained weights fixed during training
model = keras.Sequential([
    keras.Input(shape=input_shape),
    # The datasets and the prediction cell feed raw 0-255 pixel values, but the
    # pretrained MobileNet weights expect inputs scaled to [-1, 1] (the scaling
    # mobilenet.preprocess_input performs). Rescaling inside the model keeps
    # training, validation and prediction consistent.
    keras.layers.Rescaling(1.0 / 127.5, offset=-1.0),
    base_model,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(128, activation="relu"),
    keras.layers.Dense(2, activation="softmax"),  # one output per class
])

In [None]:
 # Compile the model with a loss function and optimizer
# Adam with the configured learning rate; the sparse loss variant is paired
# with the two-unit softmax output.
adam_optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(
    optimizer=adam_optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# TensorBoard logs for this run go to logs/<experiment_name>/<run_name>.
logdir = os.path.join("logs", experiment_name, run_name)
tb_callback = keras.callbacks.TensorBoard(
    log_dir=logdir,
    histogram_freq=1,
    write_graph=True,
)

In [None]:
# Train on the augmented training data and evaluate on the validation data.
# The TensorBoard callback is attached here; `history` keeps the per-epoch
# values that the MLflow cell reads afterwards.
training_callbacks = [tb_callback]
history = model.fit(
    augmented_train_dataset,
    validation_data=val_dataset,
    epochs=num_epochs,
    callbacks=training_callbacks,
    verbose=2,
)

## MLflow Logging and Visualization

In [None]:
# Record the finished training run in MLflow: tags, the trained model, the
# hyperparameters, the final-epoch metrics and a loss/accuracy plot.
mlflow.set_experiment(experiment_name)
with mlflow.start_run(run_name=run_name) as mlflow_run:

    # NOTE(review): this call tags the experiment, while the three calls below
    # tag the run -- confirm that the difference is intended.
    mlflow.set_experiment_tag("base_model", "MobileNet")
    mlflow.set_tag("dataset", "cat_dog")
    mlflow.set_tag("optimizer", "keras.optimizers.Adam")
    mlflow.set_tag("loss", "sparse_categorical_crossentropy")

    mlflow.keras.log_model(model, "model")

    mlflow.log_param("learning_rate", learning_rate)
    mlflow.log_param("num_epochs", num_epochs)
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("input_shape", input_shape)

    # Only the values of the last recorded epoch are logged as metrics.
    mlflow.log_metric("train_loss", history.history["loss"][-1])
    mlflow.log_metric("train_acc", history.history["accuracy"][-1])
    mlflow.log_metric("val_loss", history.history["val_loss"][-1])
    mlflow.log_metric("val_acc", history.history["val_accuracy"][-1])

    # Log an artifact: training/validation curves over the recorded epochs.
    import matplotlib.pyplot as plt
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    # Take the x axis from the recorded history instead of num_epochs: the two
    # differ when training stopped early or num_epochs was edited after
    # fitting, and a length mismatch makes ax.plot raise.
    x_axis = np.arange(1, len(loss) + 1)

    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.suptitle('Training Statistics', fontsize='xx-large')
    fig.set_figwidth(15)

    ax1.set_title('Loss')
    # ax1.set_yscale('log')  # optional: logarithmic y axis
    ax1.plot(x_axis, loss)
    ax1.plot(x_axis, val_loss)
    ax1.legend(['training', 'validation'])

    ax2.set_title('Accuracy')
    # ax2.set_yscale('log')  # optional: logarithmic y axis
    ax2.plot(x_axis, acc)
    ax2.plot(x_axis, val_acc)
    ax2.legend(['training', 'validation'])
    plt.savefig("plot.png")
    mlflow.log_artifact("plot.png")

    mlflow_run_id = mlflow_run.info.run_id
    print("MLFlow Run ID: ", mlflow_run_id)

### Export MLflow Artifacts to SharePoint

In [None]:
# Export data to SharePoint
# NOTE(review): both arguments are read from environment variables and are
# None when the variable is unset -- confirm how export_to_sharepoint handles
# that case.
export_to_sharepoint(local_source_path, sharepoint_destination_path)

## Model Validation

In [None]:
# Load one sample image, resized to the model's input size, and turn it into
# a batch of one so it can be passed to model.predict.
img = keras.preprocessing.image.load_img(
    os.path.join(image_path, "cat/Image_17.jpg"),
    # load_img expects (height, width); input_shape also carries the channel
    # count, so only its first two entries are passed (as in the dataset cells).
    target_size=input_shape[:2],
)
img_array = keras.preprocessing.image.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)  # Create batch axis

In [None]:
# Predict on the single-image batch; output index 0 is reported as "cat" and
# index 1 as "dog".
predictions = model.predict(img_array)
cat_percent = 100 * float(predictions[0][0])
dog_percent = 100 * float(predictions[0][1])
message = "This image is {:.2f}% cat and {:.2f}% dog."
print(message.format(cat_percent, dog_percent))

# Show the image that was classified.
preview = img_array[0].numpy().astype("uint8")
plt.imshow(preview)