<a href="https://colab.research.google.com/github/Murugavelraja/PRODIGY_ML/blob/main/Cats_vs_Dogs_SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Cat vs Dog using SVM**

In [None]:
import kagglehub
# Fetch the 'biaiscience/dogs-vs-cats' dataset through kagglehub and keep the value
# it returns (per the kagglehub documentation: the local path of the dataset files).
biaiscience_dogs_vs_cats_path = kagglehub.dataset_download('biaiscience/dogs-vs-cats')

print('Data source import complete.')


Downloading from https://www.kaggle.com/api/v1/datasets/download/biaiscience/dogs-vs-cats?dataset_version_number=1...


100%|██████████| 817M/817M [00:18<00:00, 47.0MB/s]

Extracting files...





<a id='step11'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 1.1 | <b></span><span style="color:white">Import libraries</span></span>

In [None]:
pip install silence_tensorflow

In [None]:
# Call silence_tensorflow() from the silence_tensorflow library to silence TensorFlow warnings

from silence_tensorflow import silence_tensorflow
silence_tensorflow()

In [None]:
import os
import re
import cv2
import random
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
from termcolor import colored
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.applications import VGG19
from tensorflow.keras import models, layers, optimizers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping



# Reaching this line means every import in this cell succeeded; report it in bold green.
print(colored('All libraries imported succesfully', 'green', attrs=["bold"]))

<a id='step12'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 1.2 | </b></span><span style="color:white">Libraries Configuration</span></span>

In [None]:
# Ignore warnings: install an 'ignore' filter without any category or module restriction
warnings.filterwarnings('ignore')
# Use seaborn's "darkgrid" style for the figures drawn later in the notebook
sns.set_style("darkgrid")
print(colored('All libraries configed succesfully', 'green', attrs=['bold']))

<a id='step21'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 2.1 | <b></span><span style="color:white">Importing Data</span></span>

In [None]:
# Location of the train folder inside the downloaded dataset.
#
# The path is derived from the value returned by kagglehub.dataset_download()
# instead of being hard-coded: the dataset only lives under
# '/kaggle/input/dogs-vs-cats' when the notebook runs on Kaggle itself, while on
# Colab or a local machine kagglehub extracts it somewhere else. The original
# Kaggle location is kept as a fallback.

_legacy_train_folder = '/kaggle/input/dogs-vs-cats/train/train'
train_folder = os.path.join(biaiscience_dogs_vs_cats_path, 'train', 'train')
if not os.path.isdir(train_folder) and os.path.isdir(_legacy_train_folder):
    train_folder = _legacy_train_folder
if not os.path.isdir(train_folder):
    # Fail here with a clear message rather than later inside os.listdir()
    raise FileNotFoundError(f'Train folder not found: {train_folder}')

<a id='step22'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 2.2 | <b></span><span style="color:white">Count data and classes</span></span>

In [None]:
# Number of entries found in the train folder

total_count = sum(1 for _ in os.listdir(train_folder))
print(
    colored(
        f'Number of samples in train folder : {total_count}',
        'blue',
        attrs=['bold'],
    )
)

In [None]:
# Count the cat and dog images by file-name prefix.
#
# str.startswith() replaces the former "\Acat" / "\Adog" regex patterns: "\A"
# inside a non-raw string literal is an invalid escape sequence (Python warns
# about it), and a plain prefix test needs no regex at all.

cat_count, dog_count = 0, 0
for file_name in os.listdir(train_folder) :
    if file_name.startswith('cat') :
        cat_count += 1
    elif file_name.startswith('dog') :
        dog_count += 1
print(colored(f'Number of cats : {cat_count}', 'blue', attrs=['bold']))
print(colored(f'Number of dogs : {dog_count}', 'blue', attrs=['bold']))
# From here on total_count holds the per-class counts [cats, dogs]; the bar-plot
# cell reads it in that form.
total_count = [cat_count, dog_count]

In [None]:
# Horizontal bar chart with the number of images per class

plt.figure(figsize=(15, 4))
ax = sns.barplot(x=total_count, y=['Cat', 'Dog'], orient='h', color='navy')
ax.set_xticks(np.arange(0, 14500, 2000))
ax.set_xlabel('Number of Images')
ax.set_ylabel('Classes')
ax.set_title('Number of samples for each class', fontsize=20)

# Write every count at the tip of its bar, vertically centred on the bar
for i, p in enumerate(ax.patches) :
    bar_tip_x = p.get_width()
    bar_centre_y = p.get_y() + p.get_height() / 2.
    ax.text(bar_tip_x, bar_centre_y, f'{total_count[i]}', va="center", fontsize=15)

<a id='step23'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 2.3 | </b></span><span style="color:white">Convert data to a DataFrame</span></span>

In [None]:
# Build one (file_path, label) record per image, labelled by file-name prefix.
#
# * Entries that start with neither 'cat' nor 'dog' are skipped. Labelling
#   "everything that is not a cat" as a dog would mislabel e.g. the
#   'noisy_cat...' files that the augmentation step writes into this folder
#   when the notebook is run a second time.
# * The listing is sorted because os.listdir() returns entries in arbitrary
#   order, and the seeded train/validation/test split depends on row order.
# * str.startswith() replaces the "\Acat" regex, whose "\A" is an invalid
#   escape sequence in a non-raw string literal.
temp_df = []
for file_name in sorted(os.listdir(train_folder)) :
    if file_name.startswith('cat') :
        class_name = 'cat'
    elif file_name.startswith('dog') :
        class_name = 'dog'
    else :
        continue
    temp_df.append((os.path.join(train_folder, file_name), class_name))
df = pd.DataFrame(temp_df, columns=['file_path', 'label'])
df

<a id='step24'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 2.4 | </b></span><span style="color:white">Showing 6 images from each class</span></span>

In [None]:
# Randomly show 6 images of each classes

def plot_6_image(images, title) :
    """Show the given images side by side in a single row.

    Parameters
    ----------
    images : iterable of str
        Paths of the image files to display (normally six of them).
    title : str
        Text used as the figure title.

    Notes
    -----
    cv2.imread() returns pixels in BGR channel order while plt.imshow() expects
    RGB, so every image is converted before it is drawn; without the conversion
    the red and blue channels are swapped on screen.
    """
    images = list(images)
    # Keep the original six-column layout, widening it only if more paths are given
    n_columns = max(6, len(images))

    plt.figure(figsize=(15, 3))
    for i, path in enumerate(images) :
        img = cv2.imread(path)
        if img is None :
            # cv2.imread() returns None for unreadable files; leave that slot empty
            continue
        plt.subplot(1, n_columns, i+1)
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.axis('off')
    plt.suptitle(title, fontsize=30, fontweight='bold')
    plt.tight_layout()
    plt.show()

# Seed NumPy so the same images are drawn on every run
np.random.seed(42)

for classes in ('cat', 'dog') :
    # File paths of the current class only
    temp_df = df.loc[df.label == classes, 'file_path']
    random_image = np.random.choice(temp_df, size=6)
    plot_6_image(random_image, title=classes)

<a id='step2.5'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 2.5 | <b></span><span style="color:white">Image Sizes</span></span>

In [None]:
# Define a function to report the height x width dimensions of the images in a folder

def image_size(train_folder) :
    """Print a summary of the image dimensions found in ``train_folder``.

    Every entry of the folder is read with cv2.imread(); entries that cannot be
    decoded (imread returns None) are ignored. If all readable images share one
    (height, width) pair that pair is printed, otherwise the number of distinct
    pairs and the minimum / maximum height and width are printed.

    Parameters
    ----------
    train_folder : str
        Directory whose files are inspected.

    Returns
    -------
    None
        The result is only printed.
    """
    heights = []
    widths = []

    unique_dims = set()

    for file_name in os.listdir(train_folder) :
        img = cv2.imread(os.path.join(train_folder, file_name))
        if img is None :
            continue
        # An image array is indexed (rows, columns, ...): shape[0] is the height
        # and shape[1] is the width.
        img_height, img_width = img.shape[:2]
        unique_dims.add((img_height, img_width))
        heights.append(img_height)
        widths.append(img_width)

    if not unique_dims :
        # Nothing could be read: min()/max() below would fail on empty lists
        print(colored(f'No readable images found in {train_folder}', 'red', attrs=['bold']))
        return

    if len(unique_dims)==1 :
        print(colored(f'All images have a unique shape of {next(iter(unique_dims))}', 'green', attrs=['bold']))
    else :
        print(colored(f'There are {len(unique_dims)} different image dimensions', 'red', attrs=['bold']))
        print(colored(f'Min height is {min(heights)}, Min width is {min(widths)}', 'red'))
        print(colored(f'Max height is {max(heights)}, Max width is {max(widths)}', 'red'))

In [None]:
# Report the image dimensions of the train folder (every file in it is read once)
image_size(train_folder)

<a id='step2.6'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 2.6 | <b></span><span style="color:white">Split data to 'train', 'validate', 'test'</span></span>

In [None]:
# Shape of the full DataFrame (rows, columns) before it is split
print(colored(f'df shape : {df.shape}', 'green', attrs=['bold']))

In [None]:
# Two stratified splits with a fixed seed:
#   1) 20% of df is held out as val_df,
#   2) 20% of the remaining rows is held out as test_df; the rest is train_df.

train_df_full, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['label'],
    random_state=42,
)
train_df, test_df = train_test_split(
    train_df_full,
    test_size=0.2,
    stratify=train_df_full['label'],
    random_state=42,
)

print(colored(f'Training shape : {train_df.shape}', 'green', attrs=['bold']))
print(colored(f'Validation shape : {val_df.shape}', 'green', attrs=['bold']))
print(colored(f'Test shape : {test_df.shape}', 'green', attrs=['bold']))

<a id='step27'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 2.7 | <b></span><span style="color:white">Add Noises</span></span>

In [None]:
def add_gausian_noises(image, mean=0, std=0.05) :
    ''' Add Gaussian noise to an image.

    Parameters
    ----------
    image : array-like
        Image pixels.
    mean, std : float
        Mean and standard deviation of the noise. For integer images (e.g. the
        uint8 arrays returned by cv2.imread) they are fractions of the dtype's
        maximum value, so the default std=0.05 is about 12.75 grey levels on a
        0-255 image. Applied as an absolute 0.05 it would be far below one grey
        level and would vanish as soon as the image is stored as 8-bit again.
        For floating point images they are used as absolute values, as before.

    Returns
    -------
    numpy.ndarray
        Noisy image with the same shape as the input. Integer inputs keep their
        dtype (values are clipped to the dtype range and rounded); floating
        point inputs are clipped to [0, 255].
    '''
    image = np.asarray(image)

    if np.issubdtype(image.dtype, np.integer) :
        limits = np.iinfo(image.dtype)
        gaussian = np.random.normal(mean * limits.max, std * limits.max, image.shape)
        # Add in float64 so the sum can neither wrap around nor truncate the noise
        noisy_image = np.clip(image.astype(np.float64) + gaussian, limits.min, limits.max)
        return np.rint(noisy_image).astype(image.dtype)

    gaussian = np.random.normal(mean, std, image.shape)
    noisy_image = image + gaussian
    noisy_image = np.clip(noisy_image, 0, 255)
    return noisy_image

In [None]:
def apply_blur (image, kernel_size=5) :
    ''' Blur an image with a square Gaussian kernel of side ``kernel_size`` '''
    kernel = (kernel_size, kernel_size)
    # The third argument (sigmaX) is passed as 0, exactly as before
    blurred = cv2.GaussianBlur(image, kernel, 0)
    return blurred

In [None]:
def downsample_upsample(image, scale_percent=50) :
    ''' Downsample an image and upsample it back to its original size.

    Parameters
    ----------
    image : numpy.ndarray
        Image whose first two axes are (height, width).
    scale_percent : float
        Size of the intermediate image as a percentage of the original.

    Returns
    -------
    The image resized down with INTER_AREA and back up to the original
    (width, height) with INTER_LINEAR.
    '''
    original_height, original_width = image.shape[:2]
    # Never let a side collapse to 0 pixels (tiny images or very small
    # scale_percent values): cv2.resize() rejects an empty target size.
    small_width = max(1, int(original_width * scale_percent / 100))
    small_height = max(1, int(original_height * scale_percent / 100))

    # cv2.resize() takes the target size as (width, height)
    resized = cv2.resize(image, (small_width, small_height), interpolation=cv2.INTER_AREA)
    return cv2.resize(resized, (original_width, original_height), interpolation=cv2.INTER_LINEAR)

In [None]:
# Add the degradations defined above (noise, blur, down/up-sampling) to the train data

def augment_images(dataframe, base_dir):
    """Write degraded copies of the images in ``dataframe`` and add them to it.

    Every image is read as grayscale, then a random non-empty subset of the
    degradations (Gaussian noise, Gaussian blur, down/up-sampling) is applied
    cumulatively in that fixed order. After each applied step the current image
    is saved as '<prefix><original file name>' with the prefix 'noisy_',
    'blur_' or 'downup_'.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide the columns 'file_path' and 'label'.
    base_dir : str
        Directory the degraded copies are written to.

    Returns
    -------
    pandas.DataFrame
        The rows of ``dataframe`` followed by one row per written file
        (columns 'file_path' and 'label', fresh integer index).

    Notes
    -----
    * The degradation is applied *before* the file is written, so each saved
      file actually contains the degradation its prefix names.
    * Images that cannot be read, and copies that cannot be written (for
      example into a read-only directory), are skipped instead of crashing or
      adding paths of files that do not exist.
    * The choice of degradations uses the global ``random`` module state; seed
      it beforehand if a reproducible result is needed.
    """
    # (name used for the random choice, file-name prefix, degradation function)
    degradations = (
        ('noise', 'noisy_', add_gausian_noises),
        ('blur', 'blur_', apply_blur),
        ('downsample', 'downup_', downsample_upsample),
    )
    choices = [name for name, _, _ in degradations]
    modified_data = []

    for img_path, label in zip(dataframe['file_path'], dataframe['label']):
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread() returns None for missing or undecodable files
            continue
        filename = os.path.basename(img_path)

        # Randomly apply one or more degradation methods
        chosen_methods = random.sample(choices, k=random.randint(1, len(choices)))

        for name, prefix, degrade in degradations:
            if name not in chosen_methods:
                continue
            img = degrade(img)
            # Bring the result back to 8-bit so it can be stored as an ordinary
            # image file whatever dtype the degradation returned.
            img = np.clip(np.rint(img), 0, 255).astype(np.uint8)
            new_img_path = os.path.join(base_dir, f"{prefix}{filename}")
            # cv2.imwrite() reports failure through its return value
            if cv2.imwrite(new_img_path, img):
                modified_data.append((new_img_path, label))

    modified_images_df = pd.DataFrame(modified_data, columns=['file_path', 'label'])

    # Return the updated DataFrame with both original and modified images
    return pd.concat([dataframe, modified_images_df], ignore_index=True)

In [None]:
# NOTE: this cell is not idempotent. train_df is replaced by its own augmented
# version, so running the cell again also degrades the rows that were added by
# the previous run and writes further files for them.
train_df = augment_images(train_df, train_folder)

In [None]:
# Split sizes again: only the training set grows through augmentation
print(
    colored(f'Training shape : {train_df.shape}', 'green', attrs=['bold']),
    colored(f'Validation shape : {val_df.shape}', 'green', attrs=['bold']),
    colored(f'Test shape : {test_df.shape}', 'green', attrs=['bold']),
    sep='\n',
)

In [None]:
# Show 6 augmented images of each class.
#
# The images are drawn from train_df (the augmented training set), restricted to
# the files written by augment_images, which are recognisable by their file-name
# prefix. Sampling from df, as this cell did before, could only ever show the
# untouched original images again.

# Setting the random seed for reproducibility
np.random.seed(42)
print(colored('Images After augmentation :', 'green', attrs=['bold']))

augmented_mask = train_df.file_path.map(os.path.basename).str.match(r'(?:noisy|blur|downup)_')

for classes in ['cat', 'dog'] :
    temp_df = train_df.file_path[augmented_mask & (train_df.label == classes)]
    if temp_df.empty :
        # np.random.choice() cannot sample from an empty collection
        print(colored(f'No augmented {classes} images found', 'red', attrs=['bold']))
        continue
    random_image = np.random.choice(temp_df, 6)
    plot_6_image(random_image, classes)

<a id='step28'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 2.8 | <b></span><span style="color:white">Create data generator</span></span>

In [None]:
def create_data_generator(train_df, val_df, test_df, batch_size=32, image_dimentions=(150, 150)) :
    """Create the Keras data generators for the three splits.

    All three generators rescale pixel values by 1/255 and read the image paths
    from the 'file_path' column and the binary labels from the 'label' column.
    Only the training generator adds random transformations (rotation, shifts,
    zoom, horizontal flip) and shuffles its samples.

    Parameters
    ----------
    train_df, val_df, test_df :
        DataFrames with the columns 'file_path' and 'label'.
    batch_size : int
        Batch size of every generator.
    image_dimentions : tuple
        Target size the images are resized to.

    Returns
    -------
    tuple
        (train_generator, validation_generator, test_generator)
    """
    train_datagen = ImageDataGenerator(
        rescale=1.0/255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
    )
    validation_datagen = ImageDataGenerator(rescale=1.0/255)
    test_datagen = ImageDataGenerator(rescale=1.0/255)

    # Options shared by the three flow_from_dataframe() calls
    flow_options = dict(
        x_col='file_path',
        y_col='label',
        target_size=image_dimentions,
        class_mode='binary',
        batch_size=batch_size,
        seed=42,
    )

    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df, shuffle=True, **flow_options
    )
    validation_generator = validation_datagen.flow_from_dataframe(
        dataframe=val_df, shuffle=False, **flow_options
    )
    test_generator = test_datagen.flow_from_dataframe(
        dataframe=test_df, shuffle=False, **flow_options
    )

    return train_generator, validation_generator, test_generator

In [None]:
# Build the three generators with the default batch size and image dimensions
train_generator, val_generator, test_generator = create_data_generator(train_df, val_df, test_df)

# Fetch a batch of images and labels
batch_images, batch_labels = next(train_generator)

# Check the shape of the first image in the batch
print("Shape of the first image in the batch:", batch_images[0].shape)

<a id='step31'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 3.1 | <b></span><span style="color:white">Add VGG19 Model</span></span>

In [None]:
# VGG19 used as convolutional base: weights='imagenet' requests the ImageNet
# pre-trained weights and include_top=False leaves out the network's own
# classification layers (see the Keras applications documentation).
# input_shape is the default image_dimentions of create_data_generator, (150, 150),
# plus 3 colour channels; keep the two in sync.
conv_base = VGG19(
    weights='imagenet',
    include_top=False,
    input_shape=(150, 150, 3)
)

In [None]:
# Print the layer-by-layer summary of the convolutional base
conv_base.summary()

<a id='step32'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 3.2 | <b></span><span style="color:white">Freeze VGG19 layers</span></span>

In [None]:
# List the layer names; the freezing step selects layers by name ('block5_conv1')
for layer in conv_base.layers :
    print(layer.name)

In [None]:
# Freeze every layer that comes before 'block5_conv1'.
# 'block5_conv1' and all layers after it stay trainable, so only that final
# part of the convolutional base is fine-tuned.

conv_base.trainable = True
set_trainable = False

for layer in conv_base.layers :
    # The switch turns on at 'block5_conv1' and stays on for the remaining layers
    set_trainable = set_trainable or layer.name == 'block5_conv1'
    layer.trainable = set_trainable

<a id='step33'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 3.3 | <b></span><span style="color:white">Create and Compile a Model</span></span>

In [None]:
# Classifier: convolutional base -> dropout -> flatten -> dense(512, relu)
# -> single sigmoid unit for the binary output
model = models.Sequential([
    conv_base,
    layers.Dropout(0.5),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Compile: RMSprop with a learning rate of 1e-5, binary cross-entropy loss
# (matches the single sigmoid output unit) and accuracy reported under the name 'acc'
model.compile(optimizer=optimizers.RMSprop(learning_rate=1e-5),
             loss='binary_crossentropy',
             metrics=['acc'])

<a id='step34'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 3.4 | <b></span><span style="color:white">Create CallBacks</span></span>

In [None]:
# 1) Model checkpoint: writes the model to 'MyKerasModel.keras'; with
#    save_best_only=True only the best model so far is kept
#    (the monitored quantity is left at the Keras default)
checkpoint_cb = ModelCheckpoint('MyKerasModel.keras', save_best_only=True)

# 2) Early stopping: stop after 10 epochs without improvement and restore the
#    weights of the best epoch (monitored quantity left at the Keras default)
earlystop_cb = EarlyStopping(patience=10, restore_best_weights=True)

<a id='step35'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 3.5 | <b></span><span style="color:white">Train Model</span></span>

In [None]:
# Train for at most 100 epochs; one epoch is one full pass over each generator
# (len(generator) batches). The two callbacks save the best model and stop the
# training early.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=100,
    validation_data=val_generator,
    validation_steps=len(val_generator),
    callbacks=[checkpoint_cb, earlystop_cb]
)

In [None]:
# Plot every series recorded in history.history (one column per recorded quantity)
# against the epoch index
pd.DataFrame(history.history).plot(figsize=(9, 4), linewidth=2)
plt.grid()
plt.show()

<a id='step36'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 3.6 | </b></span><span style="color:white">Recover best Model</span></span>

In [None]:
# Reload the model file written by the ModelCheckpoint callback
best_model = models.load_model('MyKerasModel.keras')

<a id='step37'></a>
# <span style="background-color:#0a2342;background-size: cover;font-family:tahoma;font-size:100%;text-align:left;border-radius:5px 5px 5px 5px; padding:10px; border:solid 2px #09375b"><span style="color:#79a9d1"><b>Step 3.7 | </b></span><span style="color:white">Model Evaluation</span></span>

In [None]:
# Evaluate the restored model on the test generator.
# Model.evaluate() accepts generators directly; evaluate_generator() was
# deprecated in TensorFlow 2.1 and is no longer available in current Keras
# releases. With the single 'acc' metric set at compile time the call returns
# [loss, accuracy].
test_loss, test_acc = best_model.evaluate(test_generator)

print(colored(f'Test Loss : {test_loss}', 'green', attrs=['bold']))
print(colored(f'Test Accuracy : {test_acc}', 'green', attrs=['bold']))