# Task 3: Classify Cats and Dogs images using SVM

# Link to Dataset used
https://www.kaggle.com/c/dogs-vs-cats/data

# Importing Libraries

In [11]:
# Importing the libraries
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score, f1_score, classification_report
import os
from PIL import Image
from skimage import color
from skimage.feature import hog

# Loading images and extracting the labels

In [51]:
# Function to import and load images from the folders
def load_images(folder, target_size = (64,64), pixels_per_cell = (32,32), cells_per_block = (2,2)):
    """Load every image file in ``folder`` and return one HOG descriptor per image.

    Parameters
    ----------
    folder : str
        Directory that directly contains the image files.
    target_size : tuple of int, optional
        Size passed to ``Image.resize`` before feature extraction.
    pixels_per_cell, cells_per_block : tuple of int, optional
        Forwarded unchanged to ``skimage.feature.hog``; the defaults are the
        values this function previously hard-coded.

    Returns
    -------
    images : list
        One HOG feature array per image, in sorted filename order.
    filenames : list of str
        The file name matching each entry of ``images``.
    """
    images = []
    filenames = []

    # os.listdir() gives no ordering guarantee; sorting keeps the sample order
    # (and therefore any seeded train/test split) the same on every machine.
    for filename in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, filename)

        # Sub-directories are not images; skip them instead of crashing in Image.open
        if not os.path.isfile(img_path):
            continue

        # The context manager releases the file handle even if decoding fails.
        with Image.open(img_path) as img:
            # Force 3 channels so grayscale / palette / RGBA files can still be
            # handed to rgb2gray, which expects an RGB array.
            img_array = np.array(img.convert('RGB').resize(target_size))

        # Converting images from RGB to grey scale
        gray_img = color.rgb2gray(img_array)

        # Using HoG feature extraction to enhance accuracy
        feature = hog(gray_img, pixels_per_cell=pixels_per_cell, cells_per_block=cells_per_block, block_norm='L2-Hys', visualize=False)

        images.append(feature)
        filenames.append(filename)

    return images, filenames

In [52]:
# Build the list of HOG descriptors and the matching file names from the 'train' folder
all_images, train_filenames = load_images(folder='train')

In [55]:
# Function to extract the labels from the image name in the data set
def extract_labels(filenames):
    """Return the text before the first '.' of each file name.

    For names such as "cat.0.jpg" or "dog.12.jpg" this yields "cat" / "dog".
    A name without any '.' is returned unchanged.
    """
    # partition('.') splits at the first dot only; element 0 is the prefix
    return [name.partition('.')[0] for name in filenames]

# Derive one class-name label per loaded file from its file name
all_labels = extract_labels(filenames=train_filenames)

# Function to evaluate the model's performance

In [56]:
# Evaluation Metrics Function
def evaluate_model(true_labels, predicted_labels, label_encoder):
    """Print evaluation metrics for a binary classifier.

    Printed, in order: confusion matrix, accuracy, precision, recall and
    F1 score (the last three use ``average='binary'`` with ``pos_label=1``),
    followed by the per-class classification report whose rows are named
    after ``label_encoder.classes_``.

    Parameters
    ----------
    true_labels : ground-truth labels.
    predicted_labels : labels predicted by the model.
    label_encoder : object exposing ``classes_`` (the class names).

    Returns
    -------
    None. Everything is written to standard output.
    """
    # Every metric takes the same two positional arguments
    pair = (true_labels, predicted_labels)
    # Shared settings for the three positive-class metrics
    binary = dict(average='binary', pos_label=1)

    print("Confusion Matrix:\n", confusion_matrix(*pair))
    print(f"Accuracy: {accuracy_score(*pair):.2f}")
    print(f"Precision: {precision_score(*pair, **binary):.2f}")
    print(f"Recall: {recall_score(*pair, **binary):.2f}")
    print(f"F1 Score: {f1_score(*pair, **binary):.2f}")

    # Per-class precision, recall and f1-score
    print("Classification Report:\n", classification_report(*pair, target_names = label_encoder.classes_))

# Encoding the labels and splitting the data into train and test sets

In [57]:
# Each entry of all_images is a HOG descriptor; flatten() ensures it is a 1-D vector
images = [descriptor.flatten() for descriptor in all_images]

# Fit the encoder on the class names, then map them to integer ids
# (0 and 1 for "cat" and "dog" respectively)
label_encoding = LabelEncoder().fit(all_labels)
labels = label_encoding.transform(all_labels)

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=0,
)

# Training SVM, extracting predictions, and evaluating the model

In [58]:
# Fit an RBF-kernel SVM on the training split (fit() returns the estimator itself)
classifier = svm.SVC(kernel='rbf').fit(X_train, y_train)

# Predict encoded class ids for the held-out split
test_predictions = classifier.predict(X_test)

# Decode the predictions and the true labels back to "cat" or "dog"
test_labels_predicted, y_test_labels = (
    label_encoding.inverse_transform(encoded)
    for encoded in (test_predictions, y_test)
)

In [61]:
# True test-set labels, decoded back to class names
print(y_test_labels)

['dog' 'cat' 'dog' ... 'cat' 'cat' 'cat']


In [62]:
# True test-set labels in their encoded integer form
print(y_test)

[1 0 1 ... 0 0 0]


In [63]:
# Predicted test-set labels, decoded back to class names
print(test_labels_predicted)

['dog' 'cat' 'cat' ... 'dog' 'cat' 'cat']


In [64]:
# Predicted test-set labels in their encoded integer form
print(test_predictions)

[1 0 0 ... 1 0 0]


In [65]:
# Score the encoded predictions against the encoded ground truth
evaluate_model(
    true_labels=y_test,
    predicted_labels=test_predictions,
    label_encoder=label_encoding,
)

Confusion Matrix:
 [[1712  784]
 [ 665 1839]]
Accuracy: 0.71
Precision: 0.70
Recall: 0.73
F1 Score: 0.72
Classification Report:
               precision    recall  f1-score   support

         cat       0.72      0.69      0.70      2496
         dog       0.70      0.73      0.72      2504

    accuracy                           0.71      5000
   macro avg       0.71      0.71      0.71      5000
weighted avg       0.71      0.71      0.71      5000

