### Task - 3 : Implement a support vector machine (SVM) to classify images of cats and dogs from the Kaggle dataset.



## Libraries 


In [200]:
import os
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import cv2

In [201]:
# Root folder of the dataset; it also serves as the output folder, so the
# location is defined exactly once.
dataset_dir = "Cat Dog Dataset/"
folder_path = dataset_dir
os.makedirs(folder_path, exist_ok=True)

# Paths for the confusion-matrix image, classification report and pickled model
confusion_image_path = os.path.join(folder_path, "confusion matrix.png")
classification_file_path = os.path.join(folder_path, "classification_report.txt")
model_file_path = os.path.join(folder_path, "svm_model.pkl")

# Train / test split folders, derived from the dataset root
train_dir = os.path.join(dataset_dir, "train")
test_dir = os.path.join(dataset_dir, "test")

## Load, Preprocess, and Label the Data

In [202]:
# Read and preprocess images from the train folder
def preprocess_images(directory):
    """Load every image under ``directory`` as a 100x100 grayscale array.

    ``directory`` is expected to contain one sub-folder per class. Images in a
    sub-folder named ``"cats"`` get label 0; images in any other sub-folder
    get label 1.

    Folders and files are visited in sorted order so the result does not
    depend on the filesystem's listing order. Top-level entries that are not
    directories, and files OpenCV cannot decode, are skipped instead of
    aborting the whole load.

    Args:
        directory: path of the folder containing the class sub-folders.

    Returns:
        A tuple ``(images, labels)`` of two parallel lists: the grayscale
        image arrays and their integer labels.
    """
    images = []
    labels = []
    for label in sorted(os.listdir(directory)):
        label_dir = os.path.join(directory, label)
        if not os.path.isdir(label_dir):
            # A stray file next to the class folders would make os.listdir raise.
            continue
        class_id = 0 if label == "cats" else 1
        for filename in sorted(os.listdir(label_dir)):
            img_path = os.path.join(label_dir, filename)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None (it does not raise) for unreadable files.
                print(f"Skipping unreadable image: {img_path}")
                continue
            img = cv2.resize(img, (100, 100))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            images.append(img)
            labels.append(class_id)

    return images, labels

In [203]:
# Load and preprocess every training image; labels come from the class sub-folder names.
train_images, train_labels = preprocess_images(train_dir)

In [204]:
# Convert the Python lists into NumPy arrays (image array and label vector)
images = np.asarray(train_images)
labels = np.asarray(train_labels)

In [205]:
images

array([[[177, 174, 165, ..., 223, 135, 141],
        [178, 173, 181, ..., 223, 140, 143],
        [179, 169, 176, ..., 223, 137, 138],
        ...,
        [190, 188, 189, ..., 210, 201, 202],
        [187, 183, 190, ..., 198, 194, 201],
        [190, 189, 189, ..., 203, 200, 201]],

       [[239, 239, 239, ...,  11,  11,  11],
        [239, 239, 239, ...,  11,  11,  11],
        [239, 239, 239, ...,  11,  11,  11],
        ...,
        [114, 114, 114, ...,  26,  25,  24],
        [114, 110, 115, ...,  23,  22,  21],
        [114, 106, 115, ...,  21,  20,  19]],

       [[ 19,  20,  21, ...,  17,  17,  17],
        [ 19,  20,  21, ...,  17,  17,  17],
        [ 19,  20,  21, ...,  17,  17,  17],
        ...,
        [ 48,  51,  54, ...,  12,  12,  11],
        [ 50,  53,  55, ...,  11,  11,  10],
        [ 50,  54,  56, ...,  11,  11,  10]],

       ...,

       [[ 97, 131, 143, ...,  84,  84,  84],
        [ 95, 118, 133, ...,  84,  83,  83],
        [ 84,  98, 124, ...,  84,  82,  82

In [206]:
labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [207]:
# Flatten every image into a single row of pixel features
y_train = labels
X_train = images.reshape(images.shape[0], -1)

In [208]:
# Build a linear-kernel SVM classifier (C = 1.0) ...
svm = SVC(kernel="linear", C=1.0, random_state=42)
# ... and fit it on the flattened training images
svm.fit(X_train, y_train)

In [209]:
# Define a function to preprocess test image
def preprocess_test_images(directory):
    """Load every image under ``directory`` as a 100x100 grayscale array.

    ``directory`` is expected to contain sub-folders of image files. Unlike
    the training loader, no labels are produced; the file name of each image
    is returned so predictions can be reported per file.

    Folders and files are visited in sorted order so the result does not
    depend on the filesystem's listing order. Top-level entries that are not
    directories, and files OpenCV cannot decode, are skipped instead of
    aborting the whole load.

    Args:
        directory: path of the folder containing the image sub-folders.

    Returns:
        A tuple ``(test_images, test_filenames)`` of two parallel lists: the
        grayscale image arrays and the base file name of each image.
    """
    test_images = []
    test_filenames = []
    for label in sorted(os.listdir(directory)):
        label_dir = os.path.join(directory, label)
        if not os.path.isdir(label_dir):
            # A stray file next to the sub-folders would make os.listdir raise.
            continue
        for filename in sorted(os.listdir(label_dir)):
            img_path = os.path.join(label_dir, filename)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None (it does not raise) for unreadable files.
                print(f"Skipping unreadable image: {img_path}")
                continue
            img = cv2.resize(img, (100, 100))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            test_images.append(img)
            test_filenames.append(filename)

    return test_images, test_filenames

In [210]:
# Preprocess the test images, keeping each file name so predictions can be reported per file
test_images, test_filenames = preprocess_test_images(test_dir)

In [211]:
# Flatten each test image into a single row of pixel features
X_test = np.asarray(test_images).reshape(len(test_images), -1)

In [212]:
X_test

array([[128, 139, 140, ...,  47,  47,  49],
       [255, 255, 255, ..., 255, 255, 255],
       [  0,   0,   0, ...,  14,   5,   6],
       ...,
       [173, 157, 168, ..., 182, 178, 169],
       [194, 196, 198, ...,  42,  69,  56],
       [101,  90,  85, ..., 168, 169, 168]], dtype=uint8)

In [213]:
# Predict a class label for every flattened test image with the fitted SVM
y_pred = svm.predict(X_test)

In [214]:
y_pred

array([0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 1, 1, 0, 1])

In [215]:
# Human-readable class names keyed by the integer labels the model predicts
label_mapping = {
    0: "Cat",
    1: "Dog",
}


In [216]:
import matplotlib.pyplot as plt

In [219]:
# Report the predicted class name for every test file.
# (The previous version re-listed the whole test folder for each prediction
# inside a nested loop whose body did nothing; that dead work is removed.)
print("Prediction for the test dataset: ")
for filename, prediction in zip(test_filenames, y_pred):
    label = label_mapping[prediction]
    print(f"File: {filename}, Prediction: {label}")


Prediction for the test dataset: 
File: cat_1.jpg, Prediction: Cat
File: cat_106.jpg, Prediction: Cat
File: cat_109.jpg, Prediction: Cat
File: cat_113.jpg, Prediction: Cat
File: cat_114.jpg, Prediction: Dog
File: cat_116.jpg, Prediction: Cat
File: cat_118.jpg, Prediction: Dog
File: cat_119.jpg, Prediction: Dog
File: cat_124.jpg, Prediction: Cat
File: cat_140.jpg, Prediction: Dog
File: cat_147.jpg, Prediction: Cat
File: cat_156.jpg, Prediction: Cat
File: cat_158.jpg, Prediction: Dog
File: cat_162.jpg, Prediction: Cat
File: cat_18.jpg, Prediction: Cat
File: cat_190.jpg, Prediction: Cat
File: cat_203.jpg, Prediction: Cat
File: cat_223.jpg, Prediction: Dog
File: cat_234.jpg, Prediction: Dog
File: cat_244.jpg, Prediction: Cat
File: cat_251.jpg, Prediction: Cat
File: cat_255.jpg, Prediction: Dog
File: cat_268.jpg, Prediction: Cat
File: cat_279.jpg, Prediction: Cat
File: cat_281.jpg, Prediction: Cat
File: cat_290.jpg, Prediction: Cat
File: cat_306.jpg, Prediction: Cat
File: cat_313.jpg, Predi