# Cat vs Dog Image Classification using SVM
This notebook demonstrates the process of using Support Vector Machine (SVM) models to classify images into 'cats' or 'dogs'.

## Import Libraries
Import necessary libraries for image processing and machine learning.

In [1]:
import cv2
import os
import numpy as np
from skimage.feature import hog
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from sklearn import svm

## Function Definitions
Here, we define functions for image processing, loading data, and extracting features.

In [2]:
def process_image(img_path):
    """
    Processes an image: reads, converts to grayscale, resizes, and extracts HOG features.
    
        Args:
        img_path (str): Path to the image file.
    
        Returns:
        np.array: Array of HOG features of the image.
    """

    img = cv2.imread(img_path)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_gray = cv2.resize(img_gray, (64, 64))
    features, hog_image = hog(img_gray, orientations=9, pixels_per_cell=(8, 8), 
                              cells_per_block=(2, 2), visualize=True)
    return features

In [3]:
def process_directory(directory, label_code):
    """
    Processes all images in a directory using `process_image` function.
    
        Args:
        directory (str): Path to the directory containing images.
        label_code (int): Numeric label for images in this directory.
    
        Returns:
        Tuple: (List of image features, List of labels, List of filenames).

    """
    
    images = []
    filenames = []
    for filename in os.listdir(directory):
        if filename.startswith('.'):
            continue
        img_path = os.path.join(directory, filename)
        images.append(process_image(img_path))
        filenames.append(filename)
    return images, [label_code] * len(images), filenames

## Load and Process Data
Load the training and test data, process the images, and extract features.

In [4]:
def load_images_and_features(folder):
    """
    Loads and processes images from subdirectories of a folder, assigns labels, and extracts features.
    
        Args:
        folder (str): Path to the folder containing subdirectories of images.
    
        Returns:
        Tuple: (Array of all features, Array of all labels, List of all filenames).
    
    """
    
    all_images = []
    all_labels = []
    all_filenames = []
    for label in os.listdir(folder):
        path = os.path.join(folder, label)
        if not os.path.isdir(path):
            continue
        label_code = 0 if "cat" in label else 1
        images, labels, filenames = process_directory(path, label_code)
        all_images.extend(images)
        all_labels.extend(labels)
        all_filenames.extend(filenames)
    return np.array(all_images), np.array(all_labels), all_filenames

In [5]:
def map_label(numeric_label):
    """
    Maps a numeric label to a string label.
    
        Args:
        numeric_label (int): Numeric label to be mapped.
    
        Returns:
        str: Mapped string label ('cat' or 'dog').

    """

    return 'cat' if numeric_label == 0 else 'dog'

In [6]:
def create_results_df(model, X_test, y_test, test_filenames):
    """
    Creates a DataFrame of model predictions along with actual labels and correctness.
    
        Args:
        model (SVM model): Trained SVM model.
        X_test (np.array): Test features.
        y_test (np.array): Actual labels of test data.
        test_filenames (List): List of test image filenames.
    
        Returns:
        pd.DataFrame: DataFrame containing predictions, actual labels, and correctness.
    

    """
    
    y_pred = model.predict(X_test)
    mapped_predictions = [map_label(pred) for pred in y_pred]
    mapped_actuals = [map_label(actual) for actual in y_test]
    return pd.DataFrame({
        'Filename': test_filenames,
        'Prediction': mapped_predictions,
        'Actual': mapped_actuals,
        'Correct?': y_pred == y_test
    })

## Train SVM Models
Training different SVM models with various kernels (linear, RBF, sigmoid) to classify the images.

In [7]:
X_train, y_train, train_filenames = load_images_and_features('train')
X_test, y_test, test_filenames = load_images_and_features('test')

linear_model = svm.SVC(kernel='linear')
linear_model.fit(X_train, y_train)

rbf_model = svm.SVC(kernel='rbf', random_state = 0)
rbf_model.fit(X_train, y_train)

sigmoid_model = svm.SVC(kernel='sigmoid')
sigmoid_model.fit(X_train, y_train)

## Evaluate Models
Evaluate each model's performance using accuracy and classification reports.

In [8]:
def evaluate_model(model, X_test, y_test, test_filenames):
    """
        Evaluates a model on test data and prints results, accuracy, and classification report.
    
        Args:
        model (SVM model): Trained SVM model to evaluate.
        X_test (np.array): Test features for evaluation.
        y_test (np.array): Actual labels for evaluation.
        test_filenames (List): List of filenames for the test dataset.
    
        Returns:
        pd.DataFrame: DataFrame of the evaluation results. 

    """

    y_pred = model.predict(X_test)
    mapped_predictions = [map_label(pred) for pred in y_pred]
    mapped_actuals = [map_label(actual) for actual in y_test]
    results_df = pd.DataFrame({
        'Filename': test_filenames,
        'Prediction': mapped_predictions,
        'Actual': mapped_actuals,
        'Correct?': y_pred == y_test
    })
    print(results_df)
    print('Accuracy:', accuracy_score(y_test, y_pred))
    print('Classification Report:\n', classification_report(y_test, y_pred))
    return results_df


In [9]:
results_df1 = evaluate_model(linear_model, X_test, y_test, test_filenames)

        Filename Prediction Actual  Correct?
0    dog_147.jpg        cat    dog     False
1    dog_219.jpg        dog    dog      True
2    dog_191.jpg        cat    dog     False
3    dog_344.jpg        dog    dog      True
4    dog_150.jpg        dog    dog      True
..           ...        ...    ...       ...
135  cat_564.jpg        dog    cat     False
136  cat_203.jpg        cat    cat      True
137  cat_375.jpg        dog    cat     False
138  cat_162.jpg        dog    cat     False
139   cat_18.jpg        cat    cat      True

[140 rows x 4 columns]
Accuracy: 0.6214285714285714
Classification Report:
               precision    recall  f1-score   support

           0       0.62      0.64      0.63        70
           1       0.63      0.60      0.61        70

    accuracy                           0.62       140
   macro avg       0.62      0.62      0.62       140
weighted avg       0.62      0.62      0.62       140



In [10]:
results_df2 = evaluate_model(rbf_model, X_test, y_test, test_filenames)

        Filename Prediction Actual  Correct?
0    dog_147.jpg        cat    dog     False
1    dog_219.jpg        dog    dog      True
2    dog_191.jpg        cat    dog     False
3    dog_344.jpg        dog    dog      True
4    dog_150.jpg        dog    dog      True
..           ...        ...    ...       ...
135  cat_564.jpg        dog    cat     False
136  cat_203.jpg        dog    cat     False
137  cat_375.jpg        dog    cat     False
138  cat_162.jpg        dog    cat     False
139   cat_18.jpg        dog    cat     False

[140 rows x 4 columns]
Accuracy: 0.6571428571428571
Classification Report:
               precision    recall  f1-score   support

           0       0.68      0.59      0.63        70
           1       0.64      0.73      0.68        70

    accuracy                           0.66       140
   macro avg       0.66      0.66      0.66       140
weighted avg       0.66      0.66      0.66       140



In [11]:
results_df3 = evaluate_model(sigmoid_model, X_test, y_test, test_filenames)


        Filename Prediction Actual  Correct?
0    dog_147.jpg        cat    dog     False
1    dog_219.jpg        cat    dog     False
2    dog_191.jpg        cat    dog     False
3    dog_344.jpg        dog    dog      True
4    dog_150.jpg        dog    dog      True
..           ...        ...    ...       ...
135  cat_564.jpg        dog    cat     False
136  cat_203.jpg        dog    cat     False
137  cat_375.jpg        cat    cat      True
138  cat_162.jpg        dog    cat     False
139   cat_18.jpg        dog    cat     False

[140 rows x 4 columns]
Accuracy: 0.5642857142857143
Classification Report:
               precision    recall  f1-score   support

           0       0.55      0.70      0.62        70
           1       0.59      0.43      0.50        70

    accuracy                           0.56       140
   macro avg       0.57      0.56      0.56       140
weighted avg       0.57      0.56      0.56       140



# Exporting the Output to .csv file

In [12]:
def export_to_csv(df, filename, header_comment):
    """
    Exports a DataFrame to a CSV file with an optional header comment.
    
        Args:
        df (pd.DataFrame): DataFrame to be exported.
        filename (str): Name of the output CSV file.
        header_comment (str): Comment to be added as a header in the CSV file.
    
    """
    
    with open(filename, 'w') as f:
        f.write(f'# {header_comment}\n')
        df.to_csv(f, index=False)

In [13]:
export_to_csv(results_df1, 'linear.csv', 'Results from linear SVM model')
export_to_csv(results_df2, 'rbf.csv', 'Results from RBF SVM model')
export_to_csv(results_df3, 'sigmoid.csv', 'Results from sigmoid SVM model')