## Step 1: Define the Dataset Class

>- We'll create a class to handle loading and preprocessing the dataset, including data augmentation

In [1]:
import os
import numpy as np
import cv2  # OpenCV for image processing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
class CatDogClassifier:
    def __init__(self, data_dir, img_size=(128, 128)):
        self.data_dir = data_dir
        self.img_size = img_size

    def load_data(self):
        images = []
        labels = []
        
        for label in ['cat', 'dog']:
            folder = os.path.join(self.data_dir, 'train', label)
            for filename in os.listdir(folder):
                img_path = os.path.join(folder, filename)
                img = cv2.imread(img_path)
                img = cv2.resize(img, self.img_size)
                img_array = img.flatten() / 255.0  # Flatten and normalize to [0, 1]
                images.append(img_array)
                labels.append(0 if label == 'cat' else 1)  # 0 for cats, 1 for dogs
        
        return np.array(images), np.array(labels)

## Step 2: Define the Model Class

>- Next, we will define a class for building and training the SVM model.

In [3]:
class ModelTrainer:
    def __init__(self):
        self.model = None

    def train(self, X_train, y_train):
        # Create a pipeline with StandardScaler and SVC
        self.model = make_pipeline(StandardScaler(), SVC())

        # Define parameter grid for hyperparameter tuning
        param_grid = {
            'svc__C': [0.1, 1, 10],
            'svc__gamma': [0.01, 0.1, 1],
            'svc__kernel': ['linear', 'rbf']
        }

        # Create GridSearchCV object with verbose=3 to display progress
        grid_search = GridSearchCV(
            self.model,
            param_grid,
            cv=3,
            verbose=3,  # Show detailed output during training
            n_jobs=-1   # Use all available CPU cores for parallel processing
        )

        print("Starting Grid Search...")
        
        # Fit on training data
        grid_search.fit(X_train, y_train)

        # Best parameters and model
        self.model = grid_search.best_estimator_
        print("Best parameters:", grid_search.best_params_)
        print("Best cross-validation score:", grid_search.best_score_)

    def predict(self, X_test):
        return self.model.predict(X_test)

## Step 3: Putting It All Together

>- Now we will create an instance of our classes and execute the training process.

In [4]:
#os.chdir(Path('../../../'))
#Path('../../../cat_dog/sample')

In [5]:
if __name__ == "__main__":
    data_directory = '../../../cat_dog/sample'

    classifier = CatDogClassifier(data_directory)
    X, y = classifier.load_data()

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    trainer = ModelTrainer()
    trainer.train(X_train, y_train)

    # Make predictions on the test set
    y_pred = trainer.predict(X_test)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print("Test Accuracy:", accuracy)


Starting Grid Search...
Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV 2/3] END svc__C=0.1, svc__gamma=0.1, svc__kernel=linear;, score=0.461 total time= 1.8min
[CV 1/3] END svc__C=0.1, svc__gamma=0.1, svc__kernel=linear;, score=0.569 total time= 1.8min
[CV 3/3] END svc__C=0.1, svc__gamma=0.01, svc__kernel=linear;, score=0.560 total time= 1.9min
[CV 2/3] END svc__C=0.1, svc__gamma=0.01, svc__kernel=linear;, score=0.461 total time= 1.9min
[CV 1/3] END svc__C=0.1, svc__gamma=0.01, svc__kernel=linear;, score=0.569 total time= 1.9min
[CV 1/3] END svc__C=0.1, svc__gamma=0.01, svc__kernel=rbf;, score=0.506 total time= 2.5min
[CV 3/3] END svc__C=0.1, svc__gamma=0.01, svc__kernel=rbf;, score=0.504 total time= 2.5min
[CV 2/3] END svc__C=0.1, svc__gamma=0.01, svc__kernel=rbf;, score=0.506 total time= 2.6min
[CV 3/3] END svc__C=0.1, svc__gamma=0.1, svc__kernel=linear;, score=0.560 total time= 1.6min
[CV 1/3] END svc__C=0.1, svc__gamma=1, svc__kernel=linear;, score=0.569 total tim