# 🐱🐶 Cats vs Dogs Classification using SVM

In [None]:

import os
import numpy as np
import pandas as pd
from PIL import Image
from tqdm.auto import tqdm
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from skimage.feature import hog
import joblib


In [None]:

# Dataset index file; it is read relative to the current working directory,
# so keep it in the same folder as this notebook.
csv_path = "images_dataset.csv"
df = pd.read_csv(filepath_or_buffer=csv_path)

# Quick sanity check: row count, then a preview of the first rows.
print(f"Rows: {len(df)}")
df.head(5)


In [None]:

# Folder that contains the image files -- edit this to match your machine.
base_dir = r"C:\Users\YourName\cats_and_dogs"

# Build one full path per row by joining base_dir with the 'filename' column.
df['filepath'] = [os.path.join(base_dir, name) for name in df['filename']]

# Report paths that do not exist on disk so a wrong base_dir is spotted early.
missing = list(filter(lambda path: not os.path.exists(path), df['filepath']))
print(f"Missing files: {len(missing)}")
if missing:
    print("⚠️ Example missing:", missing[:5])


In [None]:

def load_images(filepaths, size=(128,128), grayscale=True, max_images=None):
    X = []
    skipped = 0
    total = len(filepaths) if max_images is None else min(len(filepaths), max_images)
    for p in tqdm(filepaths[:total], desc="Loading images"):
        try:
            img = Image.open(p).convert("RGB")
            img = img.resize(size, Image.BILINEAR)
            if grayscale:
                img = img.convert("L")
            X.append(np.array(img))
        except Exception:
            skipped += 1
    X = np.array(X)
    print(f"✅ Loaded {len(X)} images, skipped {skipped}")
    return X


In [None]:

filepaths = df['filepath'].values
labels = df['label'].values

X_images_gray = load_images(filepaths, size=(128,128), grayscale=True)

if len(X_images_gray) == 0:
    raise ValueError("❌ No images loaded! Check your base_dir path.")

X_images_gray = X_images_gray.astype("float32") / 255.0

hog_features = []
for img in tqdm(X_images_gray, desc="Computing HOG"):
    feat = hog(img, orientations=9, pixels_per_cell=(8,8),
               cells_per_block=(2,2), block_norm="L2-Hys",
               visualize=False, feature_vector=True)
    hog_features.append(feat)

X_hog = np.array(hog_features)
print("HOG feature shape:", X_hog.shape)


In [None]:

# Features and labels are paired purely by position, so their counts must
# agree. Truncating labels to len(X_hog) would only be correct if every image
# that failed to load sat at the very end of the list; anywhere else it
# attaches labels to the wrong images without any error.
if len(labels) != len(X_hog):
    raise ValueError(
        f"❌ Label/feature mismatch: {len(labels)} labels vs {len(X_hog)} "
        "feature rows. Labels can no longer be matched to images by position "
        "(e.g. because some images were skipped while loading). Remove the "
        "rows of unreadable files from the dataset and rebuild the features."
    )

# Encode the label values as integers 0..n_classes-1.
le = LabelEncoder()
y = le.fit_transform(labels)

# Stratified 80/20 split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X_hog, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training rows only, then apply it to the test rows,
# so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:

# RBF-kernel SVM; class_weight="balanced" reweights classes by frequency.
svc = SVC(kernel="rbf", class_weight="balanced")

# Small search space: 2 values of C x 2 gamma settings.
param_grid = {
    "C": [1, 10],
    "gamma": ["scale", "auto"],
}

# 3-fold cross-validated grid search using all available cores.
grid = GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid.fit(X_train, y_train)

best_svc = grid.best_estimator_
print(f"Best params: {grid.best_params_}")


In [None]:

# Score the tuned model on the held-out test rows.
y_pred = best_svc.predict(X_test)

print("✅ Accuracy:", accuracy_score(y_test, y_pred))

# classification_report expects display names as strings, while le.classes_
# holds the raw label values, which may be numeric (e.g. 0/1 in the CSV).
# Converting with str() is a no-op for string labels.
class_names = [str(c) for c in le.classes_]
print(classification_report(y_test, y_pred, target_names=class_names))


In [None]:

# Persist everything needed at inference time: the tuned classifier, the
# scaler fitted on the training features, and the label encoder.
artifacts = {
    "svm_cats_dogs_model.joblib": best_svc,
    "scaler.joblib": scaler,
    "label_encoder.joblib": le,
}
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)

print("✅ Model and preprocessing saved!")
