In [69]:
import os

import cv2
import joblib
import matplotlib.pyplot as plt
import numpy as np
from joblib import Parallel, delayed
from skimage.feature import hog
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC
from sklearn.utils import shuffle
from tqdm import tqdm

# ✅ Dataset location -- expected layout: <base_dir>/{train,test}/{cats,dogs}/<images>
base_dir = r"C:\Users\Sahil Raj\Downloads\cat-v-dog\dogs_cats_sample_1000"
train_dir, test_dir = (
    os.path.join(base_dir, split_name) for split_name in ("train", "test")
)

# ✅ Preprocessing settings
# Target size handed to cv2.resize before HOG features are extracted.
image_size = (96, 96)
# Class subfolder names looked up inside both the train and test directories.
categories = ["cats", "dogs"]
# Integer label per class, taken from the position in `categories`
# (cats -> 0, dogs -> 1).
label_map = {class_name: class_id for class_id, class_name in enumerate(categories)}

# ✅ Function to extract HOG features
def extract_hog_features(image):
    """Compute the Histogram-of-Oriented-Gradients descriptor of ``image``.

    The descriptor uses 9 orientation bins, 8x8-pixel cells and 2x2-cell
    blocks, and is returned flattened (``feature_vector=True``).

    Args:
        image: Image array passed straight to ``skimage.feature.hog``.
            Callers in this file pass a resized grayscale image.

    Returns:
        The HOG feature vector produced by ``skimage.feature.hog``.
    """
    hog_settings = {
        "orientations": 9,
        "pixels_per_cell": (8, 8),
        "cells_per_block": (2, 2),
        "feature_vector": True,
    }
    descriptor = hog(image, **hog_settings)
    return descriptor
# ✅ Helpers to load the dataset from the 'cats' and 'dogs' subfolders
# File extensions treated as images; compared against the lower-cased name.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def _process_image(img_path, label):
    """Read one image as grayscale, resize it and return ``(hog_features, label)``.

    Returns ``(None, None)`` when OpenCV cannot decode the file, so the caller
    can drop (and count) it instead of crashing the whole load.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return None, None
    img = cv2.resize(img, image_size)
    return extract_hog_features(img), label


def load_dataset(directory, is_train=True, max_samples=500, random_state=None):
    """Load HOG features and integer labels for every class folder in ``directory``.

    For each name in ``categories`` the matching subfolder is scanned for
    ``.jpg`` / ``.jpeg`` / ``.png`` files (case-insensitive). At most
    ``max_samples`` files per class are kept, chosen at random without
    replacement, and each kept file is converted to a HOG vector in parallel.

    Args:
        directory: Folder containing one subfolder per entry of ``categories``.
        is_train: Only affects the progress message ("training" vs "testing").
        max_samples: Per-class cap on the number of images; ``None`` disables
            the cap.
        random_state: Optional seed for the per-class sub-sampling. When
            ``None`` (default) NumPy's global random state is used, exactly
            as before, so an earlier ``np.random.seed`` call still applies.

    Returns:
        ``(features, labels)`` as NumPy arrays built from the per-image HOG
        vectors and their ``label_map`` values. Both are empty when no image
        could be loaded.
    """
    features, labels = [], []
    print(f"\n🔄 Loading {'training' if is_train else 'testing'} images from: {directory}...")

    # A private generator is used only when a seed is supplied; otherwise the
    # module-level np.random functions keep the original behaviour.
    sampler = np.random if random_state is None else np.random.default_rng(random_state)

    for category in categories:
        category_path = os.path.join(directory, category)
        if not os.path.exists(category_path):
            print(f"⚠ Warning: {category_path} does not exist. Skipping...")
            continue

        print(f"✅ Processing {category} images...")
        # os.listdir order is arbitrary; sorting makes seeded sampling
        # reproducible across machines and runs.
        image_paths = sorted(
            os.path.join(category_path, file_name)
            for file_name in os.listdir(category_path)
            if file_name.lower().endswith(_IMAGE_EXTENSIONS)
        )

        if max_samples is not None and len(image_paths) > max_samples:
            # Equal samples per class; convert NumPy strings back to plain str.
            chosen = sampler.choice(image_paths, max_samples, replace=False)
            image_paths = [str(path) for path in chosen]

        label = label_map[category]
        results = Parallel(n_jobs=-1)(
            delayed(_process_image)(img_path, label) for img_path in tqdm(image_paths)
        )

        unreadable = 0
        for feature, image_label in results:
            if feature is None:
                unreadable += 1
                continue
            features.append(feature)
            labels.append(image_label)

        if unreadable:
            # Surface dropped files instead of silently shrinking the class.
            print(f"⚠ Warning: skipped {unreadable} unreadable image(s) in {category_path}.")

    return np.array(features), np.array(labels)
# ✅ Load Balanced Training Dataset
X_train, y_train = load_dataset(train_dir, is_train=True, max_samples=500)

# Validate BEFORE shuffling/printing so a bad dataset path fails with a clear
# message instead of an error raised from inside scikit-learn.
if X_train.size == 0 or y_train.size == 0:
    raise ValueError("🚨 Error: No training images loaded! Check dataset path.")
if np.unique(y_train).size < 2:
    # SVC cannot be fitted on a single class (e.g. one class folder missing).
    raise ValueError("🚨 Error: Training data contains only one class! Check the class subfolders.")

# Shuffle so the samples are not ordered by class; fixed seed for repeatable runs.
X_train, y_train = shuffle(X_train, y_train, random_state=42)
print("\n✅ Training Data Loaded:", X_train.shape)

# ✅ Train SVM Model (RBF kernel, C=100, gamma='scale', balanced class weights)
print("\n🚀 Training SVM Model...")
svm_model = SVC(kernel='rbf', C=100, gamma='scale', class_weight='balanced')
svm_model.fit(X_train, y_train)
print("✅ Model Training Complete!")

# ✅ Load Balanced Test Dataset
X_test, y_test = load_dataset(test_dir, is_train=False, max_samples=500)
print("\n✅ Testing Data Loaded:", X_test.shape)

if X_test.size == 0 or y_test.size == 0:
    raise ValueError("🚨 Error: No test images found! Check dataset path.")

# ✅ Predict on Test Dataset
y_pred = svm_model.predict(X_test)

# ✅ Display Test Results
print("\n🎯 Predictions on Test Set (1 = Dog, 0 = Cat):", y_pred[:10])
print("✅ Predicted Cats:", np.sum(y_pred == 0))
print("✅ Predicted Dogs:", np.sum(y_pred == 1))
print("\n✅ Classification Report:\n", classification_report(y_test, y_pred))

# ✅ Save the trained model
# Requires the top-level `import joblib`; `from joblib import Parallel, delayed`
# alone does not bind the name `joblib`.
model_filename = "cat_dog_svm_model.pkl"
joblib.dump(svm_model, model_filename)
print(f"\n✅ Model saved as '{model_filename}'")


🔄 Loading training images from: C:\Users\Sahil Raj\Downloads\cat-v-dog\dogs_cats_sample_1000\train...
✅ Processing cats images...


100%|███████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 779.68it/s]


✅ Processing dogs images...


100%|███████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 755.07it/s]



✅ Training Data Loaded: (1000, 4356)

🚀 Training SVM Model...
✅ Model Training Complete!

🔄 Loading testing images from: C:\Users\Sahil Raj\Downloads\cat-v-dog\dogs_cats_sample_1000\test...
✅ Processing cats images...


100%|███████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 940.80it/s]


✅ Processing dogs images...


100%|███████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 887.02it/s]



✅ Testing Data Loaded: (1000, 4356)

🎯 Predictions on Test Set (1 = Dog, 0 = Cat): [0 0 1 0 0 1 0 0 0 0]
✅ Predicted Cats: 555
✅ Predicted Dogs: 445

✅ Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.77      0.73       500
           1       0.74      0.66      0.70       500

    accuracy                           0.71      1000
   macro avg       0.72      0.71      0.71      1000
weighted avg       0.72      0.71      0.71      1000


✅ Model saved as 'cat_dog_svm_model.pkl'
