In [None]:
# ================================
# 0. Import the important libs
# ================================

import os

import numpy as np
from skimage.io import imread
from skimage.transform import resize
from skimage.feature import hog
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# ================================
# 1. Feature extraction function
# ================================
def extract_hog_features(img_path, img_size=(64, 64)):
    """Return the HOG descriptor of the image stored at ``img_path``.

    The image is read as grayscale, resized to ``img_size`` and handed to
    ``skimage.feature.hog``.

    Parameters
    ----------
    img_path : path of the image file to read.
    img_size : size the image is resized to before the descriptor is
        computed; defaults to ``(64, 64)``.

    Returns
    -------
    Whatever ``hog`` returns for the resized image (the feature vector that
    is later stacked into the feature matrix).
    """
    # HOG configuration: 9 orientation bins, 8x8-pixel cells, 2x2-cell blocks,
    # L2-Hys block normalisation.
    hog_settings = {
        "orientations": 9,
        "pixels_per_cell": (8, 8),
        "cells_per_block": (2, 2),
        "block_norm": 'L2-Hys',
    }
    gray_image = imread(img_path, as_gray=True)  # grayscale
    return hog(resize(gray_image, img_size), **hog_settings)

# ================================
# 2. Load dataset (first 2000 imgs)
# ================================
train_dir = r"C:\Users\farou\Downloads\train"
MAX_TRAIN_IMAGES = 2000  # cap on the number of *successfully loaded* images
X, y = [], []

# os.listdir returns entries in arbitrary order; sorting makes the selected
# subset (and therefore every downstream result) reproducible across runs.
for fname in sorted(os.listdir(train_dir)):
    if len(X) >= MAX_TRAIN_IMAGES:  # limit
        break
    fpath = os.path.join(train_dir, fname)

    # Derive the label from the file name, case-insensitively. Files that name
    # neither class are skipped instead of being silently labelled as dogs.
    lowered = fname.lower()
    if "cat" in lowered:
        label = 0  # 0=cat
    elif "dog" in lowered:
        label = 1  # 1=dog
    else:
        print("Skipping:", fpath, "Error:", "file name contains neither 'cat' nor 'dog'")
        continue

    # Best effort: unreadable / non-image files are reported and skipped.
    try:
        features = extract_hog_features(fpath)
    except Exception as e:
        print("Skipping:", fpath, "Error:", e)
        continue
    X.append(features)
    y.append(label)

X = np.array(X)
y = np.array(y)

# Fail early with a clear message: a classifier cannot be trained on fewer
# than two classes, and the later steps would fail with less obvious errors.
if np.unique(y).size < 2:
    raise ValueError(
        f"Expected both cat and dog images among the first {MAX_TRAIN_IMAGES} "
        f"loadable files in {train_dir!r}, but found labels {np.unique(y).tolist()}"
    )

print("✅ Dataset loaded!")
print("Feature matrix shape:", X.shape)
print("Labels shape:", y.shape)

# ================================
# 3. Train / validation split
# ================================
# Hold out 20% of the samples for validation. The split is stratified on the
# labels so both subsets keep the class proportions of the full dataset, and
# the fixed random_state makes it repeatable.
(X_train, X_val,
 y_train, y_val) = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

print(
    "Training set:", X_train.shape,
    "Validation set:", X_val.shape,
)

# ================================
# 4. Scale features
# ================================
# Scaling is part of the model pipeline instead of a separate up-front step.
# GridSearchCV then re-fits the scaler on the training folds of every CV
# split, so the held-out fold never influences the scaling statistics
# (fitting the scaler on all of X_train first leaks information into the
# cross-validation scores used to pick C and gamma).
svm = SVC(kernel="rbf")
model = Pipeline([
    ("scaler", StandardScaler()),
    ("svc", svm),
])

# ================================
# 5. Grid search with RBF SVM
# ================================
# Pipeline parameters are addressed as "<step name>__<parameter>".
param_grid = {
    "svc__C": [0.1, 1, 10, 100],
    "svc__gamma": [0.0001, 0.001, 0.01, 0.1]
}

grid = GridSearchCV(model, param_grid, cv=3, verbose=1, n_jobs=-1)
grid.fit(X_train, y_train)

# Strip the "svc__" step prefix so the report reads {'C': ..., 'gamma': ...}.
best_params = {name.split("__", 1)[1]: value for name, value in grid.best_params_.items()}
print("✅ Best parameters found:", best_params)

# With the default refit=True, best_estimator_ is the winning pipeline re-fit
# on the whole of X_train. Its two steps are exposed under the names the rest
# of the notebook uses: `scaler` (fitted on X_train) and `best_svm` (trained
# on the scaled X_train, so it expects already-scaled input).
best_pipeline = grid.best_estimator_
scaler = best_pipeline.named_steps["scaler"]
best_svm = best_pipeline.named_steps["svc"]

X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)

# ================================
# 6. Evaluate best model
# ================================
y_pred = best_svm.predict(X_val_scaled)

print("✅ Accuracy:", accuracy_score(y_val, y_pred))
print(classification_report(y_val, y_pred, target_names=["Cat","Dog"]))


✅ Dataset loaded!
Feature matrix shape: (2000, 1764)
Labels shape: (2000,)
Training set: (1600, 1764) Validation set: (400, 1764)
Fitting 3 folds for each of 16 candidates, totalling 48 fits
✅ Best parameters found: {'C': 10, 'gamma': 0.001}
✅ Accuracy: 0.7125
              precision    recall  f1-score   support

         Cat       0.70      0.74      0.72       200
         Dog       0.73      0.68      0.70       200

    accuracy                           0.71       400
   macro avg       0.71      0.71      0.71       400
weighted avg       0.71      0.71      0.71       400



In [None]:
# ================================
# 7. Load separate test dataset
# ================================
test_dir = r"C:\Users\farou\Downloads\test"
X_test, filenames = [], []

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of the saved results identical from run to run.
for fname in sorted(os.listdir(test_dir)):
    fpath = os.path.join(test_dir, fname)
    # Best effort: unreadable / non-image files are reported and skipped.
    try:
        features = extract_hog_features(fpath)
    except Exception as e:
        print("Skipping:", fpath, "Error:", e)
        continue
    # Appended together so filenames[i] always describes X_test[i].
    X_test.append(features)
    filenames.append(fname)

X_test = np.array(X_test)

# Without this check an empty folder only fails later, inside
# scaler.transform, with an unrelated-looking array-shape error.
if X_test.size == 0:
    raise ValueError(f"No readable images found in {test_dir!r}")

print("✅ Test set loaded!")
print("Feature matrix shape:", X_test.shape)

# ================================
# 8. Scale test data (using SAME scaler)
# ================================
# Only transform: the scaler keeps the statistics fitted on the training set.
X_test_scaled = scaler.transform(X_test)

# ================================
# 9. Predict with best model
# ================================
y_pred_test = best_svm.predict(X_test_scaled)

# ================================
# 10. Save my results into sheet
# ================================
import pandas as pd

# Translate the numeric predictions into class names: 0 is a cat, any other
# value is reported as a dog.
predicted_names = []
for prediction in y_pred_test:
    predicted_names.append("Cat" if prediction == 0 else "Dog")

# One row per test image that was loaded, in loading order.
results = pd.DataFrame({"Filename": filenames, "PredictedLabel": predicted_names})

# Write the same table twice: first as Excel, then as CSV (no index column).
for write_table, target in (
    (results.to_excel, "test_results.xlsx"),
    (results.to_csv, "test_results.csv"),
):
    write_table(target, index=False)

print("✅ Results saved: test_results.xlsx & test_results.csv")


✅ Test set loaded!
Feature matrix shape: (1384, 1764)
✅ Results saved: test_results.xlsx & test_results.csv
