# In [None]:
import cv2
import numpy as np
import os
from sklearn.cluster import MiniBatchKMeans
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# -------------------------------
# Step 1: Load dataset
# -------------------------------
def load_images_from_folder(folder):
    """Load every decodable image under ``folder`` as a grayscale array.

    ``folder`` is expected to contain one sub-directory per class; the name
    of the sub-directory is used as the label of each image inside it.
    Top-level entries that are not directories are ignored. Entries inside a
    class directory that OpenCV cannot decode (non-image files, nested
    directories, corrupt images) are reported and skipped instead of
    crashing the colour conversion.

    Args:
        folder: Path of the dataset root directory.

    Returns:
        A ``(images, labels)`` tuple of two equal-length lists: the
        grayscale images and, at the same index, the name of the
        sub-directory each image was read from.
    """
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the image
    # order (and therefore any later seeded split) reproducible.
    for label_folder in sorted(os.listdir(folder)):
        label_path = os.path.join(folder, label_folder)
        if not os.path.isdir(label_path):
            continue
        for filename in sorted(os.listdir(label_path)):
            img_path = os.path.join(label_path, filename)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None, not by raising.
                print("Skipping unreadable file:", img_path)
                continue
            images.append(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
            labels.append(label_folder)
    return images, labels

# Expected layout: one sub-folder per class, e.g.
# dataset/positive/*.jpg
# dataset/negative/*.jpg
images, labels = load_images_from_folder("dataset")

print("Loaded", len(images), "images")

# Fail fast: with no images the later feature-extraction steps would only
# fail with an unrelated-looking stacking error.
if not images:
    raise ValueError(
        'No images were loaded from "dataset"; '
        "expected dataset/<label>/<image files>"
    )

# -------------------------------
# Step 2: Extract SIFT features
# -------------------------------
sift = cv2.SIFT_create()

# Collect the descriptor matrix of every image that produced keypoints.
# detectAndCompute returns None as descriptors when it finds none, so those
# images contribute nothing to the vocabulary.
all_descriptors = []
for img in images:
    kp, desc = sift.detectAndCompute(img, None)
    if desc is not None:
        all_descriptors.append(desc)

# np.vstack rejects an empty list with a generic message; report the actual
# problem instead.
if not all_descriptors:
    raise ValueError("No SIFT descriptors could be extracted from the images")

all_descriptors = np.vstack(all_descriptors)
print("Total descriptors:", all_descriptors.shape)

# -------------------------------
# Step 3: K-means (Visual Vocabulary)
# -------------------------------
VOCAB_SIZE = 200

# random_state pins the centroid initialisation and mini-batch sampling so
# the visual vocabulary (and every downstream result) is reproducible.
kmeans = MiniBatchKMeans(
    n_clusters=VOCAB_SIZE, batch_size=100, verbose=1, random_state=42
)
kmeans.fit(all_descriptors)

# -------------------------------
# Step 4: Build BoVW histograms
# -------------------------------
def compute_histogram(desc, *, model=None, vocab_size=None):
    """Return the visual-word count histogram for one image's descriptors.

    Each descriptor row is assigned to a cluster by ``model.predict`` and the
    assignments are counted per cluster index.

    Args:
        desc: 2-D array with one descriptor per row, or ``None`` / an array
            with zero rows when the image produced no descriptors.
        model: Object exposing ``predict(desc)`` that returns one integer
            cluster index per row. Defaults to the module-level ``kmeans``.
        vocab_size: Length of the returned histogram; should equal the
            number of clusters of ``model``. Defaults to the module-level
            ``VOCAB_SIZE``.

    Returns:
        1-D integer array of length ``vocab_size`` (all zeros when there are
        no descriptors).
    """
    if vocab_size is None:
        vocab_size = VOCAB_SIZE
    # Guard the zero-row case as well as None: predict() rejects empty input.
    if desc is None or len(desc) == 0:
        return np.zeros(vocab_size, dtype=np.intp)
    if model is None:
        model = kmeans
    words = model.predict(desc)
    # minlength pads the counts so unused trailing words still get a zero bin.
    return np.bincount(words, minlength=vocab_size)

from sklearn.model_selection import train_test_split

# One histogram per image, in the same order as `labels`.
bovw_features = np.array(
    [compute_histogram(sift.detectAndCompute(img, None)[1]) for img in images],
    dtype=np.float64,
)

# -------------------------------
# Step 5: Split train/test
# -------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    bovw_features, labels, test_size=0.2, random_state=42
)

# Normalize features
# The scaler is fitted on the training rows only; fitting it on all samples
# before the split would leak test-set statistics into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Keep `bovw_features` standardised (now with train-only statistics) so the
# name still refers to scaled features after this step.
bovw_features = scaler.transform(bovw_features)

# -------------------------------
# Step 6: Train SVM Classifier
# -------------------------------
svm = SVC(kernel="linear")
svm.fit(X_train, y_train)

# -------------------------------
# Step 7: Evaluate
# -------------------------------
y_pred = svm.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print("Accuracy:", acc)
