In [1]:
import cv2
import numpy as np
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, OneClassSVM
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score

# Configuration
CSV_PATH = "SIFT_SURF.csv"  # CSV file with filenames and labels
IMAGE_BASE_DIR = "/Images"  # Folder with all images
TOP_N = 5  # Number of strongest keypoints to consider
USE_SURF = True  # Set False to use SIFT

def load_dataset_from_csv(csv_path, base_dir, max_samples_per_class=None):
    """Load image file paths and labels from CSV file."""
    df = pd.read_csv(csv_path)
    
    # Optionally limit samples per class
    if max_samples_per_class:
        df = df.groupby("classCode").head(max_samples_per_class).reset_index(drop=True)
    
    # Build full image paths
    df['filepath'] = df['file'].apply(lambda x: os.path.join(base_dir, x))
    
    # Map labels to integers (example mapping)
    label_map = {"OK": 0, "NOK": 1}
    df['label_int'] = df['classCode'].map(label_map)
    
    paths = df['filepath'].tolist()
    labels = df['label_int'].values
    
    return paths, labels

def create_feature_extractor(use_surf=True):
    """Initialize SURF or SIFT feature extractor."""
    if use_surf:
        return cv2.xfeatures2d.SURF_create()
    else:
        return cv2.SIFT_create()

def extract_features(image_path, extractor, top_n=5):
    """Extract feature vector based on top keypoints' size and response."""
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return np.zeros(top_n * 2)
    keypoints, _ = extractor.detectAndCompute(img, None)
    if not keypoints:
        return np.zeros(top_n * 2)
    keypoints = sorted(keypoints, key=lambda x: -x.response)[:top_n]
    features = np.array([[kp.size, kp.response] for kp in keypoints]).flatten()
    if len(features) < top_n * 2:
        features = np.pad(features, (0, top_n * 2 - len(features)))
    return features

def build_features(paths, extractor, top_n=5):
    """Extract features from all images."""
    return np.array([extract_features(p, extractor, top_n) for p in paths])

In [2]:
# Load dataset from CSV
paths, labels = load_dataset_from_csv(CSV_PATH, IMAGE_BASE_DIR, max_samples_per_class=50)

In [3]:
# Create feature extractor
extractor = create_feature_extractor(USE_SURF)

error: OpenCV(4.12.0) D:\a\opencv-python\opencv-python\opencv_contrib\modules\xfeatures2d\src\surf.cpp:1028: error: (-213:The function/feature is not implemented) This algorithm is patented and is excluded in this configuration; Set OPENCV_ENABLE_NONFREE CMake option and rebuild the library in function 'cv::xfeatures2d::SURF::create'


In [None]:
# Extract features
features = build_features(paths, extractor, TOP_N)

In [None]:
# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(features)
y = labels

In [None]:
# Split dataset into train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3)

In [None]:
# Train supervised SVM
svm = SVC(kernel='rbf', probability=True)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
acc = accuracy_score(y_test, y_pred)
auc = roc_auc_score(y_test, svm.predict_proba(X_test)[:, 1])
print(f"Supervised SVM | Accuracy: {acc:.3f} | AUC: {auc:.3f}")

In [None]:
# Train One-Class SVM on OK samples only for anomaly detection
ocsvm = OneClassSVM(gamma='auto', nu=0.1)
ocsvm.fit(X_train[y_train == 0])
y_pred_oc = ocsvm.predict(X_test)
y_pred_oc = np.where(y_pred_oc == 1, 0, 1)  # Map to binary labels
acc_oc = accuracy_score(y_test, y_pred_oc)
auc_oc = roc_auc_score(y_test, y_pred_oc)
print(f"One-Class SVM | Accuracy: {acc_oc:.3f} | AUC: {auc_oc:.3f}")
