In [1]:
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def extract_sift_features(image):
    sift = cv2.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(image, None)
    return descriptors

def load_images(folder_path):
    images = []
    labels = []
    for label in os.listdir(folder_path):
        label_path = os.path.join(folder_path, label)
        for image_file in os.listdir(label_path):
            image_path = os.path.join(label_path, image_file)
            img = cv2.imread(image_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            images.append(img)
            labels.append(label)    
    return images, labels

def preprocess_images(images):
    sift = cv2.SIFT_create()
    sift_features_list = []
    for image in images:
        keypoints, descriptors = sift.detectAndCompute(image, None)
        sift_features_list.append(descriptors)
    return sift_features_list

def create_bag_of_words(all_descriptors, dictionary_size):
    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)
    flann = cv2.FlannBasedMatcher(index_params, search_params)
    
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 0.01)
    _, dictionary, _ = cv2.kmeans(all_descriptors, dictionary_size, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

    return dictionary

def convert_to_bow(descriptors, dictionary, flann):
    bow_feature = np.zeros(len(dictionary), dtype=np.float32)
    matches = flann.knnMatch(np.float32(descriptors), np.float32(dictionary), k=2)
    for match in matches:
        if len(match) >= 1:
            idx = match[0].trainIdx
            bow_feature[idx] += 1
    return bow_feature

def main():
    folder_path = './Images/'  # Replace this with the path to your dataset folder
    images, labels = load_images(folder_path)

    sift_features = preprocess_images(images)
    all_descriptors = [descriptor for sublist in sift_features for descriptor in sublist]

    dictionary_size = 100  # Define the size of the visual words vocabulary
    dictionary = create_bag_of_words(np.array(all_descriptors), dictionary_size)

    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)
    flann = cv2.FlannBasedMatcher(index_params, search_params)

    bow_features = [convert_to_bow(descriptors, dictionary, flann) for descriptors in sift_features]

    X = np.array(bow_features)
    y = np.array(labels)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=11)

    logistic_reg = LogisticRegression(max_iter=1000, solver='liblinear', multi_class='ovr')
    logistic_reg.fit(X_train, y_train)

    y_pred = logistic_reg.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print(f"Accuracy using SIFT and BoW: {accuracy}")

if __name__ == "__main__":
    main()


error: OpenCV(4.8.1) d:\a\opencv-python\opencv-python\opencv\modules\flann\include\opencv2\flann\nn_index.h:71: error: (-215:Assertion failed) queries.cols == veclen() in function 'cvflann::NNIndex<struct cvflann::L2<float> >::knnSearch'
