In [1]:
import os
import cv2
import numpy as np
from sklearn.cluster import KMeans
import csv

In [2]:
# Step 1: Feature Extraction (SIFT)
def extract_sift_features(image_path):
    """Detect SIFT keypoints and compute their descriptors for one image.

    Args:
        image_path: Path of the image file. The image is loaded as grayscale.

    Returns:
        The ``(keypoints, descriptors)`` pair exactly as produced by
        ``detectAndCompute``. ``descriptors`` may be ``None`` (the folder
        processing code in this file explicitly checks for that case).

    Raises:
        OSError: If OpenCV could not load the image (missing file,
            unreadable file or unsupported format).
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None rather than raising; fail
    # here with the offending path instead of deep inside detectAndCompute.
    if image is None:
        raise OSError(
            f"Could not read image (missing file or unsupported format): {image_path}"
        )
    sift = cv2.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(image, None)
    return keypoints, descriptors

In [3]:
# Step 2: Vocabulary Building (KMeans Clustering)
def build_vocabulary(descriptors, vocabulary_size, random_state=42):
    """Cluster descriptors with KMeans and return the cluster centres.

    Args:
        descriptors: 2-D array-like of descriptors, one per row, passed
            unchanged to ``KMeans.fit``.
        vocabulary_size: Number of clusters, i.e. number of visual words.
        random_state: Seed forwarded to ``KMeans`` so that repeated runs on
            the same data produce the same vocabulary. Pass ``None`` to get
            the previous unseeded behaviour.

    Returns:
        The fitted ``cluster_centers_`` array (one row per visual word).
    """
    kmeans = KMeans(n_clusters=vocabulary_size, random_state=random_state)
    kmeans.fit(descriptors)
    return kmeans.cluster_centers_


In [4]:
# Step 3: Quantization
def quantize_features(descriptors, vocabulary):
    """Count how many descriptors fall nearest to each visual word.

    Each descriptor is assigned to the vocabulary row with the smallest
    Euclidean distance (the first such row on ties, as ``np.argmin`` does),
    and the assignments are accumulated into a histogram.

    Args:
        descriptors: 2-D array with one descriptor per row, or ``None`` /
            an empty array when there is nothing to quantize.
        vocabulary: 2-D array-like with one visual word per row and the
            same number of columns as ``descriptors``.

    Returns:
        A ``float32`` array of shape ``(1, len(vocabulary))`` holding the
        per-word counts. It is all zeros when there are no descriptors.
    """
    vocabulary = np.asarray(vocabulary)
    num_words = len(vocabulary)
    feature_vector = np.zeros((1, num_words), dtype=np.float32)

    # Nothing to count: return an empty histogram instead of failing on
    # `None.shape`.
    if descriptors is None or len(descriptors) == 0:
        return feature_vector

    descriptors = np.asarray(descriptors)
    # Work in chunks so the (chunk, words, dims) difference tensor stays
    # small while still avoiding a Python-level loop per descriptor.
    chunk_size = 256
    for start in range(0, len(descriptors), chunk_size):
        chunk = descriptors[start:start + chunk_size]
        distances = np.linalg.norm(
            chunk[:, np.newaxis, :] - vocabulary[np.newaxis, :, :], axis=2
        )
        nearest_word_indices = np.argmin(distances, axis=1)
        feature_vector[0] += np.bincount(nearest_word_indices, minlength=num_words)
    return feature_vector

In [5]:
# Step 4: Histogram Representation
def compute_histogram(image_path, vocabulary):
    """Build the bag-of-words histogram for a single image.

    Args:
        image_path: Path of the image to describe.
        vocabulary: Visual-word array; its length sets the histogram width.

    Returns:
        The ``(1, len(vocabulary))`` histogram returned by
        ``quantize_features``, or an all-zero ``float32`` histogram of the
        same shape when the image yields no descriptors.
    """
    _, descriptors = extract_sift_features(image_path)
    # extract_sift_features can hand back None descriptors; there is nothing
    # to quantize in that case, so report an empty histogram.
    if descriptors is None:
        return np.zeros((1, len(vocabulary)), dtype=np.float32)
    return quantize_features(descriptors, vocabulary)


In [6]:
# Step 5: Combine SIFT descriptors with BoW histogram
def extract_combined_features(image_path, vocabulary):
    """Concatenate an image's raw SIFT descriptors with its BoW histogram.

    The result is the flattened descriptor matrix followed by the flattened
    histogram, so its length depends on how many descriptors the image has.

    Args:
        image_path: Path of the image to describe.
        vocabulary: Visual-word array used to build the histogram.

    Returns:
        A 1-D array: flattened descriptors, then ``len(vocabulary)``
        histogram counts. When the image has no descriptors the result is
        just the all-zero histogram.
    """
    # Extract once and quantize the same descriptors, rather than letting
    # compute_histogram run SIFT on the image a second time.
    _, descriptors = extract_sift_features(image_path)
    if descriptors is None:
        descriptors = np.empty((0, 0), dtype=np.float32)
        bow_histogram = np.zeros((1, len(vocabulary)), dtype=np.float32)
    else:
        bow_histogram = quantize_features(descriptors, vocabulary)
    return np.hstack((descriptors.flatten(), bow_histogram.flatten()))


In [7]:
# Step 6: Process every image in one folder and write the features to CSV
def process_images_in_folder(folder_path, output_csv, vocabulary_size):
    """Extract combined SIFT + BoW features for a folder and save them as CSV.

    A vocabulary is built from the descriptors of all images in the folder,
    then one CSV row (``filename``, ``features``, ``label``) is written per
    image. ``label`` is 0 when the folder name contains ``"Negative_"`` and 1
    otherwise. ``features`` is the full feature vector rendered in NumPy's
    bracketed, space-separated text form.

    Args:
        folder_path: Directory whose image files (.png, .jpg, .jpeg, .bmp,
            .tiff, case-insensitive) are processed.
        output_csv: Destination CSV path; its parent directory is created
            when missing.
        vocabulary_size: Number of visual words to cluster.

    Raises:
        ValueError: If no image in the folder produced any descriptors, so
            no vocabulary can be built.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

    # normpath drops a trailing separator, which would otherwise make
    # basename return '' and silently label a negative folder as positive.
    folder_name = os.path.basename(os.path.normpath(folder_path))
    label = 0 if "Negative_" in folder_name else 1

    # List the directory once, sorted, so both passes see the same files in
    # the same order and the CSV row order is reproducible.
    image_files = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith(image_extensions)
    )

    # Pass 1: gather every descriptor in the folder to build the vocabulary.
    descriptors_list = []
    for filename in image_files:
        _, descriptors = extract_sift_features(os.path.join(folder_path, filename))
        if descriptors is not None:
            descriptors_list.append(descriptors)
    if not descriptors_list:
        raise ValueError(
            f"No SIFT descriptors found for any image in {folder_path}"
        )
    vocabulary = build_vocabulary(np.vstack(descriptors_list), vocabulary_size)

    # Pass 2: compute the combined feature vector of each image.
    results = []
    for filename in image_files:
        combined_features = np.asarray(
            extract_combined_features(os.path.join(folder_path, filename), vocabulary)
        )
        results.append({
            'filename': filename,
            # The default text form of an array longer than 1000 elements is
            # abbreviated with "...", which would drop almost all features
            # from the CSV; raise the threshold so every value is written.
            'features': np.array2string(
                combined_features, threshold=max(combined_features.size, 1)
            ),
            'label': label,
        })

    # Write the results to a CSV file
    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_csv, mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=['filename', 'features', 'label'])
        writer.writeheader()
        writer.writerows(results)

In [8]:
# Step 7: Process Multiple Folders
def process_multiple_folders(base_folder, vocabulary_size, output_dir='SIFTandBOW'):
    """Run the per-folder feature extraction on every subfolder.

    For each directory directly inside ``base_folder`` a CSV named
    ``<subfolder>_sift_bow_features.csv`` is written into ``output_dir`` and
    a progress line is printed.

    Args:
        base_folder: Directory whose immediate subdirectories are processed;
            plain files inside it are ignored.
        vocabulary_size: Number of visual words used for each subfolder.
        output_dir: Directory receiving the CSV files. It is created when it
            does not exist. Defaults to ``'SIFTandBOW'``.
    """
    # Opening the CSV for writing fails if the directory is missing, so make
    # sure it exists before any expensive feature extraction starts.
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so the processing order does not depend on the filesystem.
    for folder_name in sorted(os.listdir(base_folder)):
        folder_path = os.path.join(base_folder, folder_name)
        if not os.path.isdir(folder_path):
            continue
        output_csv = f'{output_dir}/{folder_name}_sift_bow_features.csv'
        process_images_in_folder(folder_path, output_csv, vocabulary_size)
        print(f"SIFT and BoW features have been calculated for images in {folder_path} and written to {output_csv}")

In [9]:
# Example usage
if __name__ == "__main__":
    # Every visual vocabulary in this run uses the same number of words.
    vocabulary_size = 100
    # One base directory per rice variety; each one is handed to
    # process_multiple_folders, which walks its subfolders.
    folder = [
        'Rice_photos/BC-15',
        'Rice_photos/Huongthom',
        'Rice_photos/Nep87',
        'Rice_photos/Q5',
        'Rice_photos/Thien_uu',
        'Rice_photos/Xi23',
    ]
    for base_folder in folder:
        process_multiple_folders(base_folder, vocabulary_size)

SIFT and BoW features have been calculated for images in Rice_photos/BC-15\BC-15 and written to SIFTandBOW/BC-15_sift_bow_features.csv
SIFT and BoW features have been calculated for images in Rice_photos/BC-15\Negative_BC-15 and written to SIFTandBOW/Negative_BC-15_sift_bow_features.csv
SIFT and BoW features have been calculated for images in Rice_photos/Huongthom\Huong_thom-1 and written to SIFTandBOW/Huong_thom-1_sift_bow_features.csv
SIFT and BoW features have been calculated for images in Rice_photos/Huongthom\Negative_Huong_thom-1 and written to SIFTandBOW/Negative_Huong_thom-1_sift_bow_features.csv
SIFT and BoW features have been calculated for images in Rice_photos/Nep87\Negative_Nep-87 and written to SIFTandBOW/Negative_Nep-87_sift_bow_features.csv
SIFT and BoW features have been calculated for images in Rice_photos/Nep87\Nep-87 and written to SIFTandBOW/Nep-87_sift_bow_features.csv
SIFT and BoW features have been calculated for images in Rice_photos/Q5\Negative_Q-5_modify and 