In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
from glob import glob
import KMeansClustering as kmc
from sklearn.decomposition import PCA
import cv2
import os
from tqdm import tqdm

In [2]:
def extract_color_histograms(image_path, patch_size=32, bins=8):
    # Load the image
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Image at {image_path} could not be loaded. Check the path and file format.")
    
    # Get the image dimensions
    height, width, _ = image.shape

    patches = []

    # Loop over the image with the specified patch size
    for i in range(0, height, patch_size):
        for j in range(0, width, patch_size):
            # Extract the patch (ensure it doesn't go out of bounds)
            patch = image[i:min(i + patch_size, height), j:min(j + patch_size, width)]
            
            histograms = []

            # Calculate histograms for each channel
            for channel in range(3):  # Assuming BGR channels
                hist = cv2.calcHist([patch], [channel], None, [bins], [0, 256])
                hist = hist / np.sum(hist)  # Normalize the histogram
                histograms.append(hist.flatten())
            
            # Concatenate histograms to create a feature vector
            feature_vector = np.concatenate(histograms)
            patches.append(feature_vector)
    
    return np.array(patches)
def process_image_directory(image_directory, output_directory):
    # Ensure the output directory exists
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    
    # Loop through all files in the image directory
    for image_file in os.listdir(image_directory):
        image_path = os.path.join(image_directory, image_file)
        
        # Check if the current file is a valid file (not a directory)
        if os.path.isfile(image_path):
            print(f"Processing image: {image_file}")
            
            # Extract feature vectors (assuming the function is defined elsewhere)
            feature_vectors = extract_color_histograms(image_path)
            
            # Define the output file path directly in the output directory
            class_name = os.path.splitext(image_file)[0]  # Get the file name without extension
            output_file = os.path.join(output_directory, f"{class_name}_features.npy")
            
            # Save the feature vectors to the output file
            np.save(output_file, feature_vectors)
            print(f"Features saved to {output_file}")

# Specify the input and output directories
image_directory = 'group01_2/group01/test/music_store'  # Path to the folder containing images
output_directory = 'featureVector/music_store/test'           # Path to save the extracted feature vectors

# Call the function
process_image_directory(image_directory, output_directory)


Processing image: sun_dbizycsfucqlktnk.jpg
Features saved to featureVector/music_store/test/sun_dbizycsfucqlktnk_features.npy
Processing image: sun_dbsadqzxemdqaqih.jpg
Features saved to featureVector/music_store/test/sun_dbsadqzxemdqaqih_features.npy
Processing image: sun_dcyykqocjeammfed.jpg
Features saved to featureVector/music_store/test/sun_dcyykqocjeammfed_features.npy
Processing image: sun_dewwrshgfvqarkpm.jpg
Features saved to featureVector/music_store/test/sun_dewwrshgfvqarkpm_features.npy
Processing image: sun_dezyrtacbxgoehoi.jpg
Features saved to featureVector/music_store/test/sun_dezyrtacbxgoehoi_features.npy
Processing image: sun_dfvnumnuiqwvnexc.jpg
Features saved to featureVector/music_store/test/sun_dfvnumnuiqwvnexc_features.npy
Processing image: sun_dgavxowprqmnpbtq.jpg
Features saved to featureVector/music_store/test/sun_dgavxowprqmnpbtq_features.npy
Processing image: sun_dgpmwtllspwrcjtt.jpg
Features saved to featureVector/music_store/test/sun_dgpmwtllspwrcjtt_featu

In [3]:
def load_all_feature_vectors(directory):
    all_features = []
    
    # Loop through all files in the directory
    for file in os.listdir(directory):
        file_path = os.path.join(directory, file)
        
        # Check if the file ends with ".npy" and load its contents
        if os.path.isfile(file_path) and file.endswith(".npy"):
            features = np.load(file_path)
            all_features.append(features)
    
    # Combine all features into a single array
    if all_features:
        return np.vstack(all_features)
    else:
        return np.array([])  # Return an empty array if no features are found

In [4]:

c1Train=load_all_feature_vectors('featureVector/bayou/train')
c2Train=load_all_feature_vectors('featureVector/desert_vegetation/train')
c3Train=load_all_feature_vectors('featureVector/music_store/train')


In [5]:

c1Test=load_all_feature_vectors('featureVector/bayou/test')
c2Test=load_all_feature_vectors('featureVector/desert_vegetation/test')
c3Test=load_all_feature_vectors('featureVector/music_store/test')

In [6]:
def cluster_features(all_features, n_clusters=32,mod=False):
    centroid,idx=kmc.KMeansClustering(all_features,n_clusters,maxIteration=100,mod=mod)
    return centroid

In [7]:

def compute_bovw_representation(image_features,n_clusters=32,mod=False):
    # Assign each feature vector to a cluster
    centroid,idx=kmc.KMeansClustering(image_features,n_clusters,maxIteration=100,mod=mod)
    # Count occurrences of each clusterd
    cluster_counts = np.bincount(idx, minlength=n_clusters)
    # Normalize the counts
    bovw_vector = cluster_counts / len(image_features)
    return bovw_vector

In [8]:
def process_images_for_bovw(input_directory, output_directory, n_clusters=32):
    # Ensure the output directory exists
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    
    # Iterate through all files in the input directory
    for file in tqdm(os.listdir(input_directory)):
        file_path = os.path.join(input_directory, file)
        
        # Check if the file is a valid .npy file
        if file.endswith(".npy"):
            # Load the image features
            image_features = np.load(file_path)
            
            # Compute BoVW representation
            bovw_vector = compute_bovw_representation(image_features, n_clusters)
            
            # Save the BoVW vector in the output directory
            output_file = os.path.join(output_directory, file.replace("_features.npy", "_bovw.npy"))
            np.save(output_file, bovw_vector)

In [None]:
print("Loading all feature vectors...")
all_features = load_all_feature_vectors('featureVector/bayou/train')

# Step 2: Perform K-means clustering
print("Clustering feature vectors into 32 clusters...")
kmeans = cluster_features(all_features)

# Step 3: Compute BoVW representation for all images
print("Processing images to compute BoVW representations...")
process_images_for_bovw('featureVector/music/train','output/music')

print("BoVW representations computed and saved.")

Loading all feature vectors...
Clustering feature vectors into 32 clusters...


In [11]:
print('clustering feature of class1')
kmeansc1Train=cluster_features(c1Train)
process_image_directory('featureVector/bayou/train','output/bayou/train')

clustering feature of class1
Processing image: sun_aarhbeqcuzhoshba_features.npy


ValueError: Image at featureVector/bayou/train/sun_aarhbeqcuzhoshba_features.npy could not be loaded.

In [None]:

print('clustering feature of class2')
kmeansc1Train=cluster_features(c1Train)
process_image_directory('featureVector/bayou/train','output/bayou/train')