In [1]:
import numpy as np
import cv2
import os
from tqdm import tqdm
import KMeansClustering as kmc

In [2]:
def extract_color_histograms(image_path, patch_size=32, bins=8):
    """Describe an image as normalized per-channel histograms of its patches.

    The image is tiled into ``patch_size`` x ``patch_size`` patches (tiles on
    the bottom/right border may be smaller). For every patch, a ``bins``-bin
    histogram over the value range [0, 256) is computed for each of the three
    channels, scaled so it sums to 1, and the three histograms are
    concatenated into one feature vector.

    Args:
        image_path: Path handed to ``cv2.imread``.
        patch_size: Side length, in pixels, of each square patch.
        bins: Number of histogram bins per channel.

    Returns:
        Array with one row per patch; each row holds ``3 * bins`` values.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` for ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Image at {image_path} could not be loaded.")

    n_rows, n_cols, _ = img.shape
    descriptors = []

    for top in range(0, n_rows, patch_size):
        for left in range(0, n_cols, patch_size):
            # Slicing clamps at the array edge, so border tiles are simply smaller.
            tile = img[top:top + patch_size, left:left + patch_size]

            per_channel = []
            for ch in (0, 1, 2):
                counts = cv2.calcHist([tile], [ch], None, [bins], [0, 256])
                # Normalize so tiles of different sizes are comparable.
                per_channel.append((counts / np.sum(counts)).flatten())

            descriptors.append(np.concatenate(per_channel))

    return np.array(descriptors)


In [3]:
def process_raw_images(image_directory, feature_output_directory):
    """Extract and save patch histogram features for every image in a directory.

    Each file in ``image_directory`` whose name ends in ``.jpg``, ``.png`` or
    ``.jpeg`` (compared case-insensitively) is passed to
    ``extract_color_histograms``; the resulting array is written to
    ``<feature_output_directory>/<image stem>_features.npy``.

    Args:
        image_directory: Directory containing the source images.
        feature_output_directory: Directory that receives the ``.npy`` files;
            created (including parents) if it does not exist.

    Raises:
        ValueError: Propagated from ``extract_color_histograms`` when an image
            cannot be loaded.
    """
    # exist_ok avoids the race between a separate existence check and creation.
    os.makedirs(feature_output_directory, exist_ok=True)

    image_extensions = ('.jpg', '.png', '.jpeg')

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for image_file in sorted(os.listdir(image_directory)):
        # Compare in lower case so files such as "photo.JPG" are not silently skipped.
        if not image_file.lower().endswith(image_extensions):
            continue

        image_path = os.path.join(image_directory, image_file)
        print(f"Processing image: {image_file}")

        feature_vectors = extract_color_histograms(image_path)
        # NOTE: images sharing a stem but differing in extension map to the same output file.
        image_stem = os.path.splitext(image_file)[0]
        output_file = os.path.join(feature_output_directory, f"{image_stem}_features.npy")
        np.save(output_file, feature_vectors)


In [4]:

# Extract patch histogram features for the music_store test images and
# save them as .npy files under featureVector/music_store/test.
image_directory = 'group01_2/group01/test/music_store'
output_directory = 'featureVector/music_store/test'          
process_raw_images(image_directory, output_directory)

Processing image: sun_dbizycsfucqlktnk.jpg
Processing image: sun_dbsadqzxemdqaqih.jpg
Processing image: sun_dcyykqocjeammfed.jpg
Processing image: sun_dewwrshgfvqarkpm.jpg
Processing image: sun_dezyrtacbxgoehoi.jpg
Processing image: sun_dfvnumnuiqwvnexc.jpg
Processing image: sun_dgavxowprqmnpbtq.jpg
Processing image: sun_dgpmwtllspwrcjtt.jpg
Processing image: sun_dhgkvwtvjrrztnja.jpg
Processing image: sun_digogqszkbfeatdg.jpg
Processing image: sun_djcmyzarzpihmmtp.jpg
Processing image: sun_djfnuzjabouwewtz.jpg
Processing image: sun_djjanyrxxvqqixzv.jpg
Processing image: sun_djskmwiztkajspnj.jpg
Processing image: sun_djyusglcuskwollt.jpg
Processing image: sun_djzqfjivakzhjgpr.jpg
Processing image: sun_dkckcmhkciosnqin.jpg
Processing image: sun_dkowxswtzyxhfppk.jpg
Processing image: sun_dlhohleroujqhdoh.jpg
Processing image: sun_dltolffubmjreost.jpg
Processing image: sun_dlvwxrkkzbcoxyji.jpg
Processing image: sun_dlweeybvbggfglms.jpg
Processing image: sun_dmgwlvewospjbakm.jpg
Processing 

In [5]:

# Extract patch histogram features for the music_store training images and
# save them as .npy files under featureVector/music_store/train.
image_directory = 'group01_2/group01/train/music_store'
output_directory = 'featureVector/music_store/train'          
process_raw_images(image_directory, output_directory)

Processing image: sun_daifsnxvnraggsqi.jpg
Processing image: sun_daikaxybbddyqxsu.jpg
Processing image: sun_daxbgqevqutlmehh.jpg
Processing image: sun_dbbvcqinqcyrzodb.jpg
Processing image: sun_dbdmpfowvniuhgxu.jpg
Processing image: sun_dbfdwldcozhvserm.jpg
Processing image: sun_dbulmxwemnnvjvkc.jpg
Processing image: sun_deajqpusaccgovij.jpg
Processing image: sun_dedisnaoqkqbmvnj.jpg
Processing image: sun_dfcbqbnlyvlpscvk.jpg
Processing image: sun_dfcncwaxygnslvon.jpg
Processing image: sun_dfdyaexozbddzohp.jpg
Processing image: sun_dftgcnthmtvcrybj.jpg
Processing image: sun_dgiuclqmuswybxeb.jpg
Processing image: sun_dgmrjvutxpyprbjx.jpg
Processing image: sun_dhjnodduohfguatq.jpg
Processing image: sun_diinfmuwcpwhadpt.jpg
Processing image: sun_dipistwqnoruzjaj.jpg
Processing image: sun_ditblzttmgpovvab.jpg
Processing image: sun_djakbtvniskobncl.jpg
Processing image: sun_djxlspezzrvydwni.jpg
Processing image: sun_djyzyaatsejhwkqm.jpg
Processing image: sun_dkeicrozlzuuxahw.jpg
Processing 

In [6]:

# Extract patch histogram features for the bayou test images and
# save them as .npy files under featureVector/bayou/test.
image_directory = 'group01_2/group01/test/bayou'
output_directory = 'featureVector/bayou/test'          
process_raw_images(image_directory, output_directory)

Processing image: sun_aalcrrdbjgdztbiz.jpg
Processing image: sun_aapymkzhpzyqngqq.jpg
Processing image: sun_abdfuzkqnpifraag.jpg
Processing image: sun_abdztwtjprqilmtk.jpg
Processing image: sun_abnnqvfzczgjryby.jpg
Processing image: sun_abrvkgcicupmeqcj.jpg
Processing image: sun_acqdeebwilpbhnzi.jpg
Processing image: sun_acviagnxrxwdlyir.jpg
Processing image: sun_adowkednntiyspra.jpg
Processing image: sun_adpkghabhibjfmsz.jpg
Processing image: sun_adyongvucbxhodnr.jpg
Processing image: sun_adzxbzyleecfewsv.jpg
Processing image: sun_aeaqnsdfewyxqmar.jpg
Processing image: sun_aedojhzvfiaiiupe.jpg
Processing image: sun_aejvxvyvqbeuqghj.jpg
Processing image: sun_aetnicewlsqqvjir.jpg
Processing image: sun_affccxeosrerqbvj.jpg
Processing image: sun_agdowexlavqcprzz.jpg
Processing image: sun_ahftxhltgxgemher.jpg
Processing image: sun_ajhtdxwwxlvufloj.jpg
Processing image: sun_ajmfizfxxtkvqzsa.jpg
Processing image: sun_ajttlovkwmqdqwsb.jpg
Processing image: sun_akmjwkkursbvwwwx.jpg
Processing 

In [7]:

# Extract patch histogram features for the bayou training images and
# save them as .npy files under featureVector/bayou/train.
image_directory = 'group01_2/group01/train/bayou'
output_directory = 'featureVector/bayou/train'          
process_raw_images(image_directory, output_directory)

Processing image: sun_aarhbeqcuzhoshba.jpg
Processing image: sun_abulrpiruxyrrzgh.jpg
Processing image: sun_acfzigzjtoviibhu.jpg
Processing image: sun_acvjnccckrkbgwsx.jpg
Processing image: sun_adgwxwivrkdjyocl.jpg
Processing image: sun_adhnoxscoslxsjgv.jpg
Processing image: sun_aecomphekrehafyx.jpg
Processing image: sun_aemhvbqdohnremkv.jpg
Processing image: sun_aennafxukhyubzwj.jpg
Processing image: sun_ahftiayzxjubbyhz.jpg
Processing image: sun_ahvtrwpkjkkslvrv.jpg
Processing image: sun_ahzcfsvgdxtyffeg.jpg
Processing image: sun_ailwsjmjbvfnvaxt.jpg
Processing image: sun_aipazzmtgqcwrjex.jpg
Processing image: sun_ajkeeikqzcyucmks.jpg
Processing image: sun_ajyecrjmsjclzxut.jpg
Processing image: sun_akefnwicklzhvrfc.jpg
Processing image: sun_akksslqfkjqufsud.jpg
Processing image: sun_algykmlzsooyrtkq.jpg
Processing image: sun_allsvvdvpkxtzqvb.jpg
Processing image: sun_amnxbqyktiqkmiqj.jpg
Processing image: sun_amwnadzlooambudu.jpg
Processing image: sun_anapwhdtyqjxzayl.jpg
Processing 

In [8]:

# Extract patch histogram features for the desert_vegetation test images and
# save them as .npy files under featureVector/desert_vegetation/test.
image_directory = 'group01_2/group01/test/desert_vegetation'
output_directory = 'featureVector/desert_vegetation/test'          
process_raw_images(image_directory, output_directory)

Processing image: sun_acmxnsedgirrlcgi.jpg
Processing image: sun_adohktpblbjsuugn.jpg
Processing image: sun_ahksirclsgdqnbyc.jpg
Processing image: sun_aieudxgcttaxlaec.jpg
Processing image: sun_alvwpybffxcnjfbd.jpg
Processing image: sun_amptqhlydjbtwdgo.jpg
Processing image: sun_amutmkauvllgeuwj.jpg
Processing image: sun_anqgujctowxctbut.jpg
Processing image: sun_anyhyhsexxwuxnnw.jpg
Processing image: sun_anyuunzbgpuyrcxm.jpg
Processing image: sun_appjznkejfvhpcbi.jpg
Processing image: sun_asbojivucftodjsj.jpg
Processing image: sun_asnkeeebroqrogsw.jpg
Processing image: sun_atxmvwjigvjhosai.jpg
Processing image: sun_auakvcqmyedzmpzc.jpg
Processing image: sun_aubnnpsvwxjpooqm.jpg
Processing image: sun_auuvtwswmaiqdefn.jpg
Processing image: sun_avdpaarqoanuqhsa.jpg
Processing image: sun_awjabhoyounouqap.jpg
Processing image: sun_awmgeonbcojofyzt.jpg
Processing image: sun_aypboapkrbtrmxqn.jpg
Processing image: sun_aytguqqbadccqkyt.jpg
Processing image: sun_azzoctvotjbyazvz.jpg
Processing 

In [9]:

# Extract patch histogram features for the desert_vegetation training images and
# save them as .npy files under featureVector/desert_vegetation/train.
image_directory = 'group01_2/group01/train/desert_vegetation'
output_directory = 'featureVector/desert_vegetation/train'          
process_raw_images(image_directory, output_directory)

Processing image: sun_aapfaivxqoynnwwp.jpg
Processing image: sun_aatbkteovwwsrhmt.jpg
Processing image: sun_adgjlkpdyosbnnlj.jpg
Processing image: sun_aenkarsijclzvano.jpg
Processing image: sun_aichqsxdfhyjzxyh.jpg
Processing image: sun_ajuhszrbgvjrdpec.jpg
Processing image: sun_akhnxgzxoxxfudbs.jpg
Processing image: sun_akywimwycfdiskni.jpg
Processing image: sun_aocaxozrgmudnwlf.jpg
Processing image: sun_aooxopykmiqnsqbq.jpg
Processing image: sun_apahgdkbybnvnrgj.jpg
Processing image: sun_apauahmaalvdubuc.jpg
Processing image: sun_asafsyjdgfjewaoq.jpg
Processing image: sun_asgzcoxwwvwbqflj.jpg
Processing image: sun_asvvesxgisymjhlh.jpg
Processing image: sun_avxfzujxgpqfvbne.jpg
Processing image: sun_azfkuhvpucbpmmkj.jpg
Processing image: sun_bbbrempkthvvvzro.jpg
Processing image: sun_bcixjiqjpnnfiire.jpg
Processing image: sun_bckgceloirlpicuo.jpg
Processing image: sun_bctumlvscqlhxnlx.jpg
Processing image: sun_bdtwdeemzgsqwpgv.jpg
Processing image: sun_beuywdtibjxifjvw.jpg
Processing 

In [10]:

def load_all_feature_vectors(directory):
    """Load every ``.npy`` file in a directory and stack the arrays row-wise.

    Files are read in sorted name order so the stacked result is the same on
    every run and platform (``os.listdir`` itself returns entries in
    arbitrary order).

    Args:
        directory: Directory to scan (not recursive). Only regular files whose
            names end in ``.npy`` are loaded.

    Returns:
        The ``np.vstack`` of all loaded arrays, or ``None`` when the directory
        contains no matching file. Callers that stack the result further must
        handle the ``None`` case.
    """
    loaded = []

    for file in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, file)
        if file.endswith(".npy") and os.path.isfile(file_path):
            loaded.append(np.load(file_path))

    if not loaded:
        # Explicit: nothing to stack.
        return None
    return np.vstack(loaded)

In [11]:
def compute_bovw_representation(image_features, centroids):
    """Build a normalized bag-of-visual-words histogram for one image.

    Every feature vector is assigned to its nearest centroid (Euclidean
    distance); the histogram holds the fraction of feature vectors assigned
    to each centroid.

    Args:
        image_features: 2-D array-like with one feature vector per row.
        centroids: Sequence of centroid vectors with the same dimensionality
            as the rows of ``image_features``.

    Returns:
        1-D float array of length ``len(centroids)``. It sums to 1 when there
        is at least one feature vector, and is all zeros when
        ``image_features`` is empty.
    """
    n_clusters = len(centroids)
    n_features = len(image_features)

    # With no feature vectors the normalization would be 0/0 and yield NaNs;
    # return an empty (all-zero) histogram instead.
    if n_features == 0:
        return np.zeros(n_clusters)

    distances = np.zeros((n_features, n_clusters))
    for i, centroid in enumerate(centroids):
        distances[:, i] = np.linalg.norm(image_features - centroid, axis=1)
    nearest = np.argmin(distances, axis=1)

    # minlength keeps a slot for centroids that received no feature vector.
    cluster_counts = np.bincount(nearest, minlength=n_clusters)
    return cluster_counts / n_features

In [12]:
def process_features_for_bovw(feature_directory, bovw_output_directory, centroids):
    """Convert saved per-image feature files into bag-of-visual-words vectors.

    For every ``*_features.npy`` file in ``feature_directory`` the array is
    loaded, passed to ``compute_bovw_representation`` together with
    ``centroids``, and the result is saved to ``bovw_output_directory`` under
    the same name with the ``_features.npy`` suffix replaced by ``_bovw.npy``.

    Args:
        feature_directory: Directory containing ``*_features.npy`` files.
        bovw_output_directory: Directory that receives the ``*_bovw.npy``
            files; created (including parents) if it does not exist.
        centroids: Cluster centroids forwarded to
            ``compute_bovw_representation``.
    """
    # exist_ok avoids the race between a separate existence check and creation.
    os.makedirs(bovw_output_directory, exist_ok=True)

    feature_suffix = '_features.npy'
    # Filter before wrapping in tqdm so the progress total counts only files
    # that are actually processed; sort because os.listdir order is arbitrary.
    feature_files = sorted(
        name for name in os.listdir(feature_directory)
        if name.endswith(feature_suffix)
    )

    for file in tqdm(feature_files):
        image_features = np.load(os.path.join(feature_directory, file))

        bovw_vector = compute_bovw_representation(image_features, centroids)

        # Strip only the trailing suffix; str.replace would also rewrite the
        # marker if it happened to occur earlier in the file name.
        output_name = file[:-len(feature_suffix)] + '_bovw.npy'
        np.save(os.path.join(bovw_output_directory, output_name), bovw_vector)

In [13]:
# Pool the saved patch features from the training split of all three
# categories into a single array used to learn the visual vocabulary.
all_features = np.vstack([
    load_all_feature_vectors('featureVector/bayou/train'),
    load_all_feature_vectors('featureVector/desert_vegetation/train'),
    load_all_feature_vectors('featureVector/music_store/train')
])
# Cluster the pooled features; only the first returned value is kept (as `centroids`).
# NOTE(review): 32 is presumably the number of clusters — confirm against
# kmc.KMeansClustering's signature.
centroids, _ = kmc.KMeansClustering(all_features,32, maxIteration=100)

# Convert every saved per-image feature file (train and test, all categories)
# into a BoVW vector using these centroids, written under output/<category>/<split>.
for category in ['bayou', 'desert_vegetation', 'music_store']:
    for split in ['train', 'test']:
        feature_dir = f'featureVector/{category}/{split}'
        bovw_dir = f'output/{category}/{split}'
        process_features_for_bovw(feature_dir, bovw_dir, centroids)

100%|██████████| 50/50 [00:00<00:00, 465.18it/s]
100%|██████████| 50/50 [00:00<00:00, 508.71it/s]
100%|██████████| 50/50 [00:00<00:00, 997.63it/s]
100%|██████████| 50/50 [00:00<00:00, 926.65it/s]
100%|██████████| 50/50 [00:00<00:00, 1898.55it/s]
100%|██████████| 50/50 [00:00<00:00, 812.81it/s]
