In [4]:
import os
import cv2
import numpy as np
import pandas as pd

from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle


In [9]:
# Absolute locations of the train and test image folders; each one is later
# passed to load_images_from_folder.
TRAIN_PATH = "/content/caltech-transportation_train"
TEST_PATH  = "/content/caltech-transportation_test"


In [10]:
def load_images_from_folder(folder):
    """Load every readable image under ``folder`` as a grayscale array.

    ``folder`` is expected to contain one sub-directory per class. A class
    index is the alphabetical position of its sub-directory name, so two
    folders holding the same class sub-directories get the same encoding.

    Parameters
    ----------
    folder : str
        Directory whose immediate sub-directories are the classes.

    Returns
    -------
    images : list
        Grayscale images as returned by ``cv2.imread``.
    labels : numpy.ndarray
        Integer class index of each image, aligned with ``images``.
    class_names : list of str
        Sorted class sub-directory names; ``labels`` index into this list.
    """
    images = []
    labels = []

    # Only real, non-hidden sub-directories count as classes. A stray file
    # would crash os.listdir below, and a hidden directory such as
    # ``.ipynb_checkpoints`` would take a class index and shift every label
    # that sorts after it.
    class_names = sorted(
        name
        for name in os.listdir(folder)
        if not name.startswith(".") and os.path.isdir(os.path.join(folder, name))
    )

    for idx, class_name in enumerate(class_names):
        class_path = os.path.join(folder, class_name)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order reproducible across machines and runs.
        for file in sorted(os.listdir(class_path)):
            img = cv2.imread(os.path.join(class_path, file), cv2.IMREAD_GRAYSCALE)
            # Files that could not be read come back as None; skip them.
            if img is not None:
                images.append(img)
                labels.append(idx)

    # Explicit integer dtype keeps the label array integral even when empty.
    return images, np.array(labels, dtype=int), class_names


In [8]:
import zipfile

# Unzip the dataset if the train/test folders are not both present yet.
zip_file_path = '/content/caltech-transportation.zip'
dataset_ready = os.path.isdir(TRAIN_PATH) and os.path.isdir(TEST_PATH)
if not dataset_ready and os.path.exists(zip_file_path):
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall('/content/')
    print("Dataset unzipped successfully.")

# Fail early with an actionable message instead of an opaque os.listdir
# error raised from inside the loader.
for split_path in (TRAIN_PATH, TEST_PATH):
    if not os.path.isdir(split_path):
        raise FileNotFoundError(
            f"Dataset folder not found: {split_path} "
            f"(expected it to exist already or to be extracted from {zip_file_path})"
        )

X_train, y_train, class_names = load_images_from_folder(TRAIN_PATH)
X_test, y_test, test_class_names = load_images_from_folder(TEST_PATH)

# Labels are positional indices into each split's own class list, so the two
# lists must be identical for y_train and y_test to share one encoding.
if test_class_names != class_names:
    raise ValueError(
        f"Train/test class folders differ: {class_names} vs {test_class_names}"
    )

Dataset unzipped successfully.


In [11]:
# Single SIFT instance created with OpenCV's default arguments; it is the
# object extract_sift_features calls for every image.
sift = cv2.SIFT_create()

def extract_sift_features(images):
    """Compute SIFT descriptors for each image, preserving input order.

    Each result entry is the descriptor part of
    ``sift.detectAndCompute(image, None)``; the keypoints are discarded.
    Callers in this notebook treat a ``None`` entry as "no descriptors".
    """
    return [sift.detectAndCompute(picture, None)[1] for picture in images]


In [12]:
# Per-image SIFT descriptors for each split. The lists stay aligned with
# X_train / X_test (one entry per image, in the same order).
train_desc = extract_sift_features(X_train)
test_desc  = extract_sift_features(X_test)


In [13]:
def build_bovw_histograms(descriptors_list, kmeans, K):
    """Turn per-image descriptor sets into bag-of-visual-words histograms.

    Parameters
    ----------
    descriptors_list : iterable
        One entry per image: a 2-D array of descriptors (one row each), or
        ``None`` when the image has no descriptors.
    kmeans : object
        Fitted vocabulary exposing ``predict(descriptors)``, which returns
        one non-negative integer word index below ``K`` per descriptor row.
    K : int
        Vocabulary size, i.e. the length of every histogram.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_images, K)`` holding raw (un-normalised)
        word counts. Images without descriptors get an all-zero row.
    """
    descriptors_list = list(descriptors_list)
    # Pre-allocating keeps the (n_images, K) shape even for an empty input.
    histograms = np.zeros((len(descriptors_list), K))

    for row, descriptors in zip(histograms, descriptors_list):
        # Nothing to count for None or a zero-row array; skipping also avoids
        # handing an empty batch to the predictor.
        if descriptors is None or len(descriptors) == 0:
            continue
        words = kmeans.predict(descriptors)
        # One vectorised pass instead of a Python loop over labels. ``row`` is
        # a view into ``histograms``; a word index >= K makes this assignment
        # raise instead of silently producing a wrong-sized histogram.
        row[:] = np.bincount(words, minlength=K)

    return histograms


In [14]:
# Vocabulary sizes to evaluate; each value is used as the k-means cluster count.
K_values = [50, 100, 200, 300, 400]
N_RUNS = 5   # number of repetitions for statistical reliability


In [15]:
results = []

# Pool every training descriptor into one matrix for vocabulary learning.
# Images without descriptors are stored as None and are skipped here.
train_desc_found = [d for d in train_desc if d is not None]
if not train_desc_found:
    raise ValueError("No SIFT descriptors were extracted from the training images.")
all_descriptors = np.vstack(train_desc_found)

for K in K_values:
    # NOTE(review): the progress messages look like mis-encoded Greek text;
    # they are kept byte-for-byte — confirm the file encoding.
    print(f"\nüîÅ ŒïŒ∫œÑŒ≠ŒªŒµœÉŒ∑ Œ≥ŒπŒ± K = {K}")
    acc_scores = []

    for run in range(N_RUNS):
        print(f"  ‚ñ∂ Run {run+1}/{N_RUNS}")

        # The run index doubles as the k-means seed, so every repetition
        # learns a different vocabulary but stays reproducible.
        kmeans = KMeans(n_clusters=K, random_state=run)
        kmeans.fit(all_descriptors)

        X_train_bovw = build_bovw_histograms(train_desc, kmeans, K)
        X_test_bovw  = build_bovw_histograms(test_desc, kmeans, K)

        # Linear SVM with fixed hyper-parameters across all runs.
        # NOTE: decision_function_shape="ovr" only controls the shape of
        # decision_function(); SVC still trains one-vs-one internally. Use
        # OneVsRestClassifier or LinearSVC if true one-vs-all is required.
        svm = SVC(kernel="linear", C=1, decision_function_shape="ovr")
        svm.fit(X_train_bovw, y_train)

        preds = svm.predict(X_test_bovw)
        acc = accuracy_score(y_test, preds)

        acc_scores.append(acc)

    mean_acc = np.mean(acc_scores)

    # Build the Run<i> columns from the scores actually collected, so the row
    # stays correct when N_RUNS is changed (the former hard-coded Run1..Run5
    # keys raised IndexError for N_RUNS < 5 and dropped runs for N_RUNS > 5).
    results.append({
        "K": K,
        **{f"Run{i}": score for i, score in enumerate(acc_scores, start=1)},
        "Mean Accuracy": mean_acc,
    })



üîÅ ŒïŒ∫œÑŒ≠ŒªŒµœÉŒ∑ Œ≥ŒπŒ± K = 50
  ‚ñ∂ Run 1/5
  ‚ñ∂ Run 2/5
  ‚ñ∂ Run 3/5
  ‚ñ∂ Run 4/5
  ‚ñ∂ Run 5/5

üîÅ ŒïŒ∫œÑŒ≠ŒªŒµœÉŒ∑ Œ≥ŒπŒ± K = 100
  ‚ñ∂ Run 1/5
  ‚ñ∂ Run 2/5
  ‚ñ∂ Run 3/5
  ‚ñ∂ Run 4/5
  ‚ñ∂ Run 5/5

üîÅ ŒïŒ∫œÑŒ≠ŒªŒµœÉŒ∑ Œ≥ŒπŒ± K = 200
  ‚ñ∂ Run 1/5
  ‚ñ∂ Run 2/5
  ‚ñ∂ Run 3/5
  ‚ñ∂ Run 4/5
  ‚ñ∂ Run 5/5

üîÅ ŒïŒ∫œÑŒ≠ŒªŒµœÉŒ∑ Œ≥ŒπŒ± K = 300
  ‚ñ∂ Run 1/5
  ‚ñ∂ Run 2/5
  ‚ñ∂ Run 3/5
  ‚ñ∂ Run 4/5
  ‚ñ∂ Run 5/5

üîÅ ŒïŒ∫œÑŒ≠ŒªŒµœÉŒ∑ Œ≥ŒπŒ± K = 400
  ‚ñ∂ Run 1/5
  ‚ñ∂ Run 2/5
  ‚ñ∂ Run 3/5
  ‚ñ∂ Run 4/5
  ‚ñ∂ Run 5/5


In [16]:
# One row per K: the per-run accuracies followed by their mean.
df_results = pd.DataFrame(results)
df_results


Unnamed: 0,K,Run1,Run2,Run3,Run4,Run5,Mean Accuracy
0,50,0.849727,0.827869,0.827869,0.844262,0.838798,0.837705
1,100,0.855191,0.86612,0.833333,0.855191,0.814208,0.844809
2,200,0.901639,0.901639,0.904372,0.912568,0.885246,0.901093
3,300,0.912568,0.885246,0.893443,0.907104,0.909836,0.901639
4,400,0.909836,0.915301,0.923497,0.896175,0.915301,0.912022


In [17]:
# Save the results table as CSV (row index omitted); the relative path
# resolves against the current working directory.
# NOTE(review): the confirmation message looks like mis-encoded Greek text
# (presumably "Saving completed") — confirm the file encoding.
df_results.to_csv("experiment_1_bovw_K_results.csv", index=False)
print("‚úÖ ŒëœÄŒøŒ∏ŒÆŒ∫ŒµœÖœÉŒ∑ ŒøŒªŒøŒ∫ŒªŒ∑œÅœéŒ∏Œ∑Œ∫Œµ: experiment_1_bovw_K_results.csv")


‚úÖ ŒëœÄŒøŒ∏ŒÆŒ∫ŒµœÖœÉŒ∑ ŒøŒªŒøŒ∫ŒªŒ∑œÅœéŒ∏Œ∑Œ∫Œµ: experiment_1_bovw_K_results.csv
