In [1]:
import os
import cv2
import numpy as np
import pandas as pd

from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


In [2]:
TRAIN_PATH = "/content/GTSRB/train_modified"
TEST_PATH  = "/content/GTSRB/test_modified"


In [5]:
!unzip /content/GTSRB.zip -d /content/

[1;30;43mΗ έξοδος ροής περικόπηκε στις τελευταίες 5000 γραμμές.[0m
  inflating: /content/GTSRB/test_modified/26/04314.png  
  inflating: /content/GTSRB/test_modified/26/04698.png  
 extracting: /content/GTSRB/test_modified/26/05000.png  
 extracting: /content/GTSRB/test_modified/26/05176.png  
 extracting: /content/GTSRB/test_modified/26/05251.png  
 extracting: /content/GTSRB/test_modified/26/06200.png  
 extracting: /content/GTSRB/test_modified/26/06258.png  
 extracting: /content/GTSRB/test_modified/26/06338.png  
 extracting: /content/GTSRB/test_modified/26/07080.png  
 extracting: /content/GTSRB/test_modified/26/08677.png  
  inflating: /content/GTSRB/test_modified/26/08702.png  
 extracting: /content/GTSRB/test_modified/26/09019.png  
 extracting: /content/GTSRB/test_modified/26/09177.png  
 extracting: /content/GTSRB/test_modified/26/09570.png  
 extracting: /content/GTSRB/test_modified/26/09927.png  
 extracting: /content/GTSRB/test_modified/26/10096.png  
 extracting: /conte

In [6]:
def load_images_from_folder(folder):
    images = []
    labels = []
    class_names = sorted(os.listdir(folder))

    for idx, class_name in enumerate(class_names):
        class_path = os.path.join(folder, class_name)
        for file in os.listdir(class_path):
            img = cv2.imread(os.path.join(class_path, file), cv2.IMREAD_GRAYSCALE)
            if img is not None:
                images.append(img)
                labels.append(idx)

    return images, np.array(labels), class_names


In [7]:
X_train, y_train, class_names = load_images_from_folder(TRAIN_PATH)
X_test, y_test, _ = load_images_from_folder(TEST_PATH)

len(X_train), len(X_test), len(class_names)


(3977, 1983, 43)

In [8]:
sift = cv2.SIFT_create()

def extract_sift_features(images):
    descriptors_list = []
    for img in images:
        kp, des = sift.detectAndCompute(img, None)
        descriptors_list.append(des)
    return descriptors_list


In [9]:
train_desc = extract_sift_features(X_train)
test_desc  = extract_sift_features(X_test)


In [10]:
def build_bovw_histograms(descriptors_list, kmeans, K):
    histograms = []
    for descriptors in descriptors_list:
        hist = np.zeros(K)
        if descriptors is not None:
            labels = kmeans.predict(descriptors)
            for label in labels:
                hist[label] += 1
        histograms.append(hist)
    return np.array(histograms)


In [11]:
K_values = [50, 100, 200, 300, 400]
N_RUNS = 5


In [12]:
results = []

all_descriptors = np.vstack([d for d in train_desc if d is not None])

for K in K_values:
    print(f"\n🔁 Εκτέλεση για K = {K}")
    acc_scores = []

    for run in range(N_RUNS):
        print(f"  ▶ Run {run+1}/{N_RUNS}")

        kmeans = KMeans(n_clusters=K, random_state=run)
        kmeans.fit(all_descriptors)

        X_train_bovw = build_bovw_histograms(train_desc, kmeans, K)
        X_test_bovw  = build_bovw_histograms(test_desc, kmeans, K)

        svm = SVC(kernel="linear", C=1, decision_function_shape="ovr")
        svm.fit(X_train_bovw, y_train)

        preds = svm.predict(X_test_bovw)
        acc = accuracy_score(y_test, preds)

        acc_scores.append(acc)

    mean_acc = np.mean(acc_scores)

    results.append({
        "K": K,
        "Run1": acc_scores[0],
        "Run2": acc_scores[1],
        "Run3": acc_scores[2],
        "Run4": acc_scores[3],
        "Run5": acc_scores[4],
        "Mean Accuracy": mean_acc
    })



🔁 Εκτέλεση για K = 50
  ▶ Run 1/5
  ▶ Run 2/5
  ▶ Run 3/5
  ▶ Run 4/5
  ▶ Run 5/5

🔁 Εκτέλεση για K = 100
  ▶ Run 1/5
  ▶ Run 2/5
  ▶ Run 3/5
  ▶ Run 4/5
  ▶ Run 5/5

🔁 Εκτέλεση για K = 200
  ▶ Run 1/5
  ▶ Run 2/5
  ▶ Run 3/5
  ▶ Run 4/5
  ▶ Run 5/5

🔁 Εκτέλεση για K = 300
  ▶ Run 1/5
  ▶ Run 2/5
  ▶ Run 3/5
  ▶ Run 4/5
  ▶ Run 5/5

🔁 Εκτέλεση για K = 400
  ▶ Run 1/5
  ▶ Run 2/5
  ▶ Run 3/5
  ▶ Run 4/5
  ▶ Run 5/5


In [13]:
df_results = pd.DataFrame(results)
df_results


Unnamed: 0,K,Run1,Run2,Run3,Run4,Run5,Mean Accuracy
0,50,0.274332,0.286939,0.293999,0.281896,0.27887,0.283207
1,100,0.332829,0.345436,0.321735,0.336359,0.332829,0.333838
2,200,0.374181,0.376702,0.375189,0.385275,0.378719,0.378013
3,300,0.398891,0.412506,0.393343,0.399899,0.397378,0.400403
4,400,0.416541,0.416541,0.398386,0.413011,0.411498,0.411195


In [14]:
df_results.to_csv("experiment_1_bovw_K_GTSRB_results.csv", index=False)
print("✅ Αποθήκευση ολοκληρώθηκε!")


✅ Αποθήκευση ολοκληρώθηκε!
