In [1]:
import os
import cv2
import numpy as np
import pandas as pd

from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


In [2]:
# Root folders of the GTSRB train and test splits; each one is passed to
# load_images_from_folder, which treats every sub-directory as one class.
# NOTE(review): absolute paths under /content look like a Google Colab
# runtime layout — adjust when running elsewhere.
TRAIN_PATH = "/content/GTSRB/train_modified"
TEST_PATH  = "/content/GTSRB/test_modified"


In [None]:
!unzip /content/GTSRB.zip -d /content/

Archive:  /content/GTSRB.zip
replace /content/GTSRB/test_modified/0/00252.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [15]:
def load_images_from_folder(folder):
    """Load a folder-per-class image dataset as grayscale images.

    Every visible sub-directory of ``folder`` is treated as one class. A class
    is identified by its position in the lexicographically sorted list of
    directory names (so "10" sorts before "2"); use the returned
    ``class_names`` to map a label index back to its directory name.

    Parameters
    ----------
    folder : str
        Dataset root containing one sub-directory per class.

    Returns
    -------
    images : list
        Grayscale images as returned by ``cv2.imread``. Files for which
        ``cv2.imread`` returns None are skipped.
    labels : numpy.ndarray
        Integer class index for each entry of ``images``.
    class_names : list of str
        Sorted class directory names; ``class_names[i]`` belongs to label ``i``.
    """
    # Only real, non-hidden directories are classes. A stray file in the root
    # used to crash the inner os.listdir, and a hidden directory (for example
    # a notebook checkpoint folder) silently shifted every label index.
    class_names = sorted(
        entry
        for entry in os.listdir(folder)
        if not entry.startswith(".")
        and os.path.isdir(os.path.join(folder, entry))
    )

    images = []
    labels = []
    for idx, class_name in enumerate(class_names):
        class_path = os.path.join(folder, class_name)
        # os.listdir order is arbitrary; sorting makes the sample order
        # (and everything fitted on it) reproducible between runs.
        for file in sorted(os.listdir(class_path)):
            img = cv2.imread(os.path.join(class_path, file), cv2.IMREAD_GRAYSCALE)
            if img is not None:
                images.append(img)
                labels.append(idx)

    # dtype=int keeps an integer label array even when no image was loaded.
    return images, np.array(labels, dtype=int), class_names


In [16]:
X_train, y_train, class_names = load_images_from_folder(TRAIN_PATH)
X_test, y_test, test_class_names = load_images_from_folder(TEST_PATH)

# Labels are positions in each folder's own sorted class list, so train and
# test labels only refer to the same classes when both lists are identical.
assert test_class_names == class_names, "train/test class folders differ"

len(X_train), len(X_test), len(class_names)


(3977, 1983, 43)

In [17]:
# Single SIFT instance, reused by extract_sift_features for every image.
sift = cv2.SIFT_create()

def extract_sift_features(images, detector=None):
    """Compute one descriptor set per image.

    Parameters
    ----------
    images : iterable
        Images to describe; each one is passed unchanged to the detector.
    detector : object, optional
        Any object exposing ``detectAndCompute(image, mask)`` that returns a
        ``(keypoints, descriptors)`` pair. When omitted, the notebook-level
        ``sift`` instance is used, which is the original behaviour.

    Returns
    -------
    list
        The descriptors returned for each image, in input order. An entry is
        whatever the detector returned, so it may be None; callers in this
        notebook check for that.
    """
    if detector is None:
        # Fall back to the shared detector created alongside this function.
        detector = sift
    # Keypoints (element 0) are not needed downstream; keep descriptors only.
    return [detector.detectAndCompute(img, None)[1] for img in images]


In [18]:
# One descriptor entry per image, in the same order as X_train / X_test.
# An entry may be None; the cells that consume these lists check for it.
train_desc = extract_sift_features(X_train)
test_desc  = extract_sift_features(X_test)


In [19]:
# Visual-vocabulary size: used as the number of k-means clusters and as the
# length of every BoVW histogram.
# NOTE(review): the "K_STAR" name suggests the value was selected in an
# earlier experiment — confirm.
K_STAR_GTSRB = 400


In [20]:
# Stack the descriptors of all training images into one 2-D array (None
# entries are skipped) and cluster them into K_STAR_GTSRB groups. Only the
# training descriptors are used to fit the clustering.
all_descriptors = np.vstack([d for d in train_desc if d is not None])

# random_state=0 fixes the seed used by k-means.
kmeans = KMeans(n_clusters=K_STAR_GTSRB, random_state=0)
kmeans.fit(all_descriptors)


In [21]:
def build_bovw_histograms(descriptors_list, kmeans, K):
    """Turn per-image descriptor sets into bag-of-visual-words count vectors.

    Parameters
    ----------
    descriptors_list : iterable
        One entry per image: a 2-D descriptor array, or None / an empty array
        when the image has no descriptors.
    kmeans : object
        Fitted clustering model whose ``predict`` maps each descriptor to a
        cluster index in ``[0, K)``.
    K : int
        Vocabulary size, i.e. the length of every histogram.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_images, K)`` holding raw, unnormalised
        counts. Images without descriptors get an all-zero row.
    """
    histograms = []
    for descriptors in descriptors_list:
        # An empty array carries no visual words, and scikit-learn estimators
        # reject zero-sample input, so handle it exactly like None.
        if descriptors is None or len(descriptors) == 0:
            histograms.append(np.zeros(K))
            continue
        words = kmeans.predict(descriptors)
        # bincount tallies all assignments at once; minlength pads the
        # histogram for visual words that never occur in this image.
        histograms.append(np.bincount(words, minlength=K).astype(float))
    # reshape keeps the result 2-D even when descriptors_list is empty.
    return np.array(histograms, dtype=float).reshape(-1, K)


In [22]:
# Encode both splits as K_STAR_GTSRB-bin histograms using the same fitted
# kmeans model, so train and test features share one vocabulary.
X_train_bovw = build_bovw_histograms(train_desc, kmeans, K_STAR_GTSRB)
X_test_bovw  = build_bovw_histograms(test_desc, kmeans, K_STAR_GTSRB)

# Displayed as a sanity check: one row per image, one column per visual word.
X_train_bovw.shape, X_test_bovw.shape


((3977, 400), (1983, 400))

In [23]:
# Candidate values of the SVM parameter C that the sweep loop iterates over.
C_values = [0.1, 1, 10]


In [24]:
# Sweep over C: fit one linear SVM per candidate value on the training BoVW
# features and record its accuracy. `results` collects one {"C", "Accuracy"}
# record per value.
# NOTE(review): accuracy here is measured on the test split (X_test_bovw /
# y_test), so any C chosen from these numbers is tuned on test data —
# consider a validation split or cross-validation on the training set.
results = []

for C in C_values:
    # Progress message (Greek): "Training SVM for C = ...".
    print(f"\n Εκπαίδευση SVM για C = {C}")

    svm = SVC(kernel="linear", C=C, decision_function_shape="ovr")
    svm.fit(X_train_bovw, y_train)

    preds = svm.predict(X_test_bovw)
    acc = accuracy_score(y_test, preds)

    print(f"   Accuracy = {acc:.4f}")

    results.append({
        "C": C,
        "Accuracy": acc
    })



 Εκπαίδευση SVM για C = 0.1
   Accuracy = 0.4639

 Εκπαίδευση SVM για C = 1
   Accuracy = 0.4165

 Εκπαίδευση SVM για C = 10
   Accuracy = 0.3696


In [25]:
# One row per tested C, with columns "C" and "Accuracy"; displayed as the
# cell output.
df_svm_results = pd.DataFrame(results)
df_svm_results


Unnamed: 0,C,Accuracy
0,0.1,0.463944
1,1.0,0.416541
2,10.0,0.369642


In [26]:
# Write the sweep results to a CSV file in the current working directory,
# without the index column, then print a confirmation (Greek: "Saving
# completed").
df_svm_results.to_csv("experiment_3_svm_C_GTSRB_results.csv", index=False)
print("✅ Αποθήκευση ολοκληρώθηκε: experiment_3_svm_C_GTSRB_results.csv")


✅ Αποθήκευση ολοκληρώθηκε: experiment_3_svm_C_GTSRB_results.csv


In [27]:
# Pick the C of the row with the highest recorded accuracy. idxmax returns
# the first such row label if several rows tie.
best_idx = df_svm_results["Accuracy"].idxmax()
best_C = df_svm_results.loc[best_idx, "C"]
best_C


np.float64(0.1)

In [28]:
# Refit a linear SVM with the selected C on the training BoVW features and
# predict the test set.
# NOTE(review): best_preds is not used in the cells visible here — confirm
# that a later cell consumes it.
best_svm = SVC(kernel="linear", C=best_C, decision_function_shape="ovr")
best_svm.fit(X_train_bovw, y_train)

best_preds = best_svm.predict(X_test_bovw)


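Κατά τη ρύθμιση (tuning) της παραμέτρου C του γραμμικού SVM στο dataset GTSRB, παρατηρήθηκε ότι η απόδοση του ταξινομητή μειώνεται όσο αυξάνεται το C: 46.39% για C = 0.1, 41.65% για C = 1 και 36.96% για C = 10. Η μέγιστη ακρίβεια ταξινόμησης επιτυγχάνεται επομένως για C = 0.1 (46.39%). Η συμπεριφορά αυτή υποδηλώνει ότι στο συγκεκριμένο σύνολο δεδομένων οι κλάσεις δεν είναι γραμμικά διαχωρίσιμες στον χώρο BoVW, με αποτέλεσμα ένα «μαλακότερο» περιθώριο (μικρό C) να γενικεύει καλύτερα. Έτσι, ορίζεται η τιμή C*_GTSRB = 0.1 ως βέλτιστη. Σημειώνεται ότι η επιλογή έγινε με βάση την ακρίβεια στο σύνολο ελέγχου (test set), οπότε η τιμή 46.39% αποτελεί αισιόδοξη εκτίμηση της ικανότητας γενίκευσης.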