In [1]:
import os
import cv2
import numpy as np
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC


In [2]:
def load_images_from_folder(folder):
    """Load a class-per-subdirectory image dataset as grayscale images.

    Every sub-directory of ``folder`` is one class. Its integer label is the
    position of the directory name in the alphabetically sorted list of
    class directories.

    Args:
        folder: Path of the dataset root (one sub-directory per class).

    Returns:
        A tuple ``(images, labels, class_names)``:
        images is a list of whatever ``cv2.imread`` returned for each file
        it could read; labels is an integer ``np.ndarray`` aligned with
        ``images``; class_names is the sorted list of class directory names,
        so that ``class_names[label]`` is the class of an image.
    """
    # Only directories are classes. A stray file in the dataset root would
    # otherwise make os.listdir() below raise, or occupy a class index.
    class_names = sorted(
        entry
        for entry in os.listdir(folder)
        if os.path.isdir(os.path.join(folder, entry))
    )

    images = []
    labels = []
    for idx, class_name in enumerate(class_names):
        class_path = os.path.join(folder, class_name)
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # sample order reproducible between runs and machines.
        for file_name in sorted(os.listdir(class_path)):
            img = cv2.imread(os.path.join(class_path, file_name), cv2.IMREAD_GRAYSCALE)
            # Entries for which imread yields None are skipped so that images
            # and labels stay aligned.
            if img is not None:
                images.append(img)
                labels.append(idx)

    # Explicit integer dtype so an empty dataset still yields integer labels.
    return images, np.array(labels, dtype=int), class_names


In [4]:
import zipfile

# Dataset archive and the directory it is extracted into.
zip_path = '/content/caltech-transportation.zip'
extract_path = '/content/'

train_path = "/content/caltech-transportation_train"
test_path  = "/content/caltech-transportation_test"

# Unzip the dataset unless BOTH splits are already present. Checking only
# the train split would skip extraction when the test split is missing.
if not (os.path.isdir(train_path) and os.path.isdir(test_path)):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

X_train, y_train, class_names = load_images_from_folder(train_path)
X_test, y_test, test_class_names = load_images_from_folder(test_path)

# Labels are positions in each split's own sorted class list, so the two
# lists must be identical for train and test labels to mean the same class.
if test_class_names != class_names:
    raise ValueError(
        "Train/test class folders differ: "
        f"train={class_names}, test={test_class_names}"
    )

In [5]:
# Module-level SIFT instance; extract_sift_features() reuses it for every image.
sift = cv2.SIFT_create()

def extract_sift_features(images):
    """Compute SIFT descriptors for each image, one list entry per image.

    Uses the module-level ``sift`` object.

    Args:
        images: Iterable of images accepted by ``sift.detectAndCompute``.

    Returns:
        A list with exactly one descriptor array per input image, in input
        order. An image for which no descriptors are returned contributes an
        empty ``(0, 128)`` float32 array rather than being dropped, so the
        list stays index-aligned with the label array of the same image set.
        Empty arrays are harmless to ``np.vstack``.
    """
    descriptors_list = []

    for img in images:
        _keypoints, descriptors = sift.detectAndCompute(img, None)
        if descriptors is None:
            # No descriptors for this image. Keep a placeholder instead of
            # skipping it; skipping would leave fewer feature rows than labels.
            # 128 is the SIFT descriptor length.
            descriptors = np.empty((0, 128), dtype=np.float32)
        descriptors_list.append(descriptors)

    return descriptors_list


In [6]:
# SIFT descriptors of the training images; later cells reuse them both to fit
# the KMeans vocabulary and to build the training histograms.
train_desc = extract_sift_features(X_train)


In [7]:
# Visual-vocabulary size: initial value, to be tuned experimentally.
K = 100

# Stack the descriptors of all training images into one matrix and cluster
# it into K visual words.
all_descriptors = np.vstack(train_desc)
kmeans = KMeans(random_state=0, n_clusters=K)
kmeans.fit(all_descriptors)


In [8]:
def build_bovw_histograms(descriptors_list, kmeans, K, normalize=False):
    """Turn per-image descriptor sets into bag-of-visual-words histograms.

    Args:
        descriptors_list: Sequence with one entry per image. Each entry is a
            2-D descriptor array, an array with zero rows, or ``None``.
        kmeans: Fitted clustering model whose ``predict`` maps descriptors to
            visual-word indices in ``[0, K)``.
        K: Number of visual words (histogram length).
        normalize: If true, each non-empty histogram is divided by its total
            count (L1 normalisation) so images with different numbers of
            descriptors become comparable. Defaults to ``False``, which
            returns raw counts.

    Returns:
        ``np.ndarray`` of float64 with one length-``K`` histogram per entry
        of ``descriptors_list``. Entries without descriptors give an
        all-zero row.
    """
    histograms = []

    for descriptors in descriptors_list:
        hist = np.zeros(K)
        # Both None and a zero-row array mean "no local features". Skip
        # predict() for them: clustering models such as scikit-learn's KMeans
        # reject zero-sample input.
        if descriptors is not None and len(descriptors) > 0:
            words = kmeans.predict(descriptors)
            # Unbuffered in-place add: repeated indices are each counted, and
            # an index >= K raises IndexError just like explicit indexing.
            np.add.at(hist, words, 1)
            if normalize:
                hist /= hist.sum()
        histograms.append(hist)

    return np.array(histograms)


In [9]:
# Encode both splits against the same fitted vocabulary.
X_train_bovw = build_bovw_histograms(descriptors_list=train_desc, kmeans=kmeans, K=K)
X_test_bovw = build_bovw_histograms(
    descriptors_list=extract_sift_features(X_test),
    kmeans=kmeans,
    K=K,
)


In [10]:
# Sweep the number of neighbours and print the test accuracy for each value.
k_values = [1, 3, 5, 7, 9]
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train_bovw, y_train)
    preds = knn.predict(X_test_bovw)
    acc = accuracy_score(y_true=y_test, y_pred=preds)
    print(f"k = {k} → Accuracy = {acc:.4f}")


k = 1 → Accuracy = 0.8497
k = 3 → Accuracy = 0.8634
k = 5 → Accuracy = 0.8552
k = 7 → Accuracy = 0.8552
k = 9 → Accuracy = 0.8607


In [11]:
# `preds` holds the k-NN predictions left over from the LAST k of the sweep
# cell (the final entry of k_values), not from the best-scoring k.
# zero_division=0 reports 0.0 for a class that is never predicted, which is
# what the default does too, but without the undefined-metric warnings.
print(classification_report(y_test, preds, target_names=class_names, zero_division=0))


              precision    recall  f1-score   support

  Motorbikes       0.86      0.94      0.90       160
   airplanes       0.87      0.93      0.89       160
    car_side       0.83      0.60      0.70        25
       ferry       0.00      0.00      0.00        14
inline_skate       1.00      0.14      0.25         7

    accuracy                           0.86       366
   macro avg       0.71      0.52      0.55       366
weighted avg       0.83      0.86      0.84       366



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [12]:
# Linear-kernel SVM fitted on the training histograms, then applied to the
# test histograms.
svm = SVC(decision_function_shape="ovr", kernel="linear").fit(X_train_bovw, y_train)
svm_preds = svm.predict(X_test_bovw)


In [13]:
# Overall accuracy followed by the per-class breakdown for the SVM predictions.
svm_acc = accuracy_score(y_true=y_test, y_pred=svm_preds)
print("SVM Accuracy:", svm_acc)
print(
    classification_report(
        y_true=y_test,
        y_pred=svm_preds,
        target_names=class_names,
    )
)


SVM Accuracy: 0.855191256830601
              precision    recall  f1-score   support

  Motorbikes       0.90      0.94      0.92       160
   airplanes       0.89      0.82      0.86       160
    car_side       0.65      0.68      0.67        25
       ferry       0.45      0.71      0.56        14
inline_skate       1.00      0.57      0.73         7

    accuracy                           0.86       366
   macro avg       0.78      0.75      0.75       366
weighted avg       0.87      0.86      0.86       366



In [14]:
# Vocabulary sizes to compare.
K_values = [50, 100, 200, 300, 400]

# The test descriptors do not depend on K, so extract them once instead of
# re-running SIFT over the whole test set for every vocabulary size.
test_desc = extract_sift_features(X_test)

# NOTE: the loop rebinds K, kmeans, X_train_bovw, X_test_bovw, svm and preds;
# after this cell they hold the values for the last entry of K_values.
for K in K_values:
    kmeans = KMeans(n_clusters=K, random_state=0)
    kmeans.fit(all_descriptors)

    X_train_bovw = build_bovw_histograms(train_desc, kmeans, K)
    X_test_bovw = build_bovw_histograms(test_desc, kmeans, K)

    svm = SVC(kernel="linear")
    svm.fit(X_train_bovw, y_train)
    preds = svm.predict(X_test_bovw)

    acc = accuracy_score(y_test, preds)
    print(f"K = {K} → Accuracy = {acc:.4f}")


K = 50 → Accuracy = 0.8497
K = 100 → Accuracy = 0.8552
K = 200 → Accuracy = 0.9016
K = 300 → Accuracy = 0.9126
K = 400 → Accuracy = 0.9098


Τα αποτελέσματα δείχνουν ότι η αύξηση του μεγέθους του οπτικού λεξικού βελτιώνει την ακρίβεια του γραμμικού SVM από 84.97% (K=50) έως τη μέγιστη τιμή 91.26% στο K=300, ενώ στο K=400 η ακρίβεια υποχωρεί οριακά (90.98%). Οι τιμές θεωρούνται εξαιρετικές για σύστημα BoVW με SIFT και γραμμικό SVM, ενώ μετά τα 200 clusters παρατηρείται τάση κορεσμού της απόδοσης.

Για τον ταξινομητή k-NN, η μέγιστη ακρίβεια παρατηρείται για k = 3 (86.34%), γεγονός που υποδεικνύει βέλτιστη ισορροπία μεταξύ υπερπροσαρμογής (k=1) και υποπροσαρμογής (k≥5). Για μεγαλύτερες τιμές του k, η απόδοση σταθεροποιείται ή μειώνεται ελαφρώς (85.52%–86.07%), στοιχείο που υποδηλώνει κορεσμό της γενίκευσης.