In [1]:
import numpy as np
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import classification_report, accuracy_score

from SIFT import SIFT
from FeatureExtraction import FeatureExtraction
from utils import read_processed_data

In [2]:
data, labels = read_processed_data()

# Hold out 20% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Drop the references to the full dataset so its memory can be reclaimed.
# `del` removes the names outright rather than rebinding them to a dummy
# value, so any accidental later use raises NameError instead of silently
# operating on a placeholder.
del data, labels

# Carve the validation set out of the remaining 80%: 0.25 * 0.8 = 0.2 of the
# full dataset, i.e. a 60/20/20 train/validation/test split.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

# Check the size of the training, validation and test sets
print(f"Training set size: {len(X_train)}")
print(f"Validation set size: {len(X_val)}")
print(f"Test set size: {len(X_test)}")

Training set size: 2397
Validation set size: 800
Test set size: 800


In [3]:
# Wrap the training split in the project's SIFT helper and run descriptor
# extraction. Later cells read `sift_extractor_training.descriptors`, so
# presumably this call is what populates it (TODO confirm in SIFT.py).
sift_extractor_training = SIFT(X_train)
sift_extractor_training.extract_descriptors()

In [4]:
# Fit k-means on the pooled training descriptors. The fitted model is passed
# to FeatureExtraction in later cells.
k = 28 * 4  # Number of clusters

# Stack the training descriptors into a single 2-D array for fitting.
training_descriptor_matrix = np.vstack(sift_extractor_training.descriptors)

kmeans = KMeans(
    n_clusters=k,
    random_state=42,
    n_init=1,
)
kmeans.fit(training_descriptor_matrix)

In [5]:
# Build the training feature matrix from the training split, the fitted
# k-means model and the descriptors extracted earlier. The displayed shape has
# one row per training sample and `k` columns.
feature_extractor_training = FeatureExtraction(X_train, kmeans, sift_extractor_training.descriptors)
# NOTE(review): X_train is rebound here from the raw split to the feature
# matrix, so re-running this cell on its own would feed the features back into
# FeatureExtraction. Re-run from the data-loading cell instead.
X_train = feature_extractor_training.bag_of_visual_words_SIFT()
X_train.shape

(2397, 112)

In [6]:
# Linear-kernel support vector classifier; the seed is fixed so repeated runs
# are reproducible.
svm_model = svm.SVC(
    kernel="linear",
    random_state=42,
)

# Fit the classifier on the training features and their labels.
svm_model.fit(X=X_train, y=y_train)

In [7]:
# Score the model on the data it was fitted on; comparing this with the
# validation score shows how much the model overfits.
y_train_pred = svm_model.predict(X_train)

# Overall accuracy first, then the per-class precision/recall/F1 breakdown.
train_accuracy = accuracy_score(y_train, y_train_pred)
print("Training Accuracy:", train_accuracy)

train_report = classification_report(y_train, y_train_pred)
print("Classification Report on Training Data:\n", train_report)

Training Accuracy: 0.8986232790988736
Classification Report on Training Data:
               precision    recall  f1-score   support

           0       0.94      0.98      0.96       602
           1       0.99      0.96      0.98       589
           2       0.75      0.98      0.85       597
           3       0.99      0.68      0.81       609

    accuracy                           0.90      2397
   macro avg       0.92      0.90      0.90      2397
weighted avg       0.92      0.90      0.90      2397



In [8]:
# Build the validation feature matrix with the k-means model that was fitted
# on the training descriptors. No precomputed descriptors are passed here, so
# presumably FeatureExtraction extracts them itself (TODO confirm in
# FeatureExtraction.py).
feature_extractor_validation = FeatureExtraction(X_val, kmeans)
# NOTE(review): X_val is rebound here from the raw split to the feature
# matrix, so re-running this cell on its own would feed the features back into
# FeatureExtraction. Re-run from the data-loading cell instead.
X_val = feature_extractor_validation.bag_of_visual_words_SIFT()
X_val.shape

(800, 112)

In [9]:
# Predict the held-out validation data to estimate how the model generalizes
y_val_pred = svm_model.predict(X_val)

# Overall accuracy first, then the per-class precision/recall/F1 breakdown.
val_accuracy = accuracy_score(y_val, y_val_pred)
print("Validation Accuracy:", val_accuracy)

val_report = classification_report(y_val, y_val_pred)
print("Classification Report on Validation Data:\n", val_report)

Validation Accuracy: 0.87625
Classification Report on Validation Data:
               precision    recall  f1-score   support

           0       0.92      0.96      0.94       190
           1       0.99      0.97      0.98       201
           2       0.73      0.96      0.83       212
           3       0.96      0.61      0.75       197

    accuracy                           0.88       800
   macro avg       0.90      0.87      0.87       800
weighted avg       0.90      0.88      0.87       800

