In [26]:
from importlib import reload

import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from torch.utils.data import DataLoader

from classifiers import KNearestNeighbors
from features import run3_SIFT_transforms
from utils import create_dataset, show_misclassified, get_percent_misclassified

In [2]:
# Build the labeled dataset from ./training/ using the run3 SIFT transform pipeline.
transform = run3_SIFT_transforms()
dataset = create_dataset("./training/", transform, labeled=True)

# One batch the size of the whole dataset returns every sample in a single draw.
# The sampler's generator is seeded so the shuffled order is the same on every
# kernel run; without it the order (and everything derived from it) changes
# from run to run.
dataloader = DataLoader(
    dataset,
    batch_size=len(dataset),
    shuffle=True,
    generator=torch.Generator().manual_seed(42),
)

# Load the labeled data and convert X and y to NumPy arrays via .numpy().
X, y, paths, path_class_idxs = next(iter(dataloader))
X = X.numpy()
y = y.numpy()

In [3]:
# Check the dimensions of the NumPy array produced by the loading cell.
X.shape

(1500, 1, 2601, 128)

In [4]:
import features

In [15]:
# Reload the features module so the notebook uses its current source
# (importlib.reload re-executes the module and returns it).
features = reload(features)

In [16]:
# Build the feature matrix from X using 200 clusters (presumably a
# bag-of-visual-words encoding, going by the function name — confirm).
# NOTE(review): get_bovw receives all of X here, before the train/validation
# split is made. If it fits its clustering on the input, validation samples
# influence the features the classifiers are later scored on — confirm against
# features.get_bovw and consider fitting on the training portion only.
X_bovw = features.get_bovw(X, num_clusters=200)

(1500, 1, 2601, 128)
(3901500, 128)


In [18]:
# Hold out 20% of the samples for validation, stratified on y. A fixed
# random_state makes the split repeatable for a given input ordering, so the
# validation scores reported below can be reproduced.
X_train, X_val, y_train, y_val, paths_train, paths_val = train_test_split(
    X_bovw,
    y,
    paths,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [19]:
# Hyperparameters used to instantiate the classifier.
classifier_hparams = {
    'n_neighbors': 2,
    'n_jobs': -1,
}

# Parameter grid to search over when tuning the classifier.
param_grid = {
    'n_neighbors': list(range(1, 25)),
    'n_jobs': [-1],
}

classifier = KNearestNeighbors(**classifier_hparams)

# Tunes the classifier and writes to the metrics file.
best_score, best_params = classifier.tune(X_train, y_train, param_grid=param_grid)

print(f"Best parameters: {best_params}\n")
print(f"Best score: {best_score}\n")

Best parameters: {'n_jobs': -1, 'n_neighbors': 23}

Best score: 0.5096007986295992



In [24]:
# what about an SVM?
# NOTE(review): SVC() is constructed with default hyperparameters; scikit-learn
# documents the default kernel as 'rbf', so the `lin_clf` name may be
# misleading — confirm whether a linear kernel was intended.
lin_clf = SVC()
lin_clf.fit(X_train, y_train)


SVC()

In [25]:
# Score the fitted SVC on the held-out validation split.
lin_clf.score(X_val, y_val)

0.5866666666666667

In [33]:
# what about a random forest classifier?
# random_state fixes the forest's internal randomness so the fitted model and
# its validation score can be reproduced across runs.
clf = RandomForestClassifier(n_jobs=-1, min_samples_leaf=10, random_state=42)
clf.fit(X_train, y_train)

RandomForestClassifier(min_samples_leaf=10, n_jobs=-1)

In [34]:
# Score the fitted random forest on the held-out validation split.
clf.score(X_val, y_val)

0.5933333333333334