In [40]:
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from pathlib import Path
import preprocessing


current_folder = Path()
dataset_folder = current_folder / "dataset"
images_folder = dataset_folder / "images"
models_folder = current_folder / "models"
logs_folder = current_folder / "logs"

X_train, X_test, y_train, y_test = preprocessing.get_dataset()
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

INFO: modified dataset already created


In [41]:
"""
from imblearn.over_sampling import SMOTE

X_flat = np.reshape(X_train, (X_train.shape[0], int(np.product(X_train.shape) / X_train.shape[0])))

sm = SMOTE(n_jobs=-1, random_state=42)
X_train_os, y_train_os = sm.fit_resample(X_flat, y_train)

X_train_os_rs = np.reshape(X_train_os, tuple([X_train_os.shape[0]]) + X_train.shape[1:])

X_train = X_train_os_rs
y_train = y_train_os
"""

'\nfrom imblearn.over_sampling import SMOTE\n\nX_flat = np.reshape(X_train, (X_train.shape[0], int(np.product(X_train.shape) / X_train.shape[0])))\n\nsm = SMOTE(n_jobs=-1, random_state=42)\nX_train_os, y_train_os = sm.fit_resample(X_flat, y_train)\n\nX_train_os_rs = np.reshape(X_train_os, tuple([X_train_os.shape[0]]) + X_train.shape[1:])\n\nX_train = X_train_os_rs\ny_train = y_train_os\n'

In [42]:
np.unique(y_train, return_counts=True)

(array(['crosswalk', 'speedlimit', 'stop', 'trafficlight'], dtype='<U12'),
 array([160, 626,  74, 135], dtype=int64))

In [43]:
"""
# One hot encoding
label_enc = sklearn.preprocessing.LabelEncoder()
y_train_raw = y_train
y_test_raw = y_test
y_train = label_enc.fit_transform(y_train)
y_test = label_enc.transform(y_test)
one_hot = sklearn.preprocessing.OneHotEncoder(sparse=False)
y_train = one_hot.fit_transform(y_train.reshape(-1, 1))
y_test = one_hot.transform(y_test.reshape(-1, 1))

X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.25, random_state=54
)
"""

'\n# One hot encoding\nlabel_enc = sklearn.preprocessing.LabelEncoder()\ny_train_raw = y_train\ny_test_raw = y_test\ny_train = label_enc.fit_transform(y_train)\ny_test = label_enc.transform(y_test)\none_hot = sklearn.preprocessing.OneHotEncoder(sparse=False)\ny_train = one_hot.fit_transform(y_train.reshape(-1, 1))\ny_test = one_hot.transform(y_test.reshape(-1, 1))\n\nX_train, X_valid, y_train, y_valid = train_test_split(\n    X_train, y_train, test_size=0.25, random_state=54\n)\n'

In [47]:
import cv2
import skimage
import numpy as np

def extract_features(X_train, y_train):
    sift_vectors = {}
    for el in set(y_train):
        sift_vectors[el] = []

    descriptor_list = []
    sift = cv2.SIFT_create()
    #brisk = cv2.BRISK_create(30)

    for label, image in zip(y_train, X_train):
        img = skimage.img_as_ubyte(image)
        kp, des = sift.detectAndCompute(img, None)
        #kp, des = sift.detectAndCompute(image, None)
        if des is None:
            continue

        descriptor_list.extend(des)
        sift_vectors[label].append(des)

    return [descriptor_list, sift_vectors]

descriptor_list, all_bovw_feature = extract_features(X_train, y_train)
_, test_bovw_feature = extract_features(X_test, y_test)

In [48]:
from sklearn.cluster import KMeans

def kmeans(k, descriptor_list):
    kmeans = KMeans(n_clusters=k, n_init=10)
    kmeans.fit(descriptor_list)
    visual_words = kmeans.cluster_centers_
    return visual_words

visual_words = kmeans(150, descriptor_list)

In [49]:
from scipy.spatial import distance

def find_index(instance, main_list):
    min_dist = 10e50
    min_idx = -1
    for idx, el in enumerate(main_list):
        dist = distance.euclidean(instance, el)
        if dist < min_dist:
            min_dist = dist
            min_idx = idx

    return min_idx

def image_class(all_bovw, centers):
    dict_feature = {}
    for key, value in all_bovw.items():
        category = []
        for img_feat_descriptors in value:
            histogram = np.zeros(len(centers))
            for each_feature in img_feat_descriptors:
                ind = find_index(each_feature, centers)
                histogram[ind] += 1
            category.append(histogram)
        dict_feature[key] = category
    return dict_feature

# Creates histograms for train data
bovw_train = image_class(all_bovw_feature, visual_words)
# Creates histograms for test data
bovw_test = image_class(test_bovw_feature, visual_words)

In [50]:
def knn(images, tests):
    num_test = 0
    correct_predict = 0
    class_based = {}

    for test_key, test_val in tests.items():
        class_based[test_key] = [0, 0]  # [correct, all]
        for tst in test_val:
            predict_start = 0
            # print(test_key)
            minimum = 0
            key = "a"  # predicted
            for train_key, train_val in images.items():
                for train in train_val:
                    if predict_start == 0:
                        minimum = distance.euclidean(tst, train)
                        # minimum = L1_dist(tst,train)
                        key = train_key
                        predict_start += 1
                    else:
                        dist = distance.euclidean(tst, train)
                        # dist = L1_dist(tst,train)
                        if dist < minimum:
                            minimum = dist
                            key = train_key

            if test_key == key:
                correct_predict += 1
                class_based[test_key][0] += 1
            num_test += 1
            class_based[test_key][1] += 1
            # print(minimum)
    return [num_test, correct_predict, class_based]

# Evaluate per class
results_bowl = knn(bovw_train, bovw_test)

In [51]:
def accuracy(results):
    avg_accuracy = (results[1] / results[0]) * 100
    print("Average accuracy: %" + str(avg_accuracy))
    print("-------------------------")
    print("Class based accuracies:")
    for key, value in results[2].items():
        acc = (value[0] / value[1]) * 100
        print(f"{key}: % {acc:.3f}")

accuracy(results_bowl)

Average accuracy: %90.36144578313254
-------------------------
Class based accuracies:
trafficlight: % 65.714
speedlimit: % 94.904
stop: % 94.118
crosswalk: % 92.500


In [None]:
Y_pred = np.argmax(y_pred, axis=1)  # one-hot to index
Y_test = np.argmax(y_test, axis=1)
print(
    sklearn.metrics.classification_report(
        , Y_pred, target_names=label_enc.classes_
    )
)

# Evaluate general
test_results = model.evaluate(X_test, y_test)  # loss and metrics
print(f"Test Data - Loss: {test_results[0]:.3f}, Metrics: {test_results[1:]}")

NameError: name 'y_pred' is not defined