# **Methods for computing the features used for classification**

In [None]:
import numpy as np
import cv2
import math
import pandas as pd
import os

def hog_compute_gradients(image):

    magnitude_x = np.zeros_like(image, dtype=np.float32)
    magnitude_y = np.zeros_like(image, dtype=np.float32)
    
    for i in range(1, image.shape[0] - 1):
        for j in range(1, image.shape[1] - 1):
            magnitude_x[i, j] = image[i, j + 1].astype(np.float32) - image[i, j - 1].astype(np.float32)
            magnitude_y[i, j] = image[i - 1, j].astype(np.float32) - image[i + 1, j].astype(np.float32)
    
    magnitude = np.sqrt(magnitude_x ** 2 + magnitude_y ** 2)
    orientation = np.arctan2(magnitude_y, magnitude_x)
    orientation[orientation > 0] *= 180 / np.pi
    orientation[orientation < 0] = (orientation[orientation < 0] + np.pi) * 180 / np.pi
    
    return magnitude, orientation

def hog_compute_histograms(magnitude, orientation, cell_size=(8, 8), bins=9):

    hist = np.zeros(shape=(magnitude.shape[0] // cell_size[0], magnitude.shape[1] // cell_size[1], bins))
    step_size = 180 // bins
    
    for i in range(hist.shape[0]):
        for j in range(hist.shape[1]):
            cell_magnitude = magnitude[i * cell_size[0]:(i + 1) * cell_size[0], j * cell_size[1]:(j + 1) * cell_size[1]]
            cell_orientation = orientation[i * cell_size[0]:(i + 1) * cell_size[0], j * cell_size[1]:(j + 1) * cell_size[1]]
            
            cell_hist = np.zeros(bins)
            for ii in range(cell_magnitude.shape[0]):
                for jj in range(cell_magnitude.shape[1]):
                    
                    bin_index = math.floor(cell_orientation[ii, jj] / step_size - 0.5)
                    
                    value1 = cell_magnitude[ii, jj] * (cell_orientation[ii, jj] / step_size - 0.5)
                    center = step_size * (bin_index + 0.5)
                    value2 = cell_magnitude[ii, jj] * ((cell_orientation[ii, jj] - center) / step_size)
                    
                    cell_hist[bin_index] += value1
                    cell_hist[(bin_index + 1) % bins] += value2
            
            hist[i, j] = cell_hist
    
    return hist

def hog_normalize_histograms(hist, block_size=(2, 2), epsilon=1e-5):

    normalized_hist = np.zeros(shape=(hist.shape[0] - block_size[0] + 1, hist.shape[1] - block_size[1] + 1, block_size[0], block_size[1], hist.shape[2]))
    
    for i in range(normalized_hist.shape[0]):
        for j in range(normalized_hist.shape[1]):
            block_hist = hist[i:i + block_size[0], j:j + block_size[1]]
            block_norm = np.sqrt(np.sum(block_hist ** 2) + epsilon)
            normalized_hist[i, j] = block_hist / block_norm
    
    return normalized_hist

def hog(image):
    magnitude, orientation = hog_compute_gradients(image)
    hist = hog_compute_histograms(magnitude, orientation)
    normalized_hist = hog_normalize_histograms(hist)
    return normalized_hist.flatten()





def lbp_compute_matrix(image):
    height, width = image.shape
    lbp_matrix = np.zeros((height, width), dtype=np.int32)

    for i in range(height):
        for j in range(width):
            center = image[i, j].astype(np.int32)

            east = 0
            if j + 1 < width and image[i, j + 1].astype(np.int32) >= center:
                east = 1
            
            north_east = 0
            if i - 1 >= 0 and j + 1 < width and image[i - 1, j + 1].astype(np.int32) >= center: 
                north_east = 1

            north = 0
            if i - 1 >= 0 and image[i - 1, j].astype(np.int32) >= center:
                north = 1

            north_west = 0
            if i - 1 >= 0 and j - 1 >= 0 and image[i - 1, j - 1].astype(np.int32) >= center:
                north_west = 1

            west = 0
            if j - 1 >= 0 and image[i, j - 1].astype(np.int32) >= center:
                west = 1

            south_west = 0
            if i + 1 < height and j - 1 >= 0 and image[i + 1, j - 1].astype(np.int32) >= center:
                south_west = 1

            south = 0
            if i + 1 < height and image[i + 1, j].astype(np.int32) >= center:
                south = 1

            south_east = 0
            if i + 1 < height and j + 1 < width and image[i + 1, j + 1].astype(np.int32) >= center:
                south_east = 1

            lbp_matrix[i, j] = east + 2 * north_east + 4 * north + 8 * north_west + 16 * west + 32 * south_west + 64 * south + 128 * south_east
    return lbp_matrix

def lbp_compute_histogram(lbp_matrix):
    height, width = lbp_matrix.shape
    lbp_hist = np.zeros(256, dtype=np.int32)
    for i in range(height):
        for j in range(width):
            lbp_hist[lbp_matrix[i, j]] += 1
    return lbp_hist

def normalize_histogram(lbp_hist, M):
    lbp_norm_hist = np.zeros(256, dtype=np.float32)
    for i in range(256):
        lbp_norm_hist[i] = lbp_hist[i] / M
    return lbp_norm_hist

def lbp(image, cell_size=(16, 16), bins=256, normalize=True):

    height, width = image.shape
    hist = np.zeros(shape=(height // cell_size[0], width // cell_size[1], bins))
    
    for i in range(hist.shape[0]):
        for j in range(hist.shape[1]):
            cell_image = image[i * cell_size[0]:(i + 1) * cell_size[0], j * cell_size[1]:(j + 1) * cell_size[1]]
            
            lbp_matrix = lbp_compute_matrix(cell_image)
            hist[i, j] = lbp_compute_histogram(lbp_matrix)
            if normalize:
                hist[i, j] = normalize_histogram(hist[i, j], height * width)
    
    return hist.flatten()





def histogram_3_channels(src, normalize=True):
    height, width = src.shape[:2]
    hist_blue = np.zeros(256, dtype=np.int32)
    hist_green = np.zeros(256, dtype=np.int32)
    hist_red = np.zeros(256, dtype=np.int32)
    for i in range(height):
        for j in range(width):
            hist_blue[src[i, j, 0]] += 1
            hist_green[src[i, j, 1]] += 1
            hist_red[src[i, j, 2]] += 1
    if normalize:
        hist_blue = normalize_histogram(hist_blue, height * width)
        hist_green = normalize_histogram(hist_green, height * width)
        hist_red = normalize_histogram(hist_red, height * width)
    final_hist = hist_blue + hist_green + hist_red
    return final_hist





def rgb_2_gray(src):
    height, width = src.shape[:2]
    dst = np.zeros((height, width), dtype=np.uint8)

    for i in range(height):
        for j in range(width):
            dst[i, j] = 0.299 * src[i, j, 0] + 0.587 * src[i, j, 1] + 0.114 * src[i, j, 2]

    return dst

def preprocess_image(img):
    height, width = img.shape[:2]
    gray_img = rgb_2_gray(img)

    resized_img = img
    if width * height < 64 * 128:
        resized_img = cv2.resize(img, (64, 128), interpolation = cv2.INTER_CUBIC)
    elif width * height > 64 * 128:
        resized_img = cv2.resize(img, (64, 128), interpolation = cv2.INTER_AREA)

    resized_gray_img = gray_img
    if width * height < 64 * 128:
        resized_gray_img = cv2.resize(gray_img, (64, 128), interpolation = cv2.INTER_CUBIC)
    elif width * height > 64 * 128:
        resized_gray_img = cv2.resize(gray_img, (64, 128), interpolation = cv2.INTER_AREA)

    return resized_img, resized_gray_img

def create_features(resized_img, resized_gray_img):
    hog_val = hog(resized_gray_img)
    lbp_val = lbp(resized_gray_img)
    histogram_3_channels_val = histogram_3_channels(resized_img)
    return hog_val, lbp_val, histogram_3_channels_val

# **Computing and saving (on disk) the features for the training and validation sets**

In [None]:
meta_data = pd.read_csv('../../datasets/archive/Meta.csv')

for cls in meta_data["ClassId"]:
    path = "../../datasets/archive/Train/{0}/".format(cls)
    files = os.listdir(path)
    for file in files:
        if os.path.isfile(path + file):
            train_image = cv2.imread(path + file, cv2.IMREAD_COLOR)
            file_name = os.path.splitext(file)[0]
            new_path = path + "features/" + file_name
            if not os.path.exists(new_path):
                os.makedirs(new_path)
            new_path = new_path + "/"
            resized_img, resized_gray_img = preprocess_image(train_image)
            hog_val, lbp_val, histogram_3_channels_val = create_features(resized_img, resized_gray_img)
            hog_path = new_path + "hog_" + file_name + ".npy"
            lbp_path = new_path + "lbp_" + file_name + ".npy"
            hist_path = new_path + "hist_" + file_name + ".npy"
            np.save(hog_path, hog_val)
            np.save(lbp_path, lbp_val)
            np.save(hist_path, histogram_3_channels_val)

# **Computing and saving (on disk) the features for the test set**

In [None]:
meta_data_test = pd.read_csv("../../datasets/archive/Test2.csv")
labels = meta_data_test['Path'].to_numpy()

for label in labels:
    test_image = cv2.imread('../../datasets/archive/' + label, cv2.IMREAD_COLOR)
    file_name = os.path.splitext(os.path.basename(label))[0]
    numeric_part = file_name.split('/')[-1]
    new_path = "../../datasets/archive/Test/features/" + numeric_part
    if not os.path.exists(new_path):
        os.makedirs(new_path)
    new_path = new_path + "/"
    resized_img, resized_gray_img = preprocess_image(test_image)
    histogram_3_channels_val = create_features(resized_img, resized_gray_img)
    hog_val, lbp_val, histogram_3_channels_val = create_features(resized_img, resized_gray_img)
    hog_path = new_path + "hog_" + numeric_part + ".npy"
    lbp_path = new_path + "lbp_" + numeric_part + ".npy"
    hist_path = new_path + "hist_" + numeric_part + ".npy"
    np.save(hog_path, hog_val)
    np.save(lbp_path, lbp_val)
    np.save(hist_path, histogram_3_channels_val)

# **Computing and saving (on disk) the features for MY test set**

In [None]:
meta_data_test_2 = pd.read_csv("../../datasets/archive/MyTest.csv")
initial_path = "../../datasets/archive/MyTest/result_images/"
images_paths = meta_data_test_2["Path"]

for image_path in images_paths:
    file_name = os.path.splitext(os.path.basename(image_path))[0]
    file_name_without_extension = file_name.split('/')[-1]
    folder_path = initial_path + file_name_without_extension + "/"
    files = os.listdir(folder_path)
    for file in files:
        if os.path.isfile(folder_path + file):
            test_image = cv2.imread(folder_path + file, cv2.IMREAD_COLOR)
            subfile_name = os.path.splitext(os.path.basename(folder_path + file))[0]
            subfile_name_without_extension = subfile_name.split('/')[-1]
            new_path = folder_path + "features/" + subfile_name_without_extension
            if not os.path.exists(new_path):
                os.makedirs(new_path)
            new_path = new_path + "/"
            resized_img, resized_gray_img = preprocess_image(test_image)
            hog_val, lbp_val, histogram_3_channels_val = create_features(resized_img, resized_gray_img)
            hog_path = new_path + "hog_" + subfile_name_without_extension + ".npy"
            lbp_path = new_path + "lbp_" + subfile_name_without_extension + ".npy"
            hist_path = new_path + "hist_" + subfile_name_without_extension + ".npy"
            np.save(hog_path, hog_val)
            np.save(lbp_path, lbp_val)
            np.save(hist_path, histogram_3_channels_val)

# **Loading (from disk) the features that were computed for the training and validation sets**

In [41]:
meta_data = pd.read_csv('../../datasets/archive/Meta.csv')

train_labels = []

hogs = []
lbps = []
hists = []

for cls in meta_data["ClassId"]:
    path = "../../datasets/archive/Train/{0}/".format(cls)
    files = os.listdir(path)
    for file in files:
        if os.path.isfile(path + file):
            file_name = os.path.splitext(file)[0]
            new_path = path + "features/" + file_name + "/"
            hog_path = new_path + "hog_" + file_name + ".npy"
            lbp_path = new_path + "lbp_" + file_name + ".npy"
            hist_path = new_path + "hist_" + file_name + ".npy"
            hogs.append(np.load(hog_path))
            lbps.append(np.load(lbp_path))
            hists.append(np.load(hist_path))
            train_labels.append(cls)

data = np.concatenate((np.array(hogs), np.array(lbps), np.array(hists)), axis=1)
labels = np.array(train_labels)

# **Performing the actual split of the initial datset into training and validation**

In [42]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.1, random_state=42, stratify=labels)

# **Implementation of the KNN Classifier**

In [51]:
class KNNClassifier:
    def __init__(self, k, original_labels):
        self.k = k
        self.original_labels = original_labels

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        distances = self.compute_distances(X_test)
        
        nearest_neighbors = np.argsort(distances, axis=1)[:, :self.k]
        nearest_labels = self.y_train[nearest_neighbors]
        
        y_pred = np.array([np.bincount(labels).argmax() for labels in nearest_labels])
        
        return y_pred

    def predict_proba(self, X_test):
        distances = self.compute_distances(X_test)
        
        nearest_neighbors = np.argsort(distances, axis=1)[:, :self.k]
        nearest_labels = self.y_train[nearest_neighbors]
        
        n_samples = X_test.shape[0]
        n_classes = len(self.original_labels)
        y_proba = np.zeros((n_samples, n_classes))
        
        for i, labels in enumerate(nearest_labels):
            counts = np.zeros(n_classes)
            for j, label in enumerate(self.original_labels):
                counts[j] = np.count_nonzero(labels == label)
            y_proba[i] = counts / self.k
        
        return y_proba
    
    def compute_distances(self, X_test):
        num_test = X_test.shape[0]
        num_train = self.X_train.shape[0]
        
        dot_product = np.dot(X_test, self.X_train.T)
        X_test_squared = np.sum(np.square(X_test), axis=1).reshape(num_test, 1)
        X_train_squared = np.sum(np.square(self.X_train), axis=1).reshape(1, num_train)
        
        distances = np.sqrt(X_test_squared + X_train_squared - 2 * dot_product)
        return distances

# **"Fitting" the model and assessing its performance on the validation set**

In [52]:
model = KNNClassifier(k=17, original_labels=meta_data["ClassId"])
model.fit(X_train, y_train)

y_pred = model.predict(X_val)

In [53]:
from sklearn import metrics

print("Accuracy: " + str(metrics.accuracy_score(y_pred, y_val)))
print("Precision: " + str(metrics.precision_score(y_pred, y_val, average=None)))
print("Recall: " + str(metrics.recall_score(y_pred, y_val, average=None)))
print("F1 Score: " + str(metrics.f1_score(y_pred, y_val, average=None)))

Accuracy: 0.954728002920774
Precision: [0.9047619  0.94144144 0.90666667 0.84397163 0.97474747 0.91935484
 0.93055556 0.96453901 0.98639456 0.98484848 0.9952381  1.
 0.97435897 0.93650794 1.         1.         0.90909091 0.97435897
 0.95       0.91666667 0.94444444 0.92592593 0.97777778 1.
 0.97435897 0.80952381]
Recall: [1.         0.96759259 0.94883721 0.91538462 0.91904762 0.83823529
 0.90540541 0.92517007 0.99315068 0.94890511 1.         0.99082569
 1.         1.         1.         1.         1.         0.92682927
 0.93442623 1.         0.98076923 1.         0.95652174 1.
 1.         1.        ]
F1 Score: [0.95       0.9543379  0.92727273 0.87822878 0.94607843 0.87692308
 0.91780822 0.94444444 0.98976109 0.96654275 0.99761337 0.99539171
 0.98701299 0.96721311 1.         1.         0.95238095 0.95
 0.94214876 0.95652174 0.96226415 0.96153846 0.96703297 1.
 0.98701299 0.89473684]


# **Loading (from disk) the features that were computed for the test set**

In [54]:
test_meta_data = pd.read_csv("../../datasets/archive/Test2.csv")
labels_test = test_meta_data['Path'].to_numpy()

hogs_test = []
lbps_test = []
hists_test = []

for label in labels_test:
    file_name = os.path.splitext(os.path.basename(label))[0]
    numeric_part = file_name.split('/')[-1]
    new_path = "../../datasets/archive/Test/features/" + numeric_part + "/"
    hog_path = new_path + "hog_" + numeric_part + ".npy"
    lbp_path = new_path + "lbp_" + numeric_part + ".npy"
    hist_path = new_path + "hist_" + numeric_part + ".npy"
    hogs_test.append(np.load(hog_path))
    lbps_test.append(np.load(lbp_path))
    hists_test.append(np.load(hist_path))

# **Assessing the performance of the model on the test set**

In [55]:
X_test = np.concatenate((np.array(hogs_test), np.array(lbps_test), np.array(hists_test)), axis=1)
y_test = test_meta_data['ClassId'].values
pred = model.predict(X_test)

  distances = np.sqrt(X_test_squared + X_train_squared - 2 * dot_product)


In [56]:
print("Accuracy: " + str(metrics.accuracy_score(pred, y_test)))
print("Precision: " + str(metrics.precision_score(pred, y_test, average=None)))
print("Recall: " + str(metrics.recall_score(pred, y_test, average=None)))
print("F1 Score: " + str(metrics.f1_score(pred, y_test, average=None)))

Accuracy: 0.7912162162162162
Precision: [0.51666667 0.57361111 0.684      0.57777778 0.89545455 0.76507937
 0.67555556 0.79777778 0.9        0.8        1.         0.99583333
 0.93333333 0.85714286 0.96666667 0.89166667 0.76666667 0.66666667
 0.66666667 0.78333333 0.54666667 0.67777778 0.34666667 0.93333333
 0.83333333 0.41666667]
Recall: [0.93939394 0.83434343 0.62637363 0.67357513 0.80189959 0.52334419
 0.73429952 0.69980507 0.91525424 0.78504673 0.98995696 1.
 1.         0.96774194 0.96666667 1.         0.66346154 0.55555556
 0.53097345 0.78333333 0.58992806 0.92424242 0.54736842 0.95789474
 1.         1.        ]
F1 Score: [0.66666667 0.67983539 0.65391969 0.62200957 0.84609878 0.62153449
 0.7037037  0.74558671 0.90756303 0.79245283 0.99495314 0.99791232
 0.96551724 0.90909091 0.96666667 0.94273128 0.71134021 0.60606061
 0.591133   0.78333333 0.56747405 0.78205128 0.4244898  0.94545455
 0.90909091 0.58823529]


# **Augmenting the training set with the validation and test sets (I can do this because I have my separate test set on which I will perform the predictions)**

In [78]:
X_train_full = np.concatenate((X_train, X_val, X_test), axis=0)
y_train_full = np.concatenate((y_train, y_val, y_test), axis=0)

In [79]:
model_full = KNNClassifier(k=17, original_labels=meta_data["ClassId"])
model_full.fit(X_train_full, y_train_full)

# **Method for creating the final image that includes the predicted signs in the top right corner**

In [92]:
def create_final_image(file_name_without_extension, label_pred_array, size=40):
    initial_image_path = "../../datasets/archive/MyTest/" + file_name_without_extension + ".png"
    intitial_image = cv2.imread(initial_image_path, cv2.IMREAD_COLOR)
    overlay_images_path = "../../datasets/archive/Meta/"
    result_image_path = "../../datasets/archive/MyTest/final_results/"
    height, width = intitial_image.shape[:2]
    if size * len(label_pred_array) < height:
        for i, label in enumerate(label_pred_array):
            overlay_image = cv2.imread(overlay_images_path + str(label) + ".png", cv2.IMREAD_COLOR)
            resized_overlay_image = cv2.resize(overlay_image, (size, size), interpolation = cv2.INTER_AREA)
            intitial_image[i * size:(i + 1) * size, width - size:width] = resized_overlay_image
        cv2.imwrite(result_image_path + file_name_without_extension + ".png", intitial_image)

# **Loading the features that were computed for MY test set and creating the final images**

In [93]:
def convert_to_list(s):
    s = s.strip('[]')
    if not s:
        return []
    return list(map(int, s.split(';')))

meta_data_test_2 = pd.read_csv("../../datasets/archive/MyTest.csv")
initial_path = "../../datasets/archive/MyTest/result_images/"
images_paths = meta_data_test_2["Path"]
label_paths = meta_data_test_2["ClassId"]
label_paths_arrays = [convert_to_list(label) for label in label_paths]

proba_threshold = 0.85
final_pred = []

for images_path, label_path_array in zip(images_paths, label_paths_arrays):
    hogs_my_test = []
    lbps_my_test = []
    hists_my_test = []

    file_name = os.path.splitext(os.path.basename(images_path))[0]
    file_name_without_extension = file_name.split('/')[-1]
    folder_path = initial_path + file_name_without_extension + "/"
    files = os.listdir(folder_path)

    for file in files:
        if os.path.isfile(folder_path + file):
            subfile_name = os.path.splitext(os.path.basename(folder_path + file))[0]
            subfile_name_without_extension = subfile_name.split('/')[-1]
            new_path = folder_path + "features/" + subfile_name_without_extension + "/"

            hog_path = new_path + "hog_" + subfile_name_without_extension + ".npy"
            lbp_path = new_path + "lbp_" + subfile_name_without_extension + ".npy"
            hist_path = new_path + "hist_" + subfile_name_without_extension + ".npy"
            hogs_my_test.append(np.load(hog_path))
            lbps_my_test.append(np.load(lbp_path))
            hists_my_test.append(np.load(hist_path))

    label_pred_array = []
    if hogs_my_test != []:
        X_my_test = np.concatenate((np.array(hogs_my_test), np.array(lbps_my_test), np.array(hists_my_test)), axis=1)
        pred_my_test = model_full.predict(X_my_test)
        pred_proba_my_test = model_full.predict_proba(X_my_test)
        for pred, pred_proba in zip(pred_my_test, pred_proba_my_test):
            max_pred_proba = np.max(pred_proba)
            if max_pred_proba > proba_threshold and pred not in label_pred_array:
                label_pred_array.append(pred)
    final_pred.append(label_pred_array)
    create_final_image(file_name_without_extension, label_pred_array)

# **Assessing the performance of the model on MY test set**

In [106]:
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()

mlb.fit(final_pred + label_paths_arrays)

final_pred_binary = mlb.transform(final_pred)
label_paths_binary = mlb.transform(label_paths_arrays)

print("Accuracy: " + str(metrics.accuracy_score(label_paths_binary, final_pred_binary)))
print("Precision: " + str(metrics.precision_score(label_paths_binary, final_pred_binary, average='macro')))
print("Recall: " + str(metrics.recall_score(label_paths_binary, final_pred_binary, average='macro')))
print("F1 Score: " + str(metrics.f1_score(label_paths_binary, final_pred_binary, average='macro')))

Accuracy: 0.1275820170109356
Precision: 0.2513973849657943
Recall: 0.09932880125903382
F1 Score: 0.08323461031418082


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
