In [550]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

In [551]:
# Fixed seed so the shuffle -- and therefore the train/test split and every
# score computed from it -- is identical after "Restart Kernel -> Run All".
RANDOM_SEED = 42
# Share of the shuffled rows that goes into the training split.
TRAIN_FRACTION = 0.8
# Class ids the one-hot encoding compares against; column j of the one-hot
# matrices corresponds to CLASS_LABELS[j].
CLASS_LABELS = range(1, 5)

# Load the data set and shuffle all rows (frac=1 keeps every row).
data = pd.read_csv('wall-robot-nav.csv').sample(frac=1, random_state=RANDOM_SEED)
X = data.loc[:, 'V1':'V24']
# Scale each feature column by its maximum over the whole data set.
# NOTE(review): the maxima include the rows that later become the test split;
# confirm this is acceptable for the evaluation.
X = X / np.array(X).max(axis=0)
y = data['Class']

# The first TRAIN_FRACTION of the shuffled rows (rounded up) is the training
# split, the remainder is the test split.
train_size = math.ceil(X.shape[0] * TRAIN_FRACTION)
test_size = X.shape[0] - train_size
X_train = X.head(train_size)
y_train = y.head(train_size)
X_test = X.tail(test_size)
y_test = y.tail(test_size)

# One-hot encode the labels: one int8 column per entry of CLASS_LABELS.
y_train_one_hot = np.array([np.array(y_train) == label for label in CLASS_LABELS]).astype('int8').T
y_test_one_hot = np.array([np.array(y_test) == label for label in CLASS_LABELS]).astype('int8').T

In [552]:

def distance_euclid(x, xi):
    """Return the Euclidean distance between ``x`` and every row of ``xi``.

    The difference ``x - xi`` is squared element-wise, summed along axis 1
    and square-rooted, giving one distance per row of the difference.
    """
    delta = x - xi
    squared_delta = np.square(delta)
    row_totals = squared_delta.sum(axis=1)
    return np.sqrt(row_totals)


def uniform_kernel(x):
    """Uniform (rectangular) kernel: 0.5 where ``x < 1``, 0 where ``x >= 1``.

    The input is never modified; a new float array of the same shape is
    returned. Integer input is handled correctly because the result is
    always computed in floating point (writing 0.5 into an integer array
    would silently truncate it to 0).

    Parameters
    ----------
    x : array-like
        Scaled distances (distance divided by the window width).

    Returns
    -------
    numpy.ndarray
        Kernel weights. Entries that satisfy neither comparison (NaN) are
        passed through unchanged.
    """
    values = np.asarray(x, dtype=float)
    # The nested where keeps NaN entries as NaN instead of mapping them to 0.
    return np.where(values < 1, 0.5, np.where(values >= 1, 0.0, values))


class KernelRegressionClassifier:
    """Classifier based on a kernel-weighted average over the k nearest neighbours.

    For every query row the distances to all stored training rows are
    computed, scaled by a window width, passed through a kernel to obtain
    weights, and the labels of the ``k`` nearest training rows are averaged
    with those weights.

    Parameters
    ----------
    k : int
        Number of nearest training rows used for each prediction.
    h : float or None
        Fixed window width. When ``None`` the width is chosen per query row
        as the distance to the training row at sorted position ``k``
        (i.e. the (k+1)-th nearest one).
    kernel_name : str
        Key into ``kernels``.
    distance_func_name : str
        Key into ``distance_functions``.

    Raises
    ------
    KeyError
        If ``kernel_name`` or ``distance_func_name`` is not registered.
    """

    # Element-wise math.ceil, applied to the averaged labels in the 1-D label case.
    vectorized_ceil = np.vectorize(math.ceil)
    # Registry of available distance functions, looked up by name in __init__.
    distance_functions = {
        'euclid': distance_euclid
    }
    # Registry of available kernels, looked up by name in __init__.
    kernels = {
        'uniform': uniform_kernel,
    }

    def __init__(self, k=10, h=None, kernel_name='uniform', distance_func_name='euclid'):
        self.k = k
        self.h = h
        self.kernel_name = kernel_name
        self.distance_func_name = distance_func_name
        self.kernel = self.kernels[self.kernel_name]
        self.distance_func = self.distance_functions[self.distance_func_name]

    @classmethod
    def from_data(cls, X, y, h, k=10, kernel_name='uniform', distance_func_name='euclid'):
        """Build a classifier and fit it on ``X``/``y`` in one step.

        Arguments are forwarded by keyword: ``__init__`` takes ``k`` before
        ``h``, so passing them positionally in this method's order would
        swap the two.
        """
        instance = cls(
            k=k,
            h=h,
            kernel_name=kernel_name,
            distance_func_name=distance_func_name,
        )
        instance.fit(X, y)
        return instance

    def fit(self, X, y):
        """Store the training rows ``X`` and the labels ``y`` (converted to an ndarray)."""
        self.X = X
        self.y = np.array(y)

    def multiply_weihts_of_nearest(self, weights, indexes, y):
        """Weighted average of ``y`` over the selected neighbours of each query row.

        For row ``i`` this is ``sum(w * y) / sum(w)`` where ``w`` are the
        entries of ``weights[i]`` at positions ``indexes[i]`` and ``y`` is
        taken at the same positions. Returns a list with one value per row
        of ``indexes``. If all selected weights of a row are zero the ratio
        is 0/0 and is not guarded here.
        """
        averages = []
        for row_weights, row_indexes in zip(weights, indexes):
            selected = row_weights[row_indexes]
            averages.append((selected * y[row_indexes]).sum() / selected.sum())
        return averages

    def predict(self, x):
        """Predict a label for every row of ``x``.

        Returns
        -------
        numpy.ndarray
            With 2-D (one-hot) training labels: the index of the column with
            the largest weighted average, plus 1. With 1-D training labels:
            the weighted average label rounded up with ``math.ceil``.

        Raises
        ------
        IndexError
            If ``h`` is ``None`` and ``k`` is not smaller than the number of
            training rows (there is no row at sorted position ``k``).
        """
        # distances[i, j]: distance between query row i and training row j.
        distances = np.apply_along_axis(self.distance_func, 1, x, self.X)
        # Sort once; both the neighbour selection and the adaptive window
        # width are read from the same ordering.
        order = distances.argsort()
        nearest = order[:, :self.k]

        window_width = self.h
        if self.h is None:
            # Per-row width: distance at sorted position k, as a column
            # vector so it broadcasts across each row of distances.
            row_ids = np.arange(distances.shape[0])
            window_width = distances[row_ids, order[:, self.k]][:, np.newaxis]

        weights = self.kernel(distances / window_width)

        if self.y.ndim == 2:
            # One weighted average per label column; pick the strongest
            # column and shift by one to get the returned class id.
            per_column = np.array([
                self.multiply_weihts_of_nearest(weights, nearest, self.y[:, column])
                for column in range(self.y.shape[1])
            ]).T
            return np.argmax(per_column, axis=1) + 1

        averaged = self.multiply_weihts_of_nearest(weights, nearest, self.y)
        return self.vectorized_ceil(averaged)

In [553]:
def analise(y, y_pred, n_classes=4):
    """Print and return the F-measure built from macro-averaged precision and recall.

    Per-class recall and precision are computed for the class ids
    ``1 .. n_classes``, each is averaged over the classes, and the two
    means are combined as ``2 * P * R / (P + R)``.

    Parameters
    ----------
    y : array-like
        True class ids.
    y_pred : array-like
        Predicted class ids, same length as ``y``.
    n_classes : int, optional
        Number of classes; ids are taken to be ``1 .. n_classes``.
        Defaults to 4.

    Returns
    -------
    float
        The F-measure (also printed). It is NaN when some class never
        occurs in ``y`` or in ``y_pred``, because the corresponding ratio
        is a division by zero.
    """
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    labels = range(1, n_classes + 1)

    # True positives per class: prediction is correct and equals the class id.
    tp_list = [np.sum((y == y_pred) & (y_pred == label)) for label in labels]
    recall_mean = np.mean([tp / np.sum(y == label) for tp, label in zip(tp_list, labels)])
    precision_mean = np.mean([tp / np.sum(y_pred == label) for tp, label in zip(tp_list, labels)])

    f_measure = 2 * precision_mean * recall_mean / (precision_mean + recall_mean)
    print(f_measure)
    return f_measure

In [554]:
# Evaluate the classifier for every neighbour count k from 1 to 20 and print
# the F-measure of each run on the test split.
for k in range(1, 21):
    clf = KernelRegressionClassifier(k=k)
    # Train on the one-hot labels; the plain-label variant would be
    # clf.fit(X_train, y_train).
    clf.fit(X_train, y_train_one_hot)
    y_test_pred = clf.predict(X_test)
    analise(np.array(y_test), y_test_pred)

# from sklearn.metrics import confusion_matrix
# import seaborn as sns
#
# cm = confusion_matrix(y_test, y_test_pred)
# plt.figure(dpi=200)
# sns.set(font_scale=0.6)
# g = sns.heatmap(cm, annot=True, annot_kws={"size": 5}, cmap="YlGnBu")
# plt.show()

[2 2 4 ... 2 1 1]
0.8811966871578408
[2 2 4 ... 2 1 1]
0.8670867180895042
[2 2 4 ... 2 1 1]
0.8739100653402976
[2 2 4 ... 2 1 1]
0.8558621454611628
[2 2 4 ... 2 1 1]
0.8641964090643517
[2 2 4 ... 2 1 1]
0.8444283776489175
[2 2 4 ... 2 1 1]
0.8529335910175225
[2 2 4 ... 2 1 1]
0.8489366880569961
[2 2 4 ... 2 1 1]
0.8459989256317354
[2 2 4 ... 2 1 1]
0.8372659220356303
[2 2 4 ... 2 1 1]
0.8376786033328191
[2 2 4 ... 2 1 1]
0.8318988592963785
[2 2 4 ... 2 1 1]
0.8364730451823643
[2 2 4 ... 2 1 1]
0.8315309245327853
[2 2 4 ... 2 1 1]
0.8350246400703556
[2 2 4 ... 2 1 1]
0.8317757478291851
[2 2 4 ... 2 1 1]
0.8387353453956976
[2 2 4 ... 2 1 1]
0.8235986439686583
[2 2 4 ... 2 1 1]
0.8220732161780607
[2 2 4 ... 2 1 1]
0.8201636419080099
[2 2 4 ... 2 1 1]
0.8196068248087547
[2 2 4 ... 2 1 1]
0.8214453200643785
[2 2 4 ... 2 1 1]
0.8115272229619412
[2 2 4 ... 2 1 1]
0.8067481115549995
[2 2 4 ... 2 1 1]
0.8073601207607526
[2 2 4 ... 2 1 1]
0.7950801664687296
[2 2 4 ... 2 1 1]
0.7960437844213506
[