In [68]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [69]:
data = pd.read_csv('wall-robot-nav.csv').sample(frac=1)
X = data.loc[:, 'V1':'V24']
X = X / np.array(X).max(axis=0) #normolize features
y = data['Class'] - 1 # reduce classes values from 1,2,3,4 to 0,1,2,3

train_size = np.ceil(X.shape[0]*0.8).astype('int32')
test_size = X.shape[0] - train_size

X_train = X.head(train_size)
y_train = y.head(train_size)
X_test = X.tail(test_size)
y_test = y.tail(test_size)

y_train_one_hot = np.eye(4, dtype=np.int8)[y_train]
y_test_one_hot = np.eye(4, dtype=np.int8)[y_test]

In [70]:
class KernelRegressionClassifier:
    distance_functions = {
        'euclid': lambda x, xi: np.sqrt((np.square(x-xi)).sum(axis=1)),
        'manhattan': lambda x, xi: np.abs(x-xi).sum(axis=1),
    }
    kernels = {
        'uniform': lambda x: np.where(x<1, 0.5, 0),
        'triweight': lambda x: np.where(x<1, (35/32 *np.power((1-np.square(x)),3)), 0),
        'triangular': lambda x: np.where(x<1, 1-x, 0),
        'quartic': lambda x: np.where(x<1, (15/16 *np.power((1-np.square(x)),2)), 0),
        'epanechnikov': lambda x: np.where(x<1, (3/4 *(1-np.square(x))) , 0),
        'cosine': lambda x: np.where(x<1, (np.pi/4 *(1-np.square(x))) , 0),
        'gaussian': lambda x: (1/np.sqrt(2* np.pi)) * np.exp((-1/2) * np.square(x)),
    }


    def __init__(self, k=10, h=None, kernel_name='uniform', distance_func_name='euclid'):
        self.k = k
        self.h = h
        self.kernel_name = kernel_name
        self.distance_func_name = distance_func_name
        self.kernel = self.kernels[self.kernel_name]
        self.distance_func = self.distance_functions[self.distance_func_name]


    @classmethod
    def from_data(cls, X, y, h, k=10, kernel_name='uniform', distance_func_name='euclid'):
        instance = cls(h,k,kernel_name,distance_func_name)
        instance.fit(X, y)
        return instance


    def fit(self, X, y):
        self.X = X
        self.y = np.array(y)


    def multiply_weihts_of_nearest(self, weights, indexes, y):
        res = []
        for i in range(len(indexes)):
            res.append((weights[i][indexes[i]] * y[indexes[i]]).sum() / weights[i][indexes[i]].sum())
        return res


    def predict(self, x):
        X_distance = np.apply_along_axis(self.distance_func, 1, x, self.X)
        nearest_indexes_matrix = X_distance.argsort()[:,:self.k]
        window_width = self.h
        if self.h is None:
            window_width = []
            for row in X_distance:
                window_width.append(row[row.argsort()[self.k]])
            window_width = np.array([window_width]).T
        X_smoothed = self.kernel(X_distance/window_width)

        if len(self.y.shape) == 2:
            return np.array([
                self.multiply_weihts_of_nearest(X_smoothed, nearest_indexes_matrix, self.y[:,i])
                for i in range(self.y.shape[1])
            ]).T
        real_weights = self.multiply_weihts_of_nearest(X_smoothed, nearest_indexes_matrix, self.y)
        return np.ceil(real_weights).astype('int8')

In [71]:
def analise(y, y_pred):
    tp_list = [np.all([y==y_pred, y_pred==i], axis=0).astype('int8').sum() for i in range(4)]
    recall_mean = np.mean([tp_list[i] / (y == i).astype('int8').sum() for i in range(4) ])
    precision_mean = np.mean([tp_list[i]/ (y_pred == i).astype('int8').sum() for i in range(4) ])
    f_measure = 2*precision_mean*recall_mean/(precision_mean+recall_mean)
    print('    f measure: ', f_measure)

In [72]:
for kernel_name in KernelRegressionClassifier.kernels.keys():
    print('                 kernel: ', kernel_name)
    for h in np.arange(0.1, 1.5,0.05):
        clf = KernelRegressionClassifier(kernel_name=kernel_name, h=h, distance_func_name='manhattan')
        # clf.fit(X_train, y_train)
        clf.fit(X_train, y_train_one_hot)
        y_test_pred = clf.predict(X_test).argmax(axis=1)
        print('h: ', h)
        analise(np.array(y_test), y_test_pred)

# from sklearn.metrics import confusion_matrix
# import seaborn as sns
#
# cm = confusion_matrix(y_test, y_test_pred)
# plt.figure(dpi=200)
# sns.set(font_scale=0.6)
# g = sns.heatmap(cm, annot=True, annot_kws={"size": 5}, cmap="YlGnBu")
# plt.show()

                 kernel:  uniform
h:  0.1
    f measure:  0.5336164013072194
h:  0.15000000000000002
    f measure:  0.5772859865178125
h:  0.20000000000000004
    f measure:  0.6147525223021467
h:  0.25000000000000006
    f measure:  0.6714013653369375
h:  0.30000000000000004
    f measure:  0.6979134009888639
h:  0.3500000000000001
    f measure:  0.7101265024719846
h:  0.40000000000000013
    f measure:  0.7297618382213089


  res.append((weights[i][indexes[i]] * y[indexes[i]]).sum() / weights[i][indexes[i]].sum())


KeyboardInterrupt: 