In [None]:
%autosave 15
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

import numpy as np
import scipy as sp
from scipy import stats as sps
import scipy.optimize as sopt
import scipy.stats
from scipy.stats import norm

from collections import namedtuple
from functools import partial
from IPython.display import display, HTML

import math
import random

In [None]:
# Load the data set: the first two columns are the point coordinates,
# the third column is the class label.
data = pd.read_csv("chips.txt", sep=',', header=None)

xy = data.values[:, :2]
color = data.values[:, 2].astype('int')

# Relabel class 0 as -1 so that the labels are -1 / +1.
color[color == 0] = -1

# Shuffle the samples once so that the contiguous cross-validation folds are
# not ordered like the file. A dedicated, seeded generator keeps the folds
# (and therefore every score computed from them) identical across
# "Restart & Run All" and leaves the global `random` state untouched.
SHUFFLE_SEED = 42
perm = list(range(len(xy)))
random.Random(SHUFFLE_SEED).shuffle(perm)
xy = xy[perm]
color = color[perm]

In [None]:
def printAllPoints(xy, color):
    """Plot the samples as dots: label 1 in blue, every other label in green.

    xy    -- samples, indexed as xy[i, 0] (x coordinate) and xy[i, 1] (y coordinate)
    color -- labels, color[i] belongs to sample i
    """
    blue_x, blue_y = [], []
    green_x, green_y = [], []
    for idx in range(len(xy)):
        # Pick the pair of coordinate lists that matches this sample's class.
        if color[idx] == 1:
            target_x, target_y = blue_x, blue_y
        else:
            target_x, target_y = green_x, green_y
        target_x.append(xy[idx, 0])
        target_y.append(xy[idx, 1])
    plt.plot(green_x, green_y, 'g.', blue_x, blue_y, 'b.')

In [None]:
def drawPrediction(plot_name, xy, actual, classifier):
    """Fit `classifier` and plot its predicted class over a grid, with the samples on top.

    plot_name  -- title of the plot
    xy         -- samples; each element is indexable as p[0], p[1]
    actual     -- labels of the samples (label 1 is drawn blue, anything else green)
    classifier -- object with fit(xy, labels) and predict(points); predict must
                  accept a 2-D array with one point per row and return one label
                  per row

    The grid spans the bounding box of the samples extended by 0.1 on every
    side, with a step of 0.01.
    """
    classifier.fit(xy, actual)

    margin = 0.1  # how far the grid extends beyond the extreme samples
    step = 0.01   # grid resolution

    xs = [p[0] for p in xy]
    ys = [p[1] for p in xy]
    # The grid axes do not change while scanning, so build them once instead of
    # re-evaluating min/max over all samples for every grid column.
    grid_x = np.arange(min(xs) - margin, max(xs) + margin, step)
    grid_y = np.arange(min(ys) - margin, max(ys) + margin, step)

    green = [[], []]
    blue = [[], []]
    for x in grid_x:
        # Classify a whole grid column with one predict() call rather than one
        # call per grid point; the point order (x-major, y ascending) is kept.
        column = np.column_stack((np.full(len(grid_y), x), grid_y))
        is_blue = np.ravel(classifier.predict(column)) == 1
        for bucket, mask in ((blue, is_blue), (green, ~is_blue)):
            bucket[0].extend(column[mask, 0])
            bucket[1].extend(column[mask, 1])

    plt.title(plot_name)
    plt.plot(green[0], green[1], 'xkcd:lightgreen', blue[0], blue[1], '#ADD8E6')

    printAllPoints(xy, actual)

In [None]:
# Show the loaded samples: label 1 in blue, the other label in green.
printAllPoints(xy, color)

In [None]:
def getConfusion(predicted, actual):
    """Build the 2x2 confusion matrix, treating label 1 as the positive class.

    predicted -- predicted labels (array-like)
    actual    -- true labels (array-like, same length)

    Returns [[true_positive, false_positive], [false_negative, true_negative]].
    Any label other than 1 counts as negative.
    """
    # Convert up front: with plain lists `predicted == 1` is a single False
    # and every count would silently come out as zero.
    predicted = np.asarray(predicted)
    actual = np.asarray(actual)

    is_positive = predicted == 1
    is_correct = predicted == actual

    true_positive = np.sum(np.logical_and(is_positive, is_correct))
    false_positive = np.sum(np.logical_and(is_positive, np.logical_not(is_correct)))
    false_negative = np.sum(np.logical_and(np.logical_not(is_positive), np.logical_not(is_correct)))
    true_negative = np.sum(np.logical_and(np.logical_not(is_positive), is_correct))

    return [[true_positive, false_positive], [false_negative, true_negative]]
    

def getF1Score(conf):
    """Compute the F1 score from a confusion matrix.

    conf -- [[tp, fp], [fn, tn]]

    Precision and recall default to 0 when their denominator is 0, and the
    score is 0 when precision + recall is not positive.
    """
    (tp, fp), (fn, tn) = conf[0], conf[1]

    actual_positives = tp + fn
    predicted_positives = tp + fp

    recall = (1.0 * tp) / actual_positives if actual_positives != 0 else 0
    precision = (1.0 * tp) / predicted_positives if predicted_positives != 0 else 0

    if not (precision + recall > 0):
        return 0
    return 2.0 * precision * recall / (precision + recall)

In [None]:
def k_fold_cv(classifier, k = 10, points=None, labels=None):
    """Return out-of-fold predictions of `classifier` from k-fold cross-validation.

    classifier -- object with fit(points, labels) and predict(points)
    k          -- number of contiguous folds (at least 2)
    points     -- samples to split; defaults to the notebook-level `xy`
    labels     -- labels to split; defaults to the notebook-level `color`

    The data is cut into `k` consecutive parts with np.array_split (sizes may
    differ by one). For every part the classifier is refitted on the other
    parts and asked to predict the held-out part. The predictions are returned
    as one flat float array in the original sample order.

    Raises ValueError if k < 2 (there would be no training data).
    """
    if points is None:
        points = xy
    if labels is None:
        labels = color
    if k < 2:
        raise ValueError("k_fold_cv needs at least 2 folds, got %r" % (k,))

    point_folds = np.array_split(points, k)
    label_folds = np.array_split(labels, k)

    # Start from an empty float array so the result dtype matches repeated
    # np.append onto np.array([]).
    fold_predictions = [np.array([])]
    for i in range(k):
        # Select the training folds with a comprehension: np.delete on a list
        # of unequal-size folds would first try to build a ragged array, which
        # raises on current NumPy.
        train_points = np.concatenate([fold for j, fold in enumerate(point_folds) if j != i])
        train_labels = np.concatenate([fold for j, fold in enumerate(label_folds) if j != i])

        classifier.fit(train_points, train_labels)
        fold_predictions.append(np.ravel(classifier.predict(point_folds[i])))

    return np.concatenate(fold_predictions)

In [None]:
def square_kernel(x, y):
    """Return the squared inner product of x and y."""
    inner = np.dot(x, y)
    return inner ** 2

def paraboloid_kernel(x, y):
    """Inner product after mapping each 2-D point (a, b) to (a, b, a^2 + b^2)."""
    lifted_x = [x[0], x[1], x[0]**2 + x[1]**2]
    lifted_y = [y[0], y[1], y[0]**2 + y[1]**2]
    return np.dot(lifted_x, lifted_y)


#def parab(data):
#    w = np.array([0, 0], dtype='float64')
#    input_data, classes, transform_name, transform = data
#    for point in input_data:
#        w += np.array(point, dtype='float64')
#    w /= len(data)
#    def kernel(x, y):
#        return simple_kernel([x[0], x[1], (x[0] - w[0])**2 + (x[1] - w[1])**2], [y[0], y[1], (y[0] - w[0])**2 + (y[1] - w[1])**2])
#    return kernel


def paraboloid_kernel2(x, y):
    """square_kernel applied after mapping each 2-D point (a, b) to (a, b, a^2 + b^2)."""
    lifted_x = [x[0], x[1], x[0]**2 + x[1]**2]
    lifted_y = [y[0], y[1], y[0]**2 + y[1]**2]
    return square_kernel(lifted_x, lifted_y)

def gaussianKernel(sigma=1):
    """Build a kernel function kernel(x, y) parameterised by `sigma`.

    For d = x - y (2-D), the kernel first forms v = (d0^2 + d1^2, d0, d1) and
    returns exp(-<v, v> / (2 * sigma^2)).
    """
    def lift(d):
        return np.array([(d[0]**2 + d[1]**2), d[0], d[1]])

    def kernel(x, y):
        lifted = lift(np.array(x) - np.array(y))
        return np.exp(-np.dot(lifted, lifted) / (2 * (sigma ** 2)))

    return kernel

#def gaussianKernel(x, y):
#    sigma = 1.0
#    return np.exp(-np.sum((np.array(x) - np.array(y))**2) / (2 * sigma**2))
                  
def poly_kernel(x, y):
    """Degree-4 polynomial kernel: (0.1 * <x, y> + 1)^4."""
    products = np.array(x) * np.array(y)
    scaled_inner = sum(products) * 0.1
    return (scaled_inner + 1) ** 4


def expKern(x, y):
    """Return e^(-|x - y|^2 / 2), with e given as a numeric literal."""
    distance = np.linalg.norm(np.array(x) - np.array(y))
    exponent = - (distance ** 2) / 2
    return 2.7182818284590452354 ** exponent

def poly3(x, y):
    """Degree-3 polynomial kernel: (<x, y> + 1)^3."""
    inner = np.dot(np.array(x), np.array(y))
    return (inner + 1) ** 3

def wikiKern(x, y):
    """Return <x, y> + |x|^2 * |y|^2."""
    vec_x = np.array(x)
    vec_y = np.array(y)
    linear_part = np.dot(vec_x, vec_y)
    norm_part = (np.linalg.norm(vec_x) ** 2) * (np.linalg.norm(vec_y) ** 2)
    return linear_part + norm_part

# Kernels tried by the SVM grid search, each paired with the label shown in
# the results table.
svm_kernels = [(np.dot, '<x, y>'),
               (expKern, 'expKern'),
               (poly3, 'poly3'),
               (poly_kernel, 'polynomial'),
               (paraboloid_kernel, 'paraboloid_kernel'),
               (square_kernel, 'square_kernel'),  # label previously misspelled as 'square_kerne'
               (paraboloid_kernel2, 'paraboloid_kernel2')
        #(parab(simpleData), 'square paraboloid')
       ]

In [None]:
class SuperSvm():
    """Soft-margin kernel SVM for labels -1 / +1, trained through its dual problem.

    The dual

        minimise   0.5 * l^T Q l - sum(l),   Q[i, j] = y_i * y_j * K(x_i, x_j)
        subject to 0 <= l_i <= c   and   sum(l_i * y_i) = 0

    is solved numerically with scipy.optimize.minimize (SLSQP).

    Attributes set by fit():
      xy   -- training points
      l_y  -- multipliers multiplied element-wise by the labels
      w    -- sum_i l_i * y_i * x_i (the primal weight vector; only meaningful
              for the plain inner-product kernel)
      w_0  -- offset subtracted from the kernel expansion in the decision function
    """

    def __init__(self, c, kernel, eps=1e-8):
        """
        c      -- upper bound of every multiplier (regularisation constant)
        kernel -- callable kernel(x, y) returning a scalar
        eps    -- tolerance used to decide that a multiplier lies strictly
                  between 0 and c
        """
        self.c = c
        self.kernel = kernel
        self.eps = eps

    def kernel_vec(self, point):
        """Return the vector of kernel(x_i, point) over all training points x_i."""
        return np.apply_along_axis(lambda x_i: self.kernel(x_i, point), 1, self.xy)

    def fit(self, xy, color):
        """Train on points `xy` (one per row) with labels `color` (-1 / +1)."""
        xy = np.asarray(xy, dtype='float64')
        color = np.asarray(color, dtype='float64')
        n = len(xy)

        gram = np.array([[self.kernel(a, b) for b in xy] for a in xy], dtype='float64')
        q_matrix = np.outer(color, color) * gram

        def dual(lam):
            # The linear term is sum(lam); weighting it by c would merely scale
            # the C = 1 solution by c and break the offset computation below.
            return 0.5 * np.dot(lam, np.dot(q_matrix, lam)) - np.sum(lam)

        def dual_jac(lam):
            # Written for a possibly non-symmetric q_matrix; equals Q lam - 1
            # when the kernel is symmetric.
            return 0.5 * np.dot(q_matrix + q_matrix.T, lam) - 1.0

        balance = {"type": "eq",
                   "fun": lambda lam: np.dot(color, lam),
                   "jac": lambda lam: color}

        # Start from the all-zero vector: it satisfies every constraint and
        # makes the fit deterministic.
        lambdas = sopt.minimize(dual,
                                np.zeros(n),
                                jac=dual_jac,
                                bounds=[(0.0, self.c)] * n,
                                constraints=[balance],
                                method="SLSQP")
        self.converged = bool(lambdas.success)

        self.l_y = lambdas.x * color
        self.xy = xy
        self.w = np.dot(self.l_y, xy)

        # Points whose multiplier is strictly inside (0, c) lie exactly on the
        # margin, so for them  sum_j l_j y_j K(x_j, x_s) - w_0 = y_s.
        # Averaging over all of them is less sensitive to solver noise than
        # using a single one.
        on_margin = np.nonzero(np.logical_and(lambdas.x > self.eps,
                                              lambdas.x < self.c - self.eps))[0]
        self.w_0 = 0
        if len(on_margin) > 0:
            offsets = np.dot(self.l_y, gram[:, on_margin]) - color[on_margin]
            self.w_0 = float(np.mean(offsets))

    def _decision(self, point):
        """Signed decision value of a single point (kernel expansion minus offset)."""
        return np.dot(self.l_y, self.kernel_vec(point)) - self.w_0

    def predictPoint(self, point):
        """Return 1 if the decision value of `point` is positive, otherwise -1."""
        if self._decision(point) > 0:
            return 1
        return -1

    def predict2(self, points):
        """Predict every point through predictPoint; returns an array of -1 / 1."""
        return np.array([self.predictPoint(point) for point in points])

    def predict(self, points):
        """Predict the label (-1 or 1) of every row of `points` as an integer array."""
        return np.array([self.predictPoint(point) for point in points], dtype=int)

In [None]:
# Grid search over the regularisation constant `c` and the kernel, scored by
# the F1 of the cross-validated predictions.
svm_best_result = 0
svm_best_params = ()

# Tolerance for deciding that a multiplier lies strictly between 0 and c.
# Kept at notebook level because SuperSvm.fit may look it up as a global.
eps = 1e-8

# Collect plain rows and build the table once at the end: DataFrame.append
# was removed in pandas 2.0 and copied the whole frame on every call.
svm_rows = []

for c in [0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.5, 1, 5, 8]:
    print(c)
    for kernel in svm_kernels:
        svm_classifier = SuperSvm(c, kernel[0])
        predicted = k_fold_cv(svm_classifier)
        conf = getConfusion(predicted, color)
        cur_f1 = getF1Score(conf)

        svm_rows.append([c, kernel[1], cur_f1])

        if (cur_f1 > svm_best_result):
            svm_best_result = cur_f1
            svm_best_params = (c, kernel)

results = pd.DataFrame(svm_rows, columns=['c', 'kernel', 'f1'])

In [None]:
# Show the whole grid-search table; max_rows=None lifts pandas' row
# truncation for this statement only.
with pd.option_context('display.max_rows', None):
    display(results)

In [None]:
# Report the best SVM configuration and plot its decision regions.
c, kernel = svm_best_params
print(svm_best_result, "c:", c, "kernel:", kernel[1])
# Title previously read 'svn_best' (typo for SVM).
drawPrediction('svm_best', xy, color, SuperSvm(c, kernel[0]))

In [None]:
def minkowskiDistance(x, y, p):
    """Minkowski distance of order `p`: (sum_i |x_i - y_i|^p)^(1/p)."""
    total = 0
    for idx, x_val in enumerate(x):
        total += abs(x_val - y[idx]) ** p
    return total ** (1 / p)

# https://en.wikipedia.org/wiki/Cosine_similarity
def cosineSimilarity(x, y):
    """Cosine similarity: <x, y> / (|x| * |y|)."""
    dot_product, x_sq_sum, y_sq_sum = 0, 0, 0
    for idx, x_val in enumerate(x):
        y_val = y[idx]
        dot_product += x_val * y_val
        x_sq_sum += x_val ** 2
        y_sq_sum += y_val ** 2
    x_norm = x_sq_sum ** (1 / 2)
    y_norm = y_sq_sum ** (1 / 2)
    return dot_product / x_norm / y_norm

# Distance functions tried by the kNN grid search, each paired with the label
# shown in the results table.
knn_metrics = [
    (lambda x, y: minkowskiDistance(x, y, 1), 'minkowski_1'), 
    (lambda x, y: minkowskiDistance(x, y, 2), 'minkowski_2')
]   


# Weighting functions tried by the kNN grid search, each paired with its label.
# knn.predictPoint calls them with a neighbour's distance divided by the
# window width.
knn_kernels = [
    (lambda x: 1 / 2, 'uniform'),
    (lambda x: 1 - abs(x), 'triangular'),
    (lambda x: 3 / 4 * (1 - x * x), 'parabolic'),
    (lambda x: (1 - x ** 2) ** 2 * 15 / 16, 'quartic')
]

In [None]:
class knn():
    """Kernel-weighted k-nearest-neighbours classifier for labels -1 / +1.

    Every one of the k nearest training points votes for its own label with
    weight kernel(distance / window), where `window` is the distance to the
    (k+1)-th nearest training point.
    """

    def __init__(self, k, metric, kernel):
        """
        k      -- number of neighbours that vote
        metric -- callable metric(a, b) returning the distance between two points
        kernel -- callable kernel(r) returning the weight for a scaled distance r
        """
        self.k = k
        self.metric = metric
        self.kernel = kernel

    def fit(self, xy, color):
        """Store the training points `xy` and their labels `color` (-1 / +1)."""
        self.xy = xy
        self.n = len(xy)
        self.color = color

    def predictPoint(self, point):
        """Return the predicted label (-1 or 1) of one point; ties go to 1.

        Raises ValueError when no training points have been stored.
        """
        if self.n == 0:
            raise ValueError("knn.predictPoint needs at least one training point")

        dist = [(self.metric(self.xy[i], point), i) for i in range(self.n)]
        dist.sort()

        # Use the instance's own k (not a notebook-level variable), and clamp
        # both the window index and the neighbour count to the available data.
        neighbours = min(self.k, self.n)
        d = dist[min(self.k, self.n - 1)][0]
        if (d == 0):
            # Degenerate window (all candidates coincide with the query):
            # avoid dividing by zero.
            d = 1

        # s[0] collects the votes for label -1, s[2] those for label +1.
        s = [0] * 3
        for cur_dist, index in dist[:neighbours]:
            s[int(self.color[index]) + 1] += self.kernel(cur_dist / d)

        if (s[0] > s[2]):
            return -1
        else:
            return 1

    def predict(self, points):
        """Return an array with the predicted label of every point in `points`."""
        res = [self.predictPoint(point) for point in points]
        return np.array(res)

In [None]:
# Grid search over k, the distance metric and the weighting kernel for kNN,
# scored by the F1 of the cross-validated predictions.
knn_best_result = []
knn_best_params = []

# Collect plain rows and build the table once at the end: DataFrame.append
# was removed in pandas 2.0 and copied the whole frame on every call.
knn_rows = []

# TODO: add a feature transformation for knn
# TODO: remove the dynamic selection of the number of folds
for transform in [7]:
    knn_best_result.append(0)
    knn_best_params.append(())
    for k in [4, 5, 6, 7, 8]:
        for metric in knn_metrics:
            for kernel in knn_kernels:
                knn_classifier = knn(k, metric[0], kernel[0])
                predicted = k_fold_cv(knn_classifier)
                conf = getConfusion(predicted, color)
                cur_f1 = getF1Score(conf)

                knn_rows.append([transform, k, metric[1], kernel[1], cur_f1])

                if (cur_f1 > knn_best_result[-1]):
                    knn_best_result[-1] = cur_f1
                    knn_best_params[-1] = (transform, k, metric, kernel)

results = pd.DataFrame(knn_rows, columns=['transform', 'k', 'metric', 'kernel', 'f1'])
display(results)

In [None]:
# Report the best kNN configuration of every grid-search entry and plot its
# decision regions, one matplotlib figure per entry.
# NOTE: the loop variables stay defined as notebook globals after this cell.
figureNumber = 0
for i in range(len(knn_best_result)):
    # NOTE(review): the first tuple element is the `transform` value stored by
    # the grid search, although it is unpacked as `f` and printed as "folds:".
    f, k, metric, kernel = knn_best_params[i]
    print(knn_best_result[i], "folds:", f, "k:", k, metric[1], kernel[1])
    figureNumber += 1
    plt.figure(figureNumber)
    drawPrediction('knn_best_' + str(i), xy, color, knn(k, metric[0], kernel[0]))

In [None]:
# H0: difference between predicted classes follows a symmetric distribution around zero
# H1: difference between predicted does not follow a symmetric distribution around zero.

# reference link: https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test

# two-sided, for one-sided count only + or - values
# Significance Level: 0.01 or 0.05?
def getWilcoxonRank(res1, res2):
    """Two-sided Wilcoxon signed-rank test using the normal approximation.

    res1, res2 -- paired samples of equal length

    Pairs with a zero difference are dropped. The absolute differences
    res2 - res1 are ranked (ties receive the average rank) and the ranks are
    summed separately for negative and positive differences.

    Returns (ans_m, ans_p, z, prob):
      ans_m -- sum of the ranks of the negative differences
      ans_p -- sum of the ranks of the positive differences
      z     -- (ans_p - ans_m) / sigma, where
               sigma^2 = n(n+1)(2n+1)/6 - sum(t^3 - t)/12
               over the groups of t tied absolute differences
      prob  -- two-sided p-value 2 * P(Z > |z|) for a standard normal Z

    If every difference is zero there is nothing to rank and
    (0, 0, 0.0, 1.0) is returned.

    Raises ValueError if the samples differ in length.
    """
    if len(res1) != len(res2):
        raise ValueError("getWilcoxonRank needs samples of equal length")

    # One [magnitude, sign] entry per non-zero difference, sorted by magnitude.
    diff = []
    for first, second in zip(res1, res2):
        delta = second - first
        if delta != 0:
            diff.append([abs(delta), 1 if delta > 0 else -1])
    diff.sort()
    n = len(diff)

    if n == 0:
        return 0, 0, 0.0, 1.0

    ranks = [0.0] * n
    tie_term = 0.0
    start = 0
    while start < n:
        stop = start
        while stop < n and diff[stop][0] == diff[start][0]:
            stop += 1
        # Entries start..stop-1 hold the 1-based ranks start+1..stop;
        # every one of them receives the average of those ranks.
        average_rank = (start + 1 + stop) / 2.0
        for t in range(start, stop):
            ranks[t] = average_rank
        group_size = stop - start
        tie_term += group_size ** 3 - group_size
        start = stop

    ans_m = 0
    ans_p = 0
    for (magnitude, sign), rank in zip(diff, ranks):
        if sign < 0:
            ans_m += rank
        else:
            ans_p += rank

    # Under H0 the signed-rank sum W = ans_p - ans_m has mean 0.
    variance = n * (n + 1) * (2 * n + 1) / 6.0 - tie_term / 12.0
    z = (ans_p - ans_m) / (variance ** 0.5)

    prob = 2. * norm.sf(abs(z))

    return ans_m, ans_p, z, prob

In [None]:
# Re-run cross-validation with the best SVM and the best kNN configuration and
# compare their out-of-fold predictions with a Wilcoxon signed-rank test.
c, kernel = svm_best_params
svm_classifier = SuperSvm(c, kernel[0])
svm_predicted = k_fold_cv(svm_classifier)
svm_conf = getConfusion(svm_predicted, color)
svm_f1 = getF1Score(svm_conf)

# Take the last grid-search entry explicitly instead of indexing with a loop
# variable left over from an earlier cell.
f, k, metric, kernel = knn_best_params[-1]
knn_classifier = knn(k, metric[0], kernel[0])
knn_predicted = k_fold_cv(knn_classifier)
knn_conf = getConfusion(knn_predicted, color)
knn_f1 = getF1Score(knn_conf)

# Hand-written test next to SciPy's implementation as a cross-check.
wr = getWilcoxonRank(knn_predicted, svm_predicted)

print(len(knn_predicted))
wr2 = scipy.stats.wilcoxon(knn_predicted, svm_predicted)
print("WR:", wr)
print("WR2:", wr2)

print("svm f1:", svm_f1)
print("knn f1:", knn_f1)