In [1]:
# FOR KNN

In [2]:
import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.model_selection import train_test_split

class KNN(object):
    def __init__(self, n_neighbors=5):
        self.n_neighbors = n_neighbors

    def fit(self, X, y):
        self.X = X
        self.y = y
        return self

    def _distance(self, data1, data2):
        return np.sqrt(sum((data1 - data2)**2))

    def _compute_weights(self, distances):
        return [(1, y) for d, y in distances]

    def _predict_one(self, test):
        distances = sorted((self._distance(x, test), y) for x, y in zip(self.X, self.y))
        weights = self._compute_weights(distances[:self.n_neighbors])
        weights_by_class = defaultdict(list)
        for d, c in weights:
            weights_by_class[c].append(d)
        return max((sum(val), key) for key, val in weights_by_class.items())[1]

    def predict(self, X):
        return [self._predict_one(i) for i in X]
    
    def score(self, X, y):
        return sum(1 for p, t in zip(self.predict(X), y) if p == t) / len(y)

In [3]:
dataset = np.loadtxt("paf.csv", delimiter=",")
        
datadataset = np.loadtxt("paf.csv", delimiter=",")
train, test = train_test_split(dataset, test_size=0.3)
        
X_train = train[:,0:33]
y_train = train[:,33]
        
X_test = test[:,0:33]
y_test = test[:,33]


# print(neighbor.score(X_train, y_train))
# print(neighbor.score(X_test, y_test))

In [4]:
# FOR TRAİN DATASET
results = []
for n in range(1, 20, 2):
    
    neighbor = KNN(n_neighbors=n).fit(X_train, y_train)
    TrainAcc = neighbor.score(X_train, y_train)
    print ("Neighbors: %d, Train Accuracy: %3f" % (n, TrainAcc))
    results.append([n, TrainAcc])

Neighbors: 1, Train Accuracy: 1.000000
Neighbors: 3, Train Accuracy: 0.931900
Neighbors: 5, Train Accuracy: 0.917563
Neighbors: 7, Train Accuracy: 0.897849
Neighbors: 9, Train Accuracy: 0.881720
Neighbors: 11, Train Accuracy: 0.865591
Neighbors: 13, Train Accuracy: 0.865591
Neighbors: 15, Train Accuracy: 0.862007
Neighbors: 17, Train Accuracy: 0.849462
Neighbors: 19, Train Accuracy: 0.860215


In [5]:
# FOR TEST DATASET
for n in range(1, 20, 2):
    
    neighbor = KNN(n_neighbors=n).fit(X_train, y_train)
    TestAcc = neighbor.score(X_test, y_test)
    print ("Neighbors: %d, Train Accuracy: %3f" % (n, TestAcc))

Neighbors: 1, Train Accuracy: 0.870833
Neighbors: 3, Train Accuracy: 0.858333
Neighbors: 5, Train Accuracy: 0.870833
Neighbors: 7, Train Accuracy: 0.841667
Neighbors: 9, Train Accuracy: 0.841667
Neighbors: 11, Train Accuracy: 0.837500
Neighbors: 13, Train Accuracy: 0.841667
Neighbors: 15, Train Accuracy: 0.829167
Neighbors: 17, Train Accuracy: 0.829167
Neighbors: 19, Train Accuracy: 0.837500


In [6]:
# FOR WEİGHTED KNN

In [9]:
import numpy as np
from collections import defaultdict

class WKNN(object):
    def __init__(self, n_neighbors=5, weights='distance', p=2):
        self.n_neighbors = n_neighbors
        self.weights = weights
        self.p = p

    def fit(self, X, y):
        self.X = X
        self.y = y
        return self

    def _distance(self, data1, data2):
        """1: Manhattan, 2: Euclidean"""
        if self.p == 1:
            return sum(abs(data1 - data2))          
        elif self.p == 2:
            return np.sqrt(sum((data1 - data2)**2))
        raise ValueError("p not recognized: should be 1 or 2")

    def _compute_weights(self, distances):
        if self.weights == 'uniform':
            return [(1, y) for d, y in distances]
        elif self.weights == 'distance':
            matches = [(1, y) for d, y in distances if d == 0]
            return matches if matches else [(1/d, y) for d, y in distances]
        raise ValueError("weights not recognized: should be 'uniform' or 'distance'")

    def _predict_one(self, test):
        distances = sorted((self._distance(x, test), y) for x, y in zip(self.X, self.y))
        weights = self._compute_weights(distances[:self.n_neighbors])
        weights_by_class = defaultdict(list)
        for d, c in weights:
            weights_by_class[c].append(d)
        return max((sum(val), key) for key, val in weights_by_class.items())[1]

    def predict(self, X):
        return [self._predict_one(x) for x in X]

    def score(self, X, y):
        return sum(1 for p, t in zip(self.predict(X), y) if p == t) / len(y)

In [10]:
# FOR TRAİN DATASET
results = []
for n in range(1, 20, 2):
    
    neighbor = WKNN(n_neighbors=n).fit(X_train, y_train)
    TrainAcc = neighbor.score(X_train, y_train)
    print ("Neighbors: %d, Train Accuracy: %3f" % (n, TrainAcc))
    results.append([n, TrainAcc])

Neighbors: 1, Train Accuracy: 1.000000
Neighbors: 3, Train Accuracy: 1.000000
Neighbors: 5, Train Accuracy: 1.000000
Neighbors: 7, Train Accuracy: 1.000000
Neighbors: 9, Train Accuracy: 1.000000
Neighbors: 11, Train Accuracy: 1.000000
Neighbors: 13, Train Accuracy: 1.000000
Neighbors: 15, Train Accuracy: 1.000000
Neighbors: 17, Train Accuracy: 1.000000
Neighbors: 19, Train Accuracy: 1.000000


In [11]:
# FOR TEST DATASET
for n in range(1, 20, 2):
    
    neighbor = WKNN(n_neighbors=n).fit(X_train, y_train)
    TestAcc = neighbor.score(X_test, y_test)
    print ("Neighbors: %d, Train Accuracy: %3f" % (n, TestAcc))

Neighbors: 1, Train Accuracy: 0.870833
Neighbors: 3, Train Accuracy: 0.862500
Neighbors: 5, Train Accuracy: 0.883333
Neighbors: 7, Train Accuracy: 0.850000
Neighbors: 9, Train Accuracy: 0.850000
Neighbors: 11, Train Accuracy: 0.850000
Neighbors: 13, Train Accuracy: 0.858333
Neighbors: 15, Train Accuracy: 0.837500
Neighbors: 17, Train Accuracy: 0.845833
Neighbors: 19, Train Accuracy: 0.845833
