# KNN 알고리즘 구현

### 데이터 불러오기: Iris

In [533]:
from sklearn.datasets import load_iris

# Keep only the feature matrix and the label vector from the loaded dataset.
iris_ = load_iris()
iris = dict(data=iris_['data'], target=iris_['target'])

### 훈련용 데이터와 테스트용 데이터로 분리

In [534]:
from sklearn.model_selection import train_test_split

# Stratify on the labels so both splits keep the same class proportions.
# No random_state is passed, so the split differs from run to run.
train_data, test_data, train_target, test_target = train_test_split(
    iris['data'],
    iris['target'],
    stratify=iris['target'],
)

### 학습: 데이터 저장

In [535]:
# KNN "training" only memorises the samples together with their labels.
train = dict(data=train_data, target=train_target)

In [536]:
# Wrapped as a function
def fit(train_data, train_target):
    """Memorise the training set for later nearest-neighbour lookups.

    KNN has no real training step: "fitting" only stores the samples and
    their labels in the module-level ``train`` dict.

    Args:
        train_data: Training samples, one feature vector per entry.
        train_target: Label for each entry of ``train_data``.

    Returns:
        The dict now bound to the module-level name ``train``.
    """
    # Without ``global`` the assignment would only create a local variable
    # that is discarded on return, leaving the stored training set untouched.
    global train
    train = {'data': train_data, 'target': train_target}
    return train

### 예측

- 각 데이터까지의 거리 구하기

In [537]:
import numpy as np

def dist(x, y):
    """Return the Euclidean distance between points ``x`` and ``y``.

    Coordinates are paired positionally; pairing stops at the shorter input.
    """
    squared_gaps = []
    for a, b in zip(x, y):
        squared_gaps.append((b - a) ** 2)
    return np.sqrt(np.sum(squared_gaps))

In [538]:
test_data[2]

array([7.2, 3. , 5.8, 1.6])

In [539]:
# Distance from the chosen test sample (index 2) to every training sample,
# kept alongside the label of that training sample.
dlist = {
    'dist': [
        dist(test_data[2], sample)
        for sample, _ in zip(train['data'], train['target'])
    ],
    'target': [
        label
        for _, label in zip(train['data'], train['target'])
    ],
}

### 가장 가까운 점 3개의 타겟 추출

In [540]:
# NOTE(review): the [:3] slice is applied before argsort, so this ranks only
# the first three distances in the list instead of locating the three
# smallest distances overall.
np.argsort(dlist['dist'][:3])

array([0, 1, 2], dtype=int64)

In [541]:
# Labels of the three training samples closest to the query point.
n3_neighbors = [dlist['target'][idx] for idx in np.argsort(dlist['dist'])[:3]]

In [542]:
n3_neighbors

[2, 2, 2]

### 가장 개수가 많은 타겟 찾기

In [543]:
# Majority vote: pick the label that occurs most often among the neighbours.
# np.unique returns sorted labels and np.argmax the first maximum, so a tie
# goes to the smallest label.
labels, counts = np.unique(n3_neighbors, return_counts=True)
labels[np.argmax(counts)]

2

### 실제 값과 비교

In [544]:
test_target[2]

2

In [545]:
# Wrapped as a function
def predict(data, n_neighbors=3):
    """Classify each sample by majority vote among its nearest training points.

    Reads the stored training set from the module-level ``train`` dict and
    measures closeness with ``dist``.

    Args:
        data: Iterable of samples to classify.
        n_neighbors: How many nearest training samples take part in the vote.
            Defaults to 3, the value that was previously hard-coded.

    Returns:
        A list with one predicted label per sample, in input order.
    """
    # The training pairs do not change between samples, so build them once.
    pairs = list(zip(train['data'], train['target']))
    return_list = []

    for sample in data:
        distances = np.array([dist(sample, point) for point, _ in pairs])
        nearest = [pairs[i][1] for i in distances.argsort()[:n_neighbors]]

        # np.unique returns sorted labels and np.argmax the first maximum,
        # so a tied vote resolves to the smallest label.
        labels, counts = np.unique(nearest, return_counts=True)
        return_list.append(labels[np.argmax(counts)])

    return return_list

In [546]:
np.array(predict(test_data))

array([2, 1, 2, 1, 1, 1, 2, 1, 0, 2, 0, 0, 0, 2, 1, 2, 1, 0, 1, 2, 0, 1,
       1, 0, 0, 2, 1, 2, 2, 2, 1, 0, 1, 1, 0, 0, 0, 2])

In [547]:
test_target

array([2, 2, 2, 1, 1, 1, 2, 1, 0, 2, 0, 0, 0, 2, 1, 2, 1, 0, 1, 2, 0, 1,
       1, 0, 0, 2, 1, 2, 2, 2, 1, 0, 1, 1, 0, 0, 0, 2])

___
### 클래스로 정리

In [548]:
class MyKNNClassifier:
    """Minimal k-nearest-neighbours classifier using Euclidean distance.

    Fitting only memorises the training set. Prediction ranks every stored
    sample by distance to the query and takes a majority vote among the
    ``n_neighbors`` closest ones.
    """

    n_neighbors = 3  # class-level default; __init__ sets the per-instance value

    def __init__(self, n_neighbors=3):
        """Create a classifier that votes among ``n_neighbors`` neighbours."""
        self.n_neighbors = n_neighbors
        # Storage must be per instance: a dict defined on the class is shared
        # by every classifier, so fitting one would overwrite all the others.
        self.train = {}

    def fit(self, train_data, train_target):
        """Store the training samples and their labels.

        Args:
            train_data: Training samples, one feature vector per entry.
            train_target: Label for each entry of ``train_data``.

        Returns:
            This classifier, so calls can be chained.
        """
        # Rebind instead of mutating so no state leaks between instances.
        self.train = {'data': train_data, 'target': train_target}
        return self

    def _dist(self, x, y):
        """Return the Euclidean distance between points ``x`` and ``y``."""
        return np.sqrt(np.sum([(b - a) ** 2 for a, b in zip(x, y)]))

    def predict(self, data):
        """Predict a label for every sample in ``data``.

        Args:
            data: Iterable of samples to classify.

        Returns:
            A list with one predicted label per sample, in input order.
        """
        # The training pairs do not change between samples, so build them once.
        pairs = list(zip(self.train['data'], self.train['target']))
        return_list = []

        for sample in data:
            distances = np.array(
                [self._dist(sample, point) for point, _ in pairs]
            )
            # Honour the configured neighbour count rather than a fixed 3.
            closest = distances.argsort()[:self.n_neighbors]
            nearest = [pairs[i][1] for i in closest]

            # np.unique returns sorted labels and np.argmax the first
            # maximum, so a tied vote resolves to the smallest label.
            labels, counts = np.unique(nearest, return_counts=True)
            return_list.append(labels[np.argmax(counts)])

        return return_list

    def score(self, data, target):
        """Return the fraction of samples whose prediction equals ``target``.

        Args:
            data: Iterable of samples to classify.
            target: True label for each sample in ``data``.
        """
        predicted = self.predict(data)
        return np.mean([a == b for a, b in zip(predicted, target)])


In [549]:
# Fit the hand-written classifier on the training split and report its
# accuracy on the held-out test split.
mknn = MyKNNClassifier(n_neighbors=3)
mknn.fit(train_data, train_target)
mknn.score(test_data, test_target)

0.9736842105263158

### sklearn과 비교

In [550]:
from sklearn.neighbors import KNeighborsClassifier

# Reference run: scikit-learn's classifier with the same neighbour count,
# fitted and scored on the same splits.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(train_data, train_target)
knn.score(test_data, test_target)

0.9736842105263158