# KNN K-近邻算法

## 注意点

 - 没有显式的学习过程
 - 可用作分类或回归问题
 - 由k个邻居共同决策预测结果

## 模型影响因素

- 距离度量方法
- k值选择
- 预测决策规则

## 如何选取k值

- 通常使用交叉验证法确定合适的k值
- k值越大，模型容易欠拟合，方差减小，偏差增大
- k值越小，模型容易过拟合，容易受异常值影响，偏差减小，方差增大

## 实现方式

- 线性搜索：实现容易，但效率不高
- kdtree：适用于数据维度d不太大的情况，一般要求数据规模N满足N>>2^d，否则搜索效率会退化到接近线性搜索

## 代码示例

In [1]:
from sklearn.datasets import load_iris

In [6]:
iris = load_iris()

### 数据维度

In [7]:
iris.data.shape

(150, 4)

### 数据类别

In [9]:
iris.target_names,iris.target

(array(['setosa', 'versicolor', 'virginica'], dtype='<U10'),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]))

### 模型训练

In [2]:
from learn_ml.neighbour.knn import KNN
from sklearn.model_selection import train_test_split

In [17]:
x_train,x_test,y_train,y_test = train_test_split(iris.data,iris.target,test_size=0.9)

In [16]:
kdtree = KNN(x_train,y_train,k=3)
y_predict = []
for x in x_test:
    y_predict.append(kdtree.predict(x))

In [12]:
from sklearn.metrics import accuracy_score,f1_score

In [21]:
accuracy_score(y_predict,y_test)

1.0

In [24]:
f1_score(y_predict,y_test,average='micro')

1.0

In [3]:
from learn_ml.probabilistic.naive_bayes import NaiveBayes

In [18]:
clf = NaiveBayes()
clf.fit(x_train,y_train)

In [21]:
type(x_train[0][0])

numpy.float64

In [19]:
preds=clf.predict(x_test)

KeyError: 3.5

In [14]:
accuracy_score(preds,y_test)

1.0

In [15]:
preds,y_test

(array([2, 0, 1, 1, 1, 1, 0, 2, 0, 0, 1, 1, 2, 0, 1]),
 array([2, 0, 1, 1, 1, 1, 0, 2, 0, 0, 1, 1, 2, 0, 1]))

In [16]:
y_train

array([2, 2, 0, 1, 0, 1, 0, 0, 2, 1, 2, 0, 1, 1, 2, 2, 0, 2, 2, 1, 2, 2,
       2, 0, 2, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 2, 0, 1, 1,
       0, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 0, 2,
       0, 2, 0, 2, 1, 2, 1, 1, 2, 0, 0, 0, 0, 0, 0, 2, 2, 1, 0, 0, 0, 1,
       1, 1, 0, 0, 0, 2, 2, 1, 2, 2, 2, 1, 0, 1, 0, 1, 0, 0, 2, 2, 0, 2,
       0, 2, 0, 0, 1, 1, 0, 0, 2, 0, 0, 1, 0, 0, 2, 2, 2, 2, 2, 0, 0, 1,
       2, 2, 2])