In [105]:
import numpy as np

class My_kNN():
    def __init__(self, k, X_train, y_train, X_test, y_test=None):
        self.k = k
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.neighbors = np.zeros((len(self.X_test), len(self.X_train))) #len(self.X_test) X len(self.X_train)
        
    #定义欧式距离
    def EuclDis(self, x0, x1):
        return np.sum(np.square(x1-x0)) #矩阵各元素平方
    
    #计算当前数据与标签数据的距离
    def Allneighbors(self):
        for i in range(len(self.X_test)):
            for j in range(len(self.X_train)):
                self.neighbors[i][j] = self.EuclDis(self.X_test[i], self.X_train[j])
    
    #分类问题:下标转为类别
    def index2label(self, index):
        knearest = self.y_train[index][:self.X_test.shape[0]] #获取下标对应的标签
        #统计k近邻的大多数:
        predict = []
        for i in range(self.X_test.shape[0]):
            predict.append(np.argmax(np.bincount(knearest[i])))
        return np.array(predict)
    
    #回归问题：下标转为数值
    def index2value(self, index):
        knearest = self.y_train[index][:self.X_test.shape[0]]
        #统计k近邻的大多数：
        predict= np.mean(knearest, axis=1) #预测结果为k近邻的均值
        return predict.reshape(-1)
    
    def kNN(self, mode="classification"):
        #1.计算距离
        self.Allneighbors()
        #2.按距离从小到大排,返回数组排序后的元素索引值
        self.sort_index = np.argsort(self.neighbors, axis=1, kind='quicksort', order= None)
        #3.取前k个近邻
        self.sort_index = self.sort_index[:,0:self.k]
        if mode == "classification":
            return self.index2label(self.sort_index)
        if mode == "regression":
            return self.index2value(self.sort_index)
    
    def score(self):
        y_pred = self.kNN()
        y_test = np.array(self.y_test)
        return np.mean(y_pred==y_test)

In [106]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split #切分数据集为训练集和测试集
from sklearn.metrics import accuracy_score #计算分类预测的准确率
import matplotlib.pyplot as plt
import pandas as pd

iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['label'] = iris.target
df.columns=['sepal length','sepal width','petal length','petal width','label']
df.head()

Unnamed: 0,sepal length,sepal width,petal length,petal width,label
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [107]:
X, Y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)
print(X_train)
print(y_train)
print(X_test)

[[6.9 3.1 4.9 1.5]
 [7.7 3.  6.1 2.3]
 [5.6 2.9 3.6 1.3]
 [7.9 3.8 6.4 2. ]
 [4.7 3.2 1.3 0.2]
 [5.  2.  3.5 1. ]
 [6.  3.4 4.5 1.6]
 [5.5 4.2 1.4 0.2]
 [6.  2.2 4.  1. ]
 [5.6 3.  4.5 1.5]
 [7.1 3.  5.9 2.1]
 [7.7 3.8 6.7 2.2]
 [4.9 2.5 4.5 1.7]
 [6.4 3.2 4.5 1.5]
 [6.7 3.3 5.7 2.1]
 [5.5 2.4 3.8 1.1]
 [5.2 2.7 3.9 1.4]
 [5.5 3.5 1.3 0.2]
 [5.8 2.8 5.1 2.4]
 [5.6 3.  4.1 1.3]
 [6.3 3.3 4.7 1.6]
 [5.7 3.  4.2 1.2]
 [5.7 2.8 4.1 1.3]
 [4.9 3.1 1.5 0.1]
 [6.4 3.2 5.3 2.3]
 [4.4 3.2 1.3 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.  1.4 0.3]
 [5.8 2.7 3.9 1.2]
 [6.1 3.  4.6 1.4]
 [6.3 2.9 5.6 1.8]
 [5.1 3.3 1.7 0.5]
 [4.9 3.  1.4 0.2]
 [5.1 3.8 1.9 0.4]
 [6.3 2.7 4.9 1.8]
 [6.8 2.8 4.8 1.4]
 [5.4 3.9 1.7 0.4]
 [6.1 3.  4.9 1.8]
 [4.4 3.  1.3 0.2]
 [5.7 2.5 5.  2. ]
 [6.  2.9 4.5 1.5]
 [5.  3.4 1.5 0.2]
 [4.8 3.1 1.6 0.2]
 [5.3 3.7 1.5 0.2]
 [5.  3.  1.6 0.2]
 [4.9 2.4 3.3 1. ]
 [6.9 3.1 5.1 2.3]
 [5.7 2.6 3.5 1. ]
 [6.8 3.  5.5 2.1]
 [5.1 3.4 1.5 0.2]
 [5.  3.5 1.3 0.3]
 [5.9 3.  5.1 1.8]
 [6.3 2.3 4.

In [113]:
my_knn = My_kNN(3, X_train, y_train, X_test, y_test)
score = my_knn.score()
print("准确率为：", score)
test1_feature = [[6.1,3.1,4.7,2.1]]
test1_feature = np.array(test1_feature)
my_knn = My_kNN(3, X_train, y_train, test1_feature)
test1_label = my_knn.kNN()
print("Label of test1:", test1_label)

准确率为： 0.9777777777777777
Label of test1: [2]


In [114]:
#用sklearn
from sklearn.neighbors import KNeighborsClassifier

print(X_train)
print(X_test)

knn = KNeighborsClassifier(n_neighbors=3)
knn = knn.fit(X_train, y_train)
print("kNN:", knn)
y_pred = knn.predict(X_test)
y_true = y_test
print(knn.score(X_test, y_test))

test1 = knn.predict([[6.1,3.1,4.7,2.1]])
print(test1)

[[6.9 3.1 4.9 1.5]
 [7.7 3.  6.1 2.3]
 [5.6 2.9 3.6 1.3]
 [7.9 3.8 6.4 2. ]
 [4.7 3.2 1.3 0.2]
 [5.  2.  3.5 1. ]
 [6.  3.4 4.5 1.6]
 [5.5 4.2 1.4 0.2]
 [6.  2.2 4.  1. ]
 [5.6 3.  4.5 1.5]
 [7.1 3.  5.9 2.1]
 [7.7 3.8 6.7 2.2]
 [4.9 2.5 4.5 1.7]
 [6.4 3.2 4.5 1.5]
 [6.7 3.3 5.7 2.1]
 [5.5 2.4 3.8 1.1]
 [5.2 2.7 3.9 1.4]
 [5.5 3.5 1.3 0.2]
 [5.8 2.8 5.1 2.4]
 [5.6 3.  4.1 1.3]
 [6.3 3.3 4.7 1.6]
 [5.7 3.  4.2 1.2]
 [5.7 2.8 4.1 1.3]
 [4.9 3.1 1.5 0.1]
 [6.4 3.2 5.3 2.3]
 [4.4 3.2 1.3 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.  1.4 0.3]
 [5.8 2.7 3.9 1.2]
 [6.1 3.  4.6 1.4]
 [6.3 2.9 5.6 1.8]
 [5.1 3.3 1.7 0.5]
 [4.9 3.  1.4 0.2]
 [5.1 3.8 1.9 0.4]
 [6.3 2.7 4.9 1.8]
 [6.8 2.8 4.8 1.4]
 [5.4 3.9 1.7 0.4]
 [6.1 3.  4.9 1.8]
 [4.4 3.  1.3 0.2]
 [5.7 2.5 5.  2. ]
 [6.  2.9 4.5 1.5]
 [5.  3.4 1.5 0.2]
 [4.8 3.1 1.6 0.2]
 [5.3 3.7 1.5 0.2]
 [5.  3.  1.6 0.2]
 [4.9 2.4 3.3 1. ]
 [6.9 3.1 5.1 2.3]
 [5.7 2.6 3.5 1. ]
 [6.8 3.  5.5 2.1]
 [5.1 3.4 1.5 0.2]
 [5.  3.5 1.3 0.3]
 [5.9 3.  5.1 1.8]
 [6.3 2.3 4.