# 测试我们的算法

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

In [2]:
iris = datasets.load_iris()

In [3]:
X = iris.data
y = iris.target

In [4]:
X.shape

(150, 4)

In [5]:
y.shape

(150,)

### 1. 把数据集分为训练集和测试集

In [6]:
# 方法一：把矩阵 X 和向量 y 合并后打乱，然后再分开
data = np.hstack([X, y.reshape(-1, 1)])
np.random.shuffle(data)

X = data[:, :4]
y = data[:, 4]

test_ratio = 0.2
test_size = int(len(X) * test_ratio)
# 获取训练集
X_train = X[test_size:]
y_train = y[test_size:]
# 获取测试集
X_test = X[:test_size]
y_test = y[:test_size]

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(120, 4)
(120,)
(30, 4)
(30,)


In [7]:
# 方法二：利用随机索引
# shuffle_indexes = np.random.permutation(len(X)) # 生成随机索引
# test_ratio = 0.2
# test_size = int(len(X) * test_ratio)

# test_indexes = shuffle_indexes[:test_size]
# train_indexes = shuffle_indexes[test_size:]

# X_train = X[train_indexes]
# y_train = y[train_indexes]

# X_test = X[test_indexes]
# y_test = y[test_indexes]

# print(X_train.shape)
# print(y_train.shape)
# print(X_test.shape)
# print(y_test.shape)

### 2. 使用我们的算法测试

In [8]:
from kNN.model_selection import train_test_split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [10]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(120, 4)
(120,)
(30, 4)
(30,)


In [11]:
from kNN.kNN import KNNClassifier

In [12]:
my_knn_clf = KNNClassifier(k=3)

In [13]:
my_knn_clf.fit(X_train, y_train)

KNN(k=3)

In [14]:
y_predict = my_knn_clf.predict(X_test)

In [15]:
y_predict

array([1., 2., 2., 1., 2., 2., 0., 2., 0., 1., 1., 0., 0., 2., 1., 0., 0.,
       2., 0., 1., 0., 2., 1., 1., 1., 2., 2., 1., 0., 2.])

In [16]:
y_test

array([1., 2., 2., 1., 2., 2., 0., 2., 0., 1., 1., 0., 0., 2., 1., 0., 0.,
       2., 0., 1., 0., 2., 1., 1., 1., 2., 2., 1., 0., 2.])

In [17]:
sum(y_predict == y_test)

30

In [18]:
sum(y_predict == y_test)/len(y_test)

1.0

### 3. sklearn 中的 train_test_split

In [19]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state= 333)

In [21]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(120, 4)
(120,)
(30, 4)
(30,)
