# 用KNN算法完成分类任务

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the dataset: m_x holds the samples, m_y the matching labels
m_x = np.loadtxt('data/mnist_x')
m_y = np.loadtxt('data/mnist_y')

# Visualise the data: cast the first sample to int, reshape it to 28x28
# and draw it as an image
data = np.reshape(np.array(m_x[0],dtype=int),[28,28])
plt.figure()
plt.imshow(data,cmap='Blues')

In [None]:
# Split the dataset into a training set and a test set: the first `ratio`
# share of the (shuffled) samples is used for training, the rest for testing
ratio = 0.8
split = int(len(m_x) * ratio)

# Shuffle samples and labels with one shared permutation so that every
# sample keeps its label; the seed is fixed so the split is reproducible
np.random.seed(2023)
idx = np.random.permutation(np.arange(len(m_x)))
m_x, m_y = m_x[idx], m_y[idx]
x_train, x_test = np.split(m_x, [split])
y_train, y_test = np.split(m_y, [split])

# Distance between two samples
# Euclidean distance
def distance(a, b):
    """Return the Euclidean (L2) distance between samples ``a`` and ``b``.

    The squared element-wise differences are summed over every element,
    so the result is a single scalar whatever the shape of the inputs.
    """
    diff = a - b
    squared_sum = np.sum(diff * diff)
    return np.sqrt(squared_sum)

In [None]:
# KNN classifier
class KNN:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Labels must be convertible to integers in ``[0, label_num)`` because
    they are used as indices into the vote counter.
    """

    def __init__(self, k, label_num):
        """Store the hyper-parameters.

        Args:
            k: number of nearest neighbours that vote on each prediction.
            label_num: number of classes (length of the vote counter).
        """
        self.k = k
        self.label_num = label_num

    def fit(self, x_train, y_train):
        """Memorise the training data; KNN has no separate training step.

        Args:
            x_train: training samples, one sample per entry along axis 0.
            y_train: labels aligned with ``x_train``.
        """
        # Coerce to arrays so that list inputs also support the vectorised
        # arithmetic and fancy indexing used below (no copy for ndarrays).
        self.x_train = np.asarray(x_train)
        self.y_train = np.asarray(y_train)

    def get_knn_indices(self, x):
        """Return the indices of the ``k`` training samples closest to ``x``.

        Args:
            x: a single query sample with the same shape as one training
                sample.

        Returns:
            Array of at most ``k`` indices into the training set, ordered
            from nearest to farthest.
        """
        # Distances from every training sample to x in one vectorised
        # expression; this allocates a temporary as large as x_train.
        diff = self.x_train - x
        # Reduce over every axis except the sample axis so each sample
        # yields exactly one Euclidean distance.
        sample_axes = tuple(range(1, diff.ndim))
        dis = np.sqrt(np.sum(np.square(diff), axis=sample_axes))
        # Sort by ascending distance and keep the k nearest
        return np.argsort(dis)[:self.k]

    def get_label(self, x):
        """Predict the label of ``x`` by majority vote of its neighbours.

        Args:
            x: a single query sample.

        Returns:
            The label with the most votes; ties go to the smallest label
            because ``np.argmax`` returns the first maximum.
        """
        knn_indices = self.get_knn_indices(x)
        # Labels may be stored as floats; truncate them to integer indices
        neighbour_labels = self.y_train[knn_indices].astype(int)
        # Count the votes per label. np.add.at accumulates repeated indices
        # and still raises IndexError for a label >= label_num.
        label_statistic = np.zeros(shape=[self.label_num])
        np.add.at(label_statistic, neighbour_labels, 1)
        # Return the most frequent label
        return np.argmax(label_statistic)

    def predict(self, x_test):
        """Predict a label for every sample in ``x_test``.

        Args:
            x_test: iterable of query samples.

        Returns:
            1-D integer array with one predicted label per sample.
        """
        return np.array([self.get_label(x) for x in x_test], dtype=int)


In [None]:
# Evaluate the algorithm on the test set for K = 1..9
for k in range(1,10):
    knn = KNN(k,label_num=10)
    knn.fit(x_train,y_train)
    predicted_labels = knn.predict(x_test)
    # Accuracy is the fraction of predictions that equal the true label
    accuracy = np.mean(predicted_labels == y_test)
    print(f'K的取值为{k},预测准确率为{accuracy * 100:.1f}%')

# 使用scikit-learn实现KNN算法

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from matplotlib.colors import ListedColormap

# Load the Gaussian dataset: the first two columns are used as features,
# the third column as the label
data = np.loadtxt('data/gauss.csv',delimiter=',')
x_train = data[:,:2]
y_train = data[:,2]
print('数据集大小：',len(x_train))

In [None]:
# Inspect the first feature of every sample whose label is 0
x_train[y_train == 0, 0]

In [None]:
# Visualise the data: label-0 samples as blue circles, label-1 samples as
# red crosses
plt.figure()
plt.scatter(x_train[y_train == 0, 0], x_train[y_train == 0, 1], c='blue', marker='o')
plt.scatter(x_train[y_train == 1, 0], x_train[y_train == 1, 1], c='red', marker='x')
plt.xlabel('X axis')
plt.ylabel('Y axis')
plt.show()

In [None]:
# Grid step
step = 0.1
# Grid bounds: the extent of the training points padded by 1 on every side
x_min = x_train[:, 0].min() - 1
x_max = x_train[:, 0].max() + 1
y_min = x_train[:, 1].min() - 1
y_max = x_train[:, 1].max() + 1
# Build the grid, then list its nodes as rows of (x, y) coordinates
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, step),
    np.arange(y_min, y_max, step),
)
grid_data = np.column_stack([xx.ravel(), yy.ravel()])

In [None]:
# Inspect the two coordinate matrices returned by np.meshgrid
xx,yy

In [None]:
# Inspect the grid nodes stacked as rows of (x, y) coordinates
grid_data

In [None]:
fig = plt.figure(figsize=(16,4.5))
# Values of K to compare; one panel is drawn per value
ks = [1, 3, 10]
cmap_light = ListedColormap(['royalblue', 'lightcoral'])

for i, k in enumerate(ks):
    # Fit a KNN classifier and classify every grid node
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(x_train, y_train)
    z = knn.predict(grid_data)

    # Draw the predicted regions with the training points on top. The
    # column count follows len(ks) so editing ks keeps the layout valid.
    ax = fig.add_subplot(1, len(ks), i + 1)
    ax.pcolormesh(xx, yy, z.reshape(xx.shape), cmap=cmap_light, alpha=0.7)
    ax.scatter(x_train[y_train == 0, 0], x_train[y_train == 0, 1], c='blue', marker='o')
    ax.scatter(x_train[y_train == 1, 0], x_train[y_train == 1, 1], c='red', marker='x')

    ax.set_xlabel('X axis')
    ax.set_ylabel('Y axis')
    ax.set_title(f'K = {k}')
plt.show()

# 用KNN算法完成色彩风格迁移

In [None]:
from skimage import io
from skimage.color import rgb2lab,lab2rgb
from sklearn.neighbors import KNeighborsRegressor
import os

In [None]:
# Show sample style images
path = 'data/style_transfer/'
# Display the first three files (in sorted name order) of the 'vangogh'
# folder side by side
data_dir = os.path.join(path,'vangogh')
fig = plt.figure(figsize=(16,5))
for i,file in enumerate(np.sort(os.listdir(data_dir))[:3]):
    img = io.imread(os.path.join(data_dir,file))
    ax = fig.add_subplot(1,3,i+1)
    ax.imshow(img)
    ax.set_xlabel('X axis')
    ax.set_ylabel('Y axis')
    ax.set_title(file)
plt.show()

In [None]:
# Record the original colour that belongs to every window of the style
# image; these pairs are used for the final colouring step.
# block_size is the number of pixel layers a window extends around its centre
block_size = 1

def read_style_image(file_name, size=block_size):
    """Read a style image and turn it into (window, colour) training pairs.

    The image is displayed, converted from RGB to LAB, and every pixel that
    is at least ``size`` pixels away from the border is used as a window
    centre.

    Args:
        file_name: path of the style image.
        size: number of pixel layers around the centre; each window covers
            ``(2 * size + 1)`` x ``(2 * size + 1)`` pixels.

    Returns:
        ``(X, Y)``: two lists of equal length. ``X[i]`` is the flattened L
        channel of window ``i`` and ``Y[i]`` holds the remaining colour
        channels (a, b) of that window's centre pixel.
    """
    rgb = io.imread(file_name)
    plt.figure()
    plt.imshow(rgb)
    plt.xlabel('X axis')
    plt.ylabel('Y axis')
    plt.show()

    # Work in LAB space: channel 0 is L, channels 1 and 2 are A and B
    lab = rgb2lab(rgb)
    n_rows, n_cols = lab.shape[:2]
    span = 2 * size + 1

    X, Y = [], []
    # (top, left) is the upper-left corner of a window, so its centre sits
    # at (top + size, left + size)
    for top in range(n_rows - 2 * size):
        for left in range(n_cols - 2 * size):
            X.append(lab[top:top + span, left:left + span, 0].flatten())
            Y.append(lab[top + size, left + size, 1:])
    return X, Y


In [None]:
# Build the training pairs from the style image and fit the regressor that
# maps a window to the colour of its centre pixel
X,Y = read_style_image(os.path.join(path,'style.jpg'))
# weights='distance' means a neighbour's weight is inversely proportional
# to its distance from the query sample
knn = KNeighborsRegressor(n_neighbors=4,weights='distance')
knn.fit(X,Y)

In [None]:
# Split the content image into windows of the same size and colour them
# with KNN
def rebuild(img, size=block_size):
    """Recolour a content image with the module-level ``knn`` regressor.

    The image is displayed and converted to LAB. The L channel of the
    window around every interior pixel is passed to ``knn.predict`` to
    obtain that pixel's colour channels, while the L channel of the
    content image itself is kept.

    Args:
        img: content image as an RGB array.
        size: number of pixel layers around each window centre.

    Returns:
        LAB image without the outermost ``size`` layers of pixels, for
        which no complete window exists.
    """
    # Show the content image
    plt.figure()
    plt.imshow(img)
    plt.xlabel('X axis')
    plt.ylabel('Y axis')
    plt.show()

    # Represent the content image in LAB space
    lab = rgb2lab(img)
    n_rows, n_cols = lab.shape[:2]
    inner_rows = n_rows - 2 * size
    inner_cols = n_cols - 2 * size
    span = 2 * size + 1

    # Gather the flattened L channel of every window, row by row
    print('Constructing window...')
    windows = np.array([
        lab[top:top + span, left:left + span, 0].flatten()
        for top in range(inner_rows)
        for left in range(inner_cols)
    ])

    # Predict the colour channels of every window centre
    print('Predicting...')
    pred_ab = knn.predict(windows).reshape(inner_rows, inner_cols, -1)

    # Assemble the output: original lightness plus predicted colour
    photo = np.zeros([n_rows, n_cols, 3])
    photo[:, :, 0] = lab[:, :, 0]
    photo[size:n_rows - size, size:n_cols - size, 1:] = pred_ab

    # The outermost `size` layers have no window, so crop them away
    return photo[size:n_rows - size, size:n_cols - size, :]

In [None]:
# Load the content image, recolour it and show the style-transferred result
content = io.imread(os.path.join(path,'input.jpg'))
new_photo = rebuild(content)

# Convert the generated image (LAB) back to RGB for display
new_photo = lab2rgb(new_photo)

fig = plt.figure()
plt.imshow(new_photo)
plt.xlabel('X axis')
plt.ylabel('Y axis')
plt.show()

# Others
- map()函数的用法https://blog.csdn.net/qq_41554005/article/details/119931689
- enumerate()函数的用法https://blog.csdn.net/churximi/article/details/51648388
- np.loadtxt()函数的用法https://blog.csdn.net/cyh153296/article/details/83502076
- plt.scatter()函数的用法https://blog.csdn.net/gongdiwudu/article/details/129947219
- np.meshgrid()函数的用法https://blog.csdn.net/xbinworld/article/details/107451926
- np.concatenate()函数的用法https://blog.csdn.net/kekeshu_k/article/details/109110916

In [None]:
# Usage of enumerate(): https://blog.csdn.net/churximi/article/details/51648388
# enumerate turns an iterable into an indexed sequence, so the index and the
# value are available together in each iteration
list1 = ["这", "是", "一个", "测试"]
for index, item in enumerate(list1):
    print(index, item)