# 从零实现LVQ 

In [1]:
import numpy as np

In [None]:
class LVQ:
    """Learning Vector Quantization (LVQ1) classifier implemented with NumPy.

    Each class is represented by one or more prototype vectors. During
    training, the prototype nearest to a sample is pulled toward the sample
    when their labels agree and pushed away when they differ. Prediction
    assigns each sample the label of its nearest prototype.

    Args:
        n_prototypes_per_class: Number of prototype vectors kept per class.
        learning_rate: Step size applied to every prototype update.
        max_iter: Number of full passes over the training data.

    Attributes:
        prototypes: Float array of shape
            (n_classes * n_prototypes_per_class, n_features), or None
            before ``fit`` has been called.
        prototype_labels: Array holding the class label of each prototype,
            stored with the same dtype as the training labels, or None
            before ``fit`` has been called.
    """

    def __init__(self, n_prototypes_per_class=1, learning_rate=0.01, max_iter=100):
        self.n_prototypes_per_class = n_prototypes_per_class
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.prototypes = None
        self.prototype_labels = None

    def fit(self, X, y):
        """Learn prototype vectors from the training data.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of shape (n_samples,) holding class labels of any
                dtype that ``np.unique`` can sort (ints, strings, ...).

        Returns:
            The fitted estimator (``self``).

        Raises:
            ValueError: If ``X`` is not 2-D, if ``X`` and ``y`` disagree on
                the number of samples, or (raised by ``np.random.choice``)
                if a class has fewer samples than ``n_prototypes_per_class``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.shape[0] != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        per_class = self.n_prototypes_per_class
        classes = np.unique(y)

        # Keep the labels in their original dtype so that non-numeric labels
        # work and integer labels are not silently converted to floats.
        self.prototypes = np.zeros((classes.shape[0] * per_class, X.shape[1]))
        self.prototype_labels = np.repeat(classes, per_class)

        # Seed each class's prototypes with distinct random samples of that class.
        for class_idx, class_label in enumerate(classes):
            class_samples = X[y == class_label]
            chosen = np.random.choice(class_samples.shape[0], per_class, replace=False)
            start = class_idx * per_class
            self.prototypes[start:start + per_class] = class_samples[chosen]

        for _ in range(self.max_iter):
            for sample, label in zip(X, y):
                distances = np.linalg.norm(self.prototypes - sample, axis=1)
                nearest = np.argmin(distances)
                step = self.learning_rate * (sample - self.prototypes[nearest])
                if self.prototype_labels[nearest] == label:
                    # Same class: move the prototype toward the sample.
                    self.prototypes[nearest] += step
                else:
                    # Different class: move the prototype away from the sample.
                    self.prototypes[nearest] -= step
        return self

    def predict(self, X):
        """Return the label of the nearest prototype for every sample in X.

        Args:
            X: Array-like of shape (n_samples, n_features).

        Returns:
            Array of shape (n_samples,) with the same dtype as the labels
            passed to ``fit``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.prototypes is None or self.prototype_labels is None:
            raise RuntimeError("LVQ instance is not fitted yet; call fit() first")

        X = np.asarray(X, dtype=float)
        nearest = np.zeros(X.shape[0], dtype=int)
        for i in range(X.shape[0]):
            distances = np.linalg.norm(self.prototypes - X[i], axis=1)
            nearest[i] = np.argmin(distances)
        # Index the label array so predictions keep the training label dtype.
        return self.prototype_labels[nearest]