# 4. 实验四：多层感知机实验
目标：自行构造一个多层感知机，完成对某种类型的样本数据（如图像、文本等）的分类；也可以对自行构造的二维平面上超过 3 类的数据点（或者其它标准数据集）进行分类。<br>

要求：<br>
1.与自行实现的线性分类器做对比，并分析原因。<br>
2.用不同数据量，不同超参数，比较实验效果。<br>
3.不许使用现成平台（例如 PyTorch、TensorFlow）的自动微分工具。<br>
4.实现实验结果的可视化。<br>


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons, make_classification, load_iris, load_wine
from sklearn.model_selection import train_test_split

# Seed NumPy's global RNG so that everything drawn from np.random further down
# (synthetic data, weight initialisation) is reproducible from run to run.
np.random.seed(42)

# Matplotlib text setup: select the SimHei font family (presumably so CJK text
# can be rendered -- confirm the font is installed on the target machine) and
# draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# ----------------------------------------
# 1.自建数据集
# ----------------------------------------
def generate_custom_data(n_samples=600):
    """Generate a three-class, two-dimensional Gaussian-blob dataset.

    Each class is an isotropic Gaussian cloud (standard deviation 0.7) centred
    at (1, 1), (-1, -1) and (1, -1) for labels 0, 1 and 2 respectively.
    Samples are drawn from NumPy's global RNG, so call ``np.random.seed``
    beforehand for reproducible output.

    Args:
        n_samples: Total number of points to generate. When it is not a
            multiple of 3 the remainder is given to the lowest-numbered
            classes, so exactly ``n_samples`` points are always returned.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(n_samples, 2)`` and ``y``
        has shape ``(n_samples,)`` with integer labels in ``{0, 1, 2}``,
        ordered class by class.

    Raises:
        ValueError: If ``n_samples`` is negative.
    """
    if n_samples < 0:
        raise ValueError(f"n_samples must be non-negative, got {n_samples}")

    centers = np.array([[1, 1], [-1, -1], [1, -1]])
    n_classes = len(centers)

    # Spread the remainder over the first classes instead of silently
    # dropping it, so the caller gets the number of samples it asked for.
    base, extra = divmod(n_samples, n_classes)
    counts = [base + (1 if k < extra else 0) for k in range(n_classes)]

    # Draw class by class, in label order, so the RNG stream is consumed in
    # the same sequence regardless of how the counts are split.
    clusters = [
        np.random.randn(count, 2) * 0.7 + center
        for count, center in zip(counts, centers)
    ]
    X = np.vstack(clusters)
    y = np.repeat(np.arange(n_classes), counts)
    return X, y

# Build the synthetic three-class data and hold out 30% of it for testing.
X_custom, y_custom = generate_custom_data(600)
Xc_train, Xc_test, yc_train, yc_test = train_test_split(
    X_custom, y_custom, test_size=0.3, random_state=42
)

# Scatter both splits on one set of axes; colour encodes the class label and
# each split uses its own colormap and marker size.
fig, ax = plt.subplots(figsize=(6, 5))
ax.set_title("Self", fontsize=13)
ax.scatter(
    Xc_train[:, 0], Xc_train[:, 1],
    c=yc_train, cmap='viridis', s=25, label='Train',
)
ax.scatter(
    Xc_test[:, 0], Xc_test[:, 1],
    c=yc_test, cmap='cool', s=20, alpha=0.6, label='Test',
)
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
ax.grid(alpha=0.3)
ax.legend()
plt.show()

# ----------------------------------------
# 2.sklearn: make_moons
# ----------------------------------------
# Two interleaving half-moons (binary labels) with noise=0.2, split 70/30.
X_moon, y_moon = make_moons(n_samples=500, noise=0.2, random_state=42)
Xm_train, Xm_test, ym_train, ym_test = train_test_split(
    X_moon, y_moon, test_size=0.3, random_state=42
)

# Scatter both splits on one set of axes; colour encodes the class label.
fig, ax = plt.subplots(figsize=(6, 5))
ax.set_title("make_moons", fontsize=13)
ax.scatter(
    Xm_train[:, 0], Xm_train[:, 1],
    c=ym_train, cmap='coolwarm', s=25, label='Train',
)
ax.scatter(
    Xm_test[:, 0], Xm_test[:, 1],
    c=ym_test, cmap='Pastel1', s=20, alpha=0.6, label='Test',
)
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
ax.grid(alpha=0.3)
ax.legend()
plt.show()


# ----------------------------------------
# 3.sklearn: load_iris
# ----------------------------------------
# Load the Iris dataset and hold out 30% of it for testing.
iris = load_iris()
X_iris = iris.data
y_iris = iris.target
Xi_train, Xi_test, yi_train, yi_test = train_test_split(
    X_iris, y_iris, test_size=0.3, random_state=42
)
print(f"鸢尾花数据集: 训练集 {Xi_train.shape}, 测试集 {Xi_test.shape}, 类别数={len(np.unique(y_iris))}")

# Only the first two feature columns are plotted; the axis labels are taken
# from the dataset's own feature names.
fig, ax = plt.subplots(figsize=(6, 5))
ax.set_title("Iris First 2", fontsize=13)
ax.scatter(
    Xi_train[:, 0], Xi_train[:, 1],
    c=yi_train, cmap='viridis', s=25, label='Train',
)
ax.scatter(
    Xi_test[:, 0], Xi_test[:, 1],
    c=yi_test, cmap='cool', s=20, alpha=0.6, label='Test',
)
ax.set_xlabel(iris.feature_names[0])
ax.set_ylabel(iris.feature_names[1])
ax.grid(alpha=0.3)
ax.legend()
plt.show()

# ----------------------------------------
# 4.sklearn: load_wine
# ----------------------------------------
# Load the Wine dataset and hold out 30% of it for testing.
wine = load_wine()
X_wine = wine.data
y_wine = wine.target
Xw_train, Xw_test, yw_train, yw_test = train_test_split(
    X_wine, y_wine, test_size=0.3, random_state=42
)
print(f"红酒数据集: 训练集 {Xw_train.shape}, 测试集 {Xw_test.shape}, 类别数={len(np.unique(y_wine))}")

# Only the first two feature columns are plotted; the axis labels are taken
# from the dataset's own feature names.
fig, ax = plt.subplots(figsize=(6, 5))
ax.set_title("Wine First 2", fontsize=13)
ax.scatter(
    Xw_train[:, 0], Xw_train[:, 1],
    c=yw_train, cmap='plasma', s=25, label='Train',
)
ax.scatter(
    Xw_test[:, 0], Xw_test[:, 1],
    c=yw_test, cmap='cool', s=20, alpha=0.6, label='Test',
)
ax.set_xlabel(wine.feature_names[0])
ax.set_ylabel(wine.feature_names[1])
ax.grid(alpha=0.3)
ax.legend()
plt.show()


In [None]:
class LinearClassifier:
    """Multinomial (softmax) linear classifier trained by full-batch gradient descent.

    The model computes ``softmax(X @ W + b)`` and minimises the mean
    cross-entropy. Gradients are derived by hand; no autodiff is used.

    Attributes:
        lr: Learning rate used for every update step.
        epochs: Number of full-batch updates performed by ``fit``.
        W: Weight matrix of shape ``(input_dim, num_classes)``.
        b: Bias row vector of shape ``(1, num_classes)``.
        loss_history: Loss recorded before each update; it keeps growing if
            ``fit`` is called more than once.
    """

    def __init__(self, input_dim, num_classes, lr=0.01, epochs=500):
        """Create the model with small random weights and zero biases.

        Args:
            input_dim: Number of input features.
            num_classes: Number of output classes.
            lr: Gradient-descent step size.
            epochs: Number of full-batch iterations run by ``fit``.
        """
        self.lr = lr
        self.epochs = epochs
        self.W = np.random.randn(input_dim, num_classes) * 0.01
        self.b = np.zeros((1, num_classes))
        self.loss_history = []

    def softmax(self, z):
        """Return the row-wise softmax of ``z``.

        The row maximum is subtracted before exponentiating so that large
        logits do not overflow.
        """
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def one_hot(self, y, num_classes):
        """Return the ``(len(y), num_classes)`` one-hot encoding of integer labels ``y``."""
        return np.eye(num_classes)[y]

    def fit(self, X, y, verbose=True):
        """Train on ``(X, y)`` for ``self.epochs`` full-batch steps.

        Args:
            X: Feature matrix of shape ``(n_samples, input_dim)``.
            y: Integer class labels in ``[0, num_classes)``.
            verbose: When true (the default), print the loss every 100 epochs.
        """
        n_samples = X.shape[0]
        # Size the one-hot targets from the model, not from the labels that
        # happen to be present: a batch that is missing a class must still
        # produce targets matching the width of W.
        num_classes = self.W.shape[1]
        Y_onehot = self.one_hot(y, num_classes)

        for epoch in range(self.epochs):
            # Forward pass.
            logits = np.dot(X, self.W) + self.b
            probs = self.softmax(logits)

            # Mean cross-entropy; the epsilon guards against log(0).
            loss = -np.mean(np.sum(Y_onehot * np.log(probs + 1e-9), axis=1))
            self.loss_history.append(loss)

            # Gradient of the mean cross-entropy w.r.t. the logits is
            # (probs - Y) / n; propagate it to W and b.
            error = probs - Y_onehot
            dW = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error, axis=0, keepdims=True)

            # Gradient-descent update.
            self.W -= self.lr * dW
            self.b -= self.lr * db

            if verbose and (epoch + 1) % 100 == 0:
                print(f"Lin Epoch {epoch+1}/{self.epochs}, Loss={loss:.4f}")

    def predict(self, X):
        """Return the most probable class index for each row of ``X``."""
        logits = np.dot(X, self.W) + self.b
        probs = self.softmax(logits)
        return np.argmax(probs, axis=1)

    def accuracy(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        y_pred = self.predict(X)
        return np.mean(y_pred == y)


In [None]:
class MLP:
    """One-hidden-layer perceptron (ReLU hidden layer, softmax output).

    Trained with full-batch gradient descent on the mean cross-entropy; the
    backward pass is written by hand, without any autodiff tooling.

    Attributes:
        lr: Learning rate used for every update step.
        epochs: Number of full-batch updates performed by ``fit``.
        W1, b1: Hidden-layer weights ``(input_dim, hidden_dim)`` and biases
            ``(1, hidden_dim)``.
        W2, b2: Output-layer weights ``(hidden_dim, output_dim)`` and biases
            ``(1, output_dim)``.
        loss_history: Loss recorded before each update; it keeps growing if
            ``fit`` is called more than once.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, lr=0.01, epochs=1000):
        """Create the network with Xavier-uniform weights and zero biases.

        Args:
            input_dim: Number of input features.
            hidden_dim: Number of hidden units.
            output_dim: Number of output classes.
            lr: Gradient-descent step size.
            epochs: Number of full-batch iterations run by ``fit``.
        """
        self.lr = lr
        self.epochs = epochs
        self.loss_history = []

        # Xavier (Glorot) uniform initialisation: weights are drawn from
        # U(-limit, limit) with limit = sqrt(6 / (fan_in + fan_out)).
        limit1 = np.sqrt(6 / (input_dim + hidden_dim))
        limit2 = np.sqrt(6 / (hidden_dim + output_dim))
        self.W1 = np.random.uniform(-limit1, limit1, (input_dim, hidden_dim))
        self.b1 = np.zeros((1, hidden_dim))
        self.W2 = np.random.uniform(-limit2, limit2, (hidden_dim, output_dim))
        self.b2 = np.zeros((1, output_dim))

    def relu(self, x):
        """Return ``max(0, x)`` element-wise."""
        return np.maximum(0, x)

    def relu_deriv(self, x):
        """Return the ReLU derivative: 1.0 where ``x > 0``, else 0.0."""
        return (x > 0).astype(float)

    def softmax(self, z):
        """Return the row-wise softmax of ``z``.

        The row maximum is subtracted before exponentiating so that large
        logits do not overflow.
        """
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def one_hot(self, y, num_classes):
        """Return the ``(len(y), num_classes)`` one-hot encoding of integer labels ``y``."""
        return np.eye(num_classes)[y]

    def forward(self, X):
        """Run the forward pass.

        Returns:
            A tuple ``(probs, cache)``: ``probs`` holds the class
            probabilities, one row per sample, and ``cache`` is
            ``(X, z1, a1, z2, probs)``, the intermediates ``backward`` needs.
        """
        z1 = np.dot(X, self.W1) + self.b1
        a1 = self.relu(z1)
        z2 = np.dot(a1, self.W2) + self.b2
        a2 = self.softmax(z2)
        cache = (X, z1, a1, z2, a2)
        return a2, cache

    def backward(self, cache, y_true):
        """Back-propagate the cross-entropy loss and apply one update step.

        Args:
            cache: The tuple returned by ``forward`` for the current batch.
            y_true: Integer class labels for that batch.
        """
        X, z1, a1, _z2, a2 = cache
        n = X.shape[0]
        num_classes = a2.shape[1]
        Y_onehot = self.one_hot(y_true, num_classes)

        # Softmax combined with cross-entropy gives (probs - Y) as the
        # gradient w.r.t. the logits; dividing by n accounts for the mean.
        dz2 = (a2 - Y_onehot) / n
        dW2 = np.dot(a1.T, dz2)
        db2 = np.sum(dz2, axis=0, keepdims=True)

        # Propagate through the output weights and the ReLU gate. This must
        # use W2 as it was during the forward pass, i.e. before the update.
        da1 = np.dot(dz2, self.W2.T)
        dz1 = da1 * self.relu_deriv(z1)
        dW1 = np.dot(X.T, dz1)
        db1 = np.sum(dz1, axis=0, keepdims=True)

        # Gradient-descent update.
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1

    def fit(self, X, y, verbose=True):
        """Train on ``(X, y)`` for ``self.epochs`` full-batch steps.

        Args:
            X: Feature matrix of shape ``(n_samples, input_dim)``.
            y: Integer class labels in ``[0, output_dim)``.
            verbose: When true (the default), print the loss every 100 epochs.
        """
        y = np.asarray(y)
        rows = np.arange(len(y))  # loop-invariant row index for the loss
        for epoch in range(self.epochs):
            a2, cache = self.forward(X)
            # Mean negative log-likelihood of the true class; the epsilon
            # guards against log(0).
            loss = -np.mean(np.log(a2[rows, y] + 1e-9))
            self.loss_history.append(loss)
            self.backward(cache, y)
            if verbose and (epoch + 1) % 100 == 0:
                print(f"MLP Epoch {epoch+1}/{self.epochs}, Loss={loss:.4f}")

    def predict(self, X):
        """Return the most probable class index for each row of ``X``."""
        probs, _ = self.forward(X)
        return np.argmax(probs, axis=1)

    def accuracy(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        return np.mean(self.predict(X) == y)


In [None]:
def compare_models(X_train, X_test, y_train, y_test, title, hidden_dim=8, lr=0.05, epochs=500,
                   standardize=False):
    """Train a linear classifier and an MLP on the same split and compare them.

    Both models are trained with the same learning rate and epoch budget.
    The function prints the test accuracies, plots both loss curves on one
    figure and then draws each model's decision boundary.

    Args:
        X_train: Training features, shape ``(n_train, n_features)``.
        X_test: Test features, shape ``(n_test, n_features)``.
        y_train: Integer training labels.
        y_test: Integer test labels.
        title: Dataset name used in the printed summary and plot titles.
        hidden_dim: Number of hidden units in the MLP.
        lr: Learning rate shared by both models.
        epochs: Number of training epochs shared by both models.
        standardize: When true, scale every feature to zero mean and unit
            variance using statistics from ``X_train`` only (the same
            transform is applied to ``X_test``). Gradient descent with a
            single learning rate is poorly conditioned when feature ranges
            differ by orders of magnitude, so this is worth enabling for
            raw tabular data. Defaults to ``False``, which feeds the
            features to the models unchanged.
    """
    if standardize:
        mu = X_train.mean(axis=0)
        sigma = X_train.std(axis=0)
        # A constant feature has zero spread; leave it centred, not divided.
        sigma = np.where(sigma == 0, 1.0, sigma)
        X_train = (X_train - mu) / sigma
        X_test = (X_test - mu) / sigma

    # The class count is taken from the training labels.
    num_classes = len(np.unique(y_train))

    linear = LinearClassifier(X_train.shape[1], num_classes, lr=lr, epochs=epochs)
    mlp = MLP(X_train.shape[1], hidden_dim, num_classes, lr=lr, epochs=epochs)

    linear.fit(X_train, y_train)
    print("\n")
    mlp.fit(X_train, y_train)

    lin_acc = linear.accuracy(X_test, y_test)
    mlp_acc = mlp.accuracy(X_test, y_test)
    print(f"[{title}] 线性分类器测试准确率: {lin_acc:.3f} | MLP测试准确率: {mlp_acc:.3f}")

    # Loss curves of both models on a shared figure.
    plt.figure(figsize=(6,4))
    plt.plot(linear.loss_history, label='LinearClassifier', linewidth=2)
    plt.plot(mlp.loss_history, label='MLP', linewidth=2)
    plt.title(f"{title} - loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid(alpha=0.3)
    plt.show()

    # Decision boundaries, drawn in the same feature space the models saw.
    visualize_boundary(linear, X_train, X_test, y_train, title, "linear")
    visualize_boundary(mlp, X_train, X_test, y_train, title, "mlp")


def visualize_boundary(model, X_train, X_test, y_train, datasetname, modelname):
    """Plot a model's decision regions over the first two features.

    A 300x300 grid spanning the training data (padded by 1 on each side) is
    classified with ``model.predict`` and drawn as filled contours, with the
    training points scattered on top.

    When the data has more than two features, the remaining ones are held
    at their training-set mean, so the plotted slice passes through the
    centre of the data. For two-feature data the grid is used unchanged.

    Args:
        model: Any object exposing ``predict(X)`` that returns one class
            index per row.
        X_train: Training features, shape ``(n_samples, n_features)`` with
            at least two feature columns.
        X_test: Accepted for interface compatibility; not used.
        y_train: Training labels, used to colour the scattered points.
        datasetname: Dataset name shown in the plot title.
        modelname: Model name shown in the plot title.
    """
    X_vis_train = X_train[:, :2]

    x_min, x_max = X_vis_train[:, 0].min() - 1, X_vis_train[:, 0].max() + 1
    y_min, y_max = X_vis_train[:, 1].min() - 1, X_vis_train[:, 1].max() + 1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 300),
                         np.linspace(y_min, y_max, 300))
    grid = np.c_[xx.ravel(), yy.ravel()]

    # Start every grid row at the per-feature training mean, then overwrite
    # the two plotted features with the grid coordinates. Padding with zeros
    # would evaluate the model far from the data whenever the unplotted
    # features are not centred on 0.
    grid_full = np.tile(X_train.mean(axis=0), (grid.shape[0], 1))
    grid_full[:, :2] = grid

    preds = model.predict(grid_full).reshape(xx.shape)

    plt.figure(figsize=(6,5))
    plt.contourf(xx, yy, preds, alpha=0.3, cmap='viridis')
    plt.scatter(X_vis_train[:,0], X_vis_train[:,1], c=y_train, cmap='viridis', edgecolor='k', s=25, label='Train')
    plt.title(f"{datasetname} - {modelname} boundary")
    plt.legend()
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.grid(alpha=0.3)
    plt.show()


# Run the linear-vs-MLP comparison on each prepared dataset. The learning rate
# and epoch budget are the same everywhere; only the hidden width varies.
for split, name, width in (
    ((Xc_train, Xc_test, yc_train, yc_test), "Self", 10),
    ((Xm_train, Xm_test, ym_train, ym_test), "make_moons", 8),
    ((Xi_train, Xi_test, yi_train, yi_test), "Iris", 8),
    ((Xw_train, Xw_test, yw_train, yw_test), "Wine", 16),
):
    compare_models(*split, title=name, hidden_dim=width, lr=0.05, epochs=500)

In [None]:
from sklearn.utils import shuffle

def train_mlp_with_variations(X, y, title, data_ratios=(0.2, 0.5, 1.0), hidden_dims=(4, 8, 16),
                              lrs=(0.01, 0.05), epochs=10000):
    """Sweep data size, hidden width and learning rate for the MLP.

    The data is shuffled once, then for each ratio the leading fraction is
    split 70/30 into train and test sets. An MLP is trained for every
    ``(hidden_dim, lr)`` pair; each run plots its loss curve and decision
    boundary and prints its train and test accuracy.

    Args:
        X: Feature matrix of shape ``(n_samples, n_features)``.
        y: Integer class labels.
        title: Dataset name used in printed output and plot titles.
        data_ratios: Fractions of the shuffled data to use, one sweep each.
        hidden_dims: Hidden-layer widths to try.
        lrs: Learning rates to try.
        epochs: Number of training epochs for every run.
    """
    # The class count is taken from the full label set, so the network's
    # output width does not depend on which classes a subset contains.
    num_classes = len(np.unique(y))
    X, y = shuffle(X, y, random_state=42)

    print("\n============================")
    print(f"数据集：{title}")
    print("============================\n")

    for ratio in data_ratios:
        n_samples = int(len(X) * ratio)
        X_sub, y_sub = X[:n_samples], y[:n_samples]
        X_train, X_test, y_train, y_test = train_test_split(X_sub, y_sub, test_size=0.3, random_state=42)
        print(f"数据比例：{ratio:.1f} (训练样本 {len(X_train)} 个)")

        for hidden_dim in hidden_dims:
            for lr in lrs:
                mlp = MLP(input_dim=X_train.shape[1], hidden_dim=hidden_dim, output_dim=num_classes,
                          lr=lr, epochs=epochs)
                mlp.fit(X_train, y_train)
                train_acc = mlp.accuracy(X_train, y_train)
                test_acc = mlp.accuracy(X_test, y_test)

                # Loss curve for this configuration.
                plt.figure(figsize=(5, 3))
                plt.plot(mlp.loss_history, label=f'h={hidden_dim}, lr={lr}')
                plt.title(f"{title} | ratio={ratio:.1f}, h={hidden_dim}, lr={lr}\n"
                          f"TrainAcc={train_acc:.2f}, TestAcc={test_acc:.2f}")
                plt.xlabel("Epoch")
                plt.ylabel("Loss")
                plt.legend()
                plt.grid(alpha=0.3)
                plt.tight_layout()
                plt.show()

                visualize_boundary(mlp, X_train, X_test, y_train, title, "mlp")

                print(f"  → 隐层={hidden_dim:2d}, lr={lr:.3f}, "
                      f"TrainAcc={train_acc:.3f}, TestAcc={test_acc:.3f}")


# ---------------------------------------------
# 在 make_moons 数据集上优化
# ---------------------------------------------
ns = [500, 1000]
for n_sample in ns:
    # Regenerate make_moons at this size and keep 70% of it as the pool that
    # is handed to the sweep (the sweep performs its own train/test split).
    X_moon, y_moon = make_moons(n_samples=n_sample, noise=0.2, random_state=42)
    Xm_train, Xm_test, ym_train, ym_test = train_test_split(
        X_moon, y_moon, test_size=0.3, random_state=42
    )
    print(f"\n n = {n_sample}")

    train_mlp_with_variations(
        Xm_train,
        ym_train,
        title="make_moons",
        data_ratios=[1],
        hidden_dims=[8, 10],
        lrs=[0.01, 0.03],
    )

