In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import pickle

In [9]:
# 激活函数及其导数
def relu(x):
    return np.maximum(0, x)


def relu_derivative(x):
    return np.where(x > 0, 1, 0)



# 均方误差损失函数及其导数
def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)


def mean_squared_error_derivative(y_true, y_pred):
    return 2 * (y_pred - y_true) / y_true.size





# MLP类定义
class MLPRegressor:
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        self.input_size = input_size
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.output_size = output_size

        # 初始化权重和偏置
        self.W1 = np.random.randn(input_size, hidden_size1) * 0.01
        self.b1 = np.zeros((1, hidden_size1))
        self.W2 = np.random.randn(hidden_size1, hidden_size2) * 0.01
        self.b2 = np.zeros((1, hidden_size2))
        self.W3 = np.random.randn(hidden_size2, output_size) * 0.01
        self.b3 = np.zeros((1, output_size))

    def forward(self, X):
        # 前向传播
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = relu(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = relu(self.z2)
        self.z3 = np.dot(self.a2, self.W3) + self.b3

        return self.a3

    def backward(self, X, y, learning_rate):
        # 反向传播
        m = X.shape[0]  # 使用输入样本数
        # print(f"m:{m}")

        # 计算输出层的梯度
        d_loss_a3 = mean_squared_error_derivative(y, self.a3)

        # 计算第三层的梯度
        d_loss_W3 = np.dot(self.a2.T, d_loss_a3) / m
        d_loss_b3 = np.sum(d_loss_a3, axis=0, keepdims=True) / m

        # 计算第二层的梯度
        d_loss_a2 = np.dot(d_loss_a3, self.W3.T)
        d_loss_z2 = d_loss_a2 * relu_derivative(self.z2)
        d_loss_W2 = np.dot(self.a1.T, d_loss_z2) / m
        d_loss_b2 = np.sum(d_loss_z2, axis=0, keepdims=True) / m

        # 计算第一层的梯度
        d_loss_a1 = np.dot(d_loss_z2, self.W2.T)
        d_loss_z1 = d_loss_a1 * relu_derivative(self.z1)
        d_loss_W1 = np.dot(X.T, d_loss_z1) / m
        d_loss_b1 = np.sum(d_loss_z1, axis=0, keepdims=True) / m

        # 更新权重和偏置
        self.W3 -= learning_rate * d_loss_W3
        self.b3 -= learning_rate * d_loss_b3
        self.W2 -= learning_rate * d_loss_W2
        self.b2 -= learning_rate * d_loss_b2
        self.W1 -= learning_rate * d_loss_W1
        self.b1 -= learning_rate * d_loss_b1

    def train(self, X, y, epochs, learning_rate):
        for epoch in range(epochs):
            output = self.forward(X)
            loss =mean_squared_error(y, output)
            self.backward(X, y, learning_rate)
            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Loss: {loss}')

    def predict(self, X):
        output = self.forward(X)
        return np.argmax(output, axis=1)

    def accuracy(self, X, y):
        y_predicted=self.forward(X)
        predictions = np.argmax(y_predicted, axis=1)
        labels = np.argmax(y, axis=1)
        return np.mean(predictions == labels)
    
    
    
    def save_weights(self, file_path):
        weights = {
            'W1': self.W1,
            'b1': self.b1,
            'W2': self.W2,
            'b2': self.b2,
            'W3': self.W3,
            'b3': self.b3
        }
        with open(file_path, 'wb') as file:
            pickle.dump(weights, file)

    def load_weights(self, file_path):
        with open(file_path, 'rb') as file:
            weights = pickle.load(file)
            self.W1 = weights['W1']
            self.b1 = weights['b1']
            self.W2 = weights['W2']
            self.b2 = weights['b2']
            self.W3 = weights['W3']
            self.b3 = weights['b3']

In [3]:
def load_mnist_data(train_path):
    # 加载CSV文件
    train_data = pd.read_csv(train_path).values

    # 打印数据集基本信息
    print(f"Number of rows: {train_data.shape[0]}, Number of columns: {train_data.shape[1]}")

    # 提取特征和标签
    X_train = train_data[:, 1:]
    y_train = train_data[:, 0]

    # 打印特征和标签的维度
    print(f"Shape of X_train: {X_train.shape}")
    print(f"Shape of y_train: {y_train.shape}")

    # 打印部分数据以检查
    print("First 5 rows of X_train:")
    print(X_train[:5])
    print("First 5 labels of y_train:")
    print(y_train[:5])

    # 归一化像素值到 [0, 1]
    X_train = X_train / 255.0

    # 将标签转换为独热编码
    y_train = np.eye(10)[y_train.astype(int)]

    # 打印转换后的独热编码标签
    print("First 5 rows of one-hot encoded y_train:")
    print(y_train[:5])

    return X_train, y_train

In [4]:
X_train,y_train=load_mnist_data('./data/mnist_train.csv')

In [10]:
# 示例用法
if __name__ == "__main__":
    
    # 创建MLP模型
    mlp = MLP(input_size=784, hidden_size1=128, hidden_size2=64, output_size=10)
    #
    # # 训练MLP模型
    mlp.train(X_train, y_train, epochs=1000, learning_rate=0.2)
    #
    # 保存训练后的权重
    mlp.save_weights('./data/mlp_weights.pkl')
    # 评估模型
    train_accuracy = mlp.accuracy(X_train, y_train)

    print(f'Train Accuracy: {train_accuracy * 100:.2f}%')



原始数据形状: (20640, 10)
原始数据头部:
    longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
0    -122.23     37.88                41.0        880.0           129.0   
1    -122.22     37.86                21.0       7099.0          1106.0   
2    -122.24     37.85                52.0       1467.0           190.0   
3    -122.25     37.85                52.0       1274.0           235.0   
4    -122.25     37.85                52.0       1627.0           280.0   

   population  households  median_income  median_house_value ocean_proximity  
0       322.0       126.0         8.3252            452600.0        NEAR BAY  
1      2401.0      1138.0         8.3014            358500.0        NEAR BAY  
2       496.0       177.0         7.2574            352100.0        NEAR BAY  
3       558.0       219.0         5.6431            341300.0        NEAR BAY  
4       565.0       259.0         3.8462            342200.0        NEAR BAY   

<class 'pandas.core.frame.DataFrame'>
RangeI

ValueError: shapes (16346,16346) and (1,32) not aligned: 16346 (dim 1) != 1 (dim 0)