In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import pickle

In [2]:
# 激活函数及其导数
def relu(x):
    return np.maximum(0, x)


def relu_derivative(x):
    return np.where(x > 0, 1, 0)


# 多分类
def softmax(x):
    exps = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exps / np.sum(exps, axis=1, keepdims=True)


# 均方误差损失函数及其导数
def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)


def mean_squared_error_derivative(y_true, y_pred):
    return 2 * (y_pred - y_true) / y_true.size


# 损失函数及其导数
def cross_entropy_loss(y_true, y_pred):
    loss = -np.mean(np.sum(y_true * np.log(y_pred + 1e-15), axis=1))
    return loss


def cross_entropy_loss_derivative(y_true, y_pred):
    return y_pred - y_true


# MLP类定义
class MLP:
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        self.input_size = input_size
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.output_size = output_size

        # 初始化权重和偏置
        self.W1 = np.random.randn(input_size, hidden_size1) * 0.01
        self.b1 = np.zeros((1, hidden_size1))
        self.W2 = np.random.randn(hidden_size1, hidden_size2) * 0.01
        self.b2 = np.zeros((1, hidden_size2))
        self.W3 = np.random.randn(hidden_size2, output_size) * 0.01
        self.b3 = np.zeros((1, output_size))

    def forward(self, X):
        # 前向传播
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = relu(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = relu(self.z2)
        self.z3 = np.dot(self.a2, self.W3) + self.b3
        self.a3 = softmax(self.z3)  # 输出层使用Softmax激活函数
        return self.a3

    def backward(self, X, y, learning_rate):
        # 反向传播
        m = X.shape[0]  # 使用输入样本数
        # print(f"m:{m}")

        # 计算输出层的梯度
        d_loss_a3 = cross_entropy_loss_derivative(y, self.a3)

        # 计算第三层的梯度
        d_loss_W3 = np.dot(self.a2.T, d_loss_a3) / m
        d_loss_b3 = np.sum(d_loss_a3, axis=0, keepdims=True) / m

        # 计算第二层的梯度
        d_loss_a2 = np.dot(d_loss_a3, self.W3.T)
        d_loss_z2 = d_loss_a2 * relu_derivative(self.z2)
        d_loss_W2 = np.dot(self.a1.T, d_loss_z2) / m
        d_loss_b2 = np.sum(d_loss_z2, axis=0, keepdims=True) / m

        # 计算第一层的梯度
        d_loss_a1 = np.dot(d_loss_z2, self.W2.T)
        d_loss_z1 = d_loss_a1 * relu_derivative(self.z1)
        d_loss_W1 = np.dot(X.T, d_loss_z1) / m
        d_loss_b1 = np.sum(d_loss_z1, axis=0, keepdims=True) / m

        # 更新权重和偏置
        self.W3 -= learning_rate * d_loss_W3
        self.b3 -= learning_rate * d_loss_b3
        self.W2 -= learning_rate * d_loss_W2
        self.b2 -= learning_rate * d_loss_b2
        self.W1 -= learning_rate * d_loss_W1
        self.b1 -= learning_rate * d_loss_b1

    def train(self, X, y, epochs, learning_rate):
        for epoch in range(epochs):
            output = self.forward(X)
            loss = cross_entropy_loss(y, output)
            self.backward(X, y, learning_rate)
            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Loss: {loss}')

    def predict(self, X):
        output = self.forward(X)
        return np.argmax(output, axis=1)

    def accuracy(self, X, y):
        y_predicted=self.forward(X)
        predictions = np.argmax(y_predicted, axis=1)
        labels = np.argmax(y, axis=1)
        return np.mean(predictions == labels)
    
    
    
    def save_weights(self, file_path):
        weights = {
            'W1': self.W1,
            'b1': self.b1,
            'W2': self.W2,
            'b2': self.b2,
            'W3': self.W3,
            'b3': self.b3
        }
        with open(file_path, 'wb') as file:
            pickle.dump(weights, file)

    def load_weights(self, file_path):
        with open(file_path, 'rb') as file:
            weights = pickle.load(file)
            self.W1 = weights['W1']
            self.b1 = weights['b1']
            self.W2 = weights['W2']
            self.b2 = weights['b2']
            self.W3 = weights['W3']
            self.b3 = weights['b3']

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


def load_california_data(datafile='./data/housing.csv'):
    def load_and_inspect_data(datafile):
        data = pd.read_csv(datafile, sep=',')
        print("原始数据形状:", data.shape)
        print("原始数据头部:\n", data.head(), "\n")
        return data

    def clean_data(data):
        data = data.drop(["longitude", "ocean_proximity"], axis=1)
        print("数据信息:\n", data.info(), "\n")
        print("数据描述统计量:\n", data.describe(), "\n")
        null_counts = data.isnull().sum()
        print("缺失值统计:\n", null_counts, "\n")
        data = data.dropna()
        print(f"删除缺失值后的数据形状: {data.shape}\n")
        return data

    def preprocess_features_and_labels(data):
        X = data.drop(["median_house_value"], axis=1).values
        y = data["median_house_value"].values
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
        return X, y

    def split_data(X, y):
        train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=42)
        print("训练集特征形状:", train_X.shape)
        print("测试集特征形状:", test_X.shape)
        print("训练集标签形状:", train_y.shape)
        print("测试集标签形状:", test_y.shape)
        return train_X, test_X, train_y, test_y

    data = load_and_inspect_data(datafile)
    data = clean_data(data)
    X, y = preprocess_features_and_labels(data)
    train_X, test_X, train_y, test_y = split_data(X, y)

    return train_X, test_X, train_y, test_y

In [4]:
train_X, test_X, train_y, test_y = load_california_data()

原始数据形状: (20640, 10)
原始数据头部:
    longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
0    -122.23     37.88                41.0        880.0           129.0   
1    -122.22     37.86                21.0       7099.0          1106.0   
2    -122.24     37.85                52.0       1467.0           190.0   
3    -122.25     37.85                52.0       1274.0           235.0   
4    -122.25     37.85                52.0       1627.0           280.0   

   population  households  median_income  median_house_value ocean_proximity  
0       322.0       126.0         8.3252            452600.0        NEAR BAY  
1      2401.0      1138.0         8.3014            358500.0        NEAR BAY  
2       496.0       177.0         7.2574            352100.0        NEAR BAY  
3       558.0       219.0         5.6431            341300.0        NEAR BAY  
4       565.0       259.0         3.8462            342200.0        NEAR BAY   

<class 'pandas.core.frame.DataFrame'>
RangeI

In [36]:
# 示例用法
if __name__ == "__main__":
    
    # 创建MLP模型
    mlp = MLP(input_size=7, hidden_size1=64, hidden_size2=32, output_size=1)
    #
    # # 训练MLP模型
    mlp.train(train_X, train_y, epochs=1000, learning_rate=0.2)
    #
    # 保存训练后的权重
    mlp.save_weights('./data/mlp_weights.pkl')
    # 评估模型
    train_accuracy = mlp.accuracy(train_X,train_y)

    print(f'Train Accuracy: {train_accuracy * 100:.2f}%')



Epoch 0, Loss: 2.3025509537665756
Epoch 100, Loss: 2.2966796033832386
Epoch 200, Loss: 1.1496113881550216
Epoch 300, Loss: 0.6376834921761966
Epoch 400, Loss: 0.4369163678380998
Epoch 500, Loss: 0.3542330706425764
Epoch 600, Loss: 0.29741057084043016
Epoch 700, Loss: 0.2547212442832033
Epoch 800, Loss: 0.22011188482054053
Epoch 900, Loss: 0.19218529935925044
Train Accuracy: 95.15%


In [1]:

import numpy as np

import pandas as pd
from sklearn.preprocessing import StandardScaler


def load_california_data(datafile):
    """
    加载并预处理加州房价数据集。

    参数:
    datafile (str): 包含数据集的CSV文件的路径。

    返回:
    tuple: 标准化后的特征矩阵X和目标向量y。
    """
    try:
        # 读取CSV文件
        data = pd.read_csv(datafile, sep=',')

        # 删除无关列
        data = data.drop(["longitude", "ocean_proximity"], axis=1)

        # 填充缺失值
        data["total_bedrooms"].fillna(data["total_bedrooms"].median(), inplace=True)

        # 分离特征和目标
        X = data.drop(["median_house_value"], axis=1).values
        y = data["median_house_value"].values

        # 标准化特征
        scaler = StandardScaler()
        X = scaler.fit_transform(X)

        return X, y

    except Exception as e:
        print(f"处理文件 {datafile} 时发生错误: {e}")

In [3]:
datafile = "./data/housing.csv"
X, y = load_california_data(datafile)

In [7]:
X

array([[ 1.05254828,  0.98214266, -0.8048191 , ..., -0.9744286 ,
        -0.97703285,  2.34476576],
       [ 1.04318455, -0.60701891,  2.0458901 , ...,  0.86143887,
         1.66996103,  2.33223796],
       [ 1.03850269,  1.85618152, -0.53574589, ..., -0.82077735,
        -0.84363692,  1.7826994 ],
       ...,
       [ 1.77823747, -0.92485123, -0.17499526, ..., -0.3695372 ,
        -0.17404163, -1.14259331],
       [ 1.77823747, -0.84539315, -0.35559977, ..., -0.60442933,
        -0.39375258, -1.05458292],
       [ 1.75014627, -1.00430931,  0.06840827, ..., -0.03397701,
         0.07967221, -0.78012947]])

In [8]:
y

array([452600., 358500., 352100., ...,  92300.,  84700.,  89400.])