# 第二周 作业

## 1.使用sklearn数据集训练逻辑回归模型

In [19]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

X, y = load_iris(return_X_y=True)
print(X.shape)

(150, 4)


In [20]:
X = X[:100]
y = y[:100]

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [22]:
# 权重参数
theta = np.random.randn(1, 4)
bias = 0
# 超参数-学习率
lr = 0.01
epochs = 3000 # 迭代次数

In [23]:
# 模型计算函数
def forward(x, theta, bias):
    # 线性运算
    z = np.dot(theta, x.T) + bias
    # 激活函数 sigmoid
    y_hat = 1 / (1 + np.exp(-z))
    return y_hat

# 损失函数
def loss(y, y_hat):
    e = 1e-8
    return y * np.log(y_hat + e) + (1 - y) * np.log(1 - y_hat + e)

# 梯度计算
def gradient(x, y, y_hat):
    m = x.shape[-1]
    delta_theta = np.dot((y_hat - y), x) / m
    delta_bias = np.mean(y_hat - y)
    return delta_theta, delta_bias

In [24]:
# 模型训练
for i in range(epochs):
    # 前向计算
    y_hat = forward(X_train, theta, bias)
    # 计算损失
    l = loss(y_train, y_hat)
    # 计算梯度
    delta_theta, delta_bias = gradient(X_train, y_train, y_hat)
    # 更新参数
    theta -= lr * delta_theta
    bias -= lr * delta_bias
    if i % 100 == 0:
        print(f'epoch {i}, loss {np.mean(l)}') 

epoch 0, loss -5.4251679482761075
epoch 100, loss -0.04444779153727488
epoch 200, loss -0.022586495057392096
epoch 300, loss -0.015426708311732353
epoch 400, loss -0.011824848695305928
epoch 500, loss -0.009640920019046512
epoch 600, loss -0.008168478519477619
epoch 700, loss -0.007105072634149059
epoch 800, loss -0.006299104245385783
epoch 900, loss -0.005666009274197884
epoch 1000, loss -0.005154803352732994
epoch 1100, loss -0.004732872942268587
epoch 1200, loss -0.004378359458913729
epoch 1300, loss -0.004076048919437945
epoch 1400, loss -0.0038150165467508647
epoch 1500, loss -0.0035872091084205044
epoch 1600, loss -0.00338655593529557
epoch 1700, loss -0.003208391724157431
epoch 1800, loss -0.003049070397172498
epoch 1900, loss -0.0029056999736674105
epoch 2000, loss -0.0027759563304447453
epoch 2100, loss -0.0026579497107292287
epoch 2200, loss -0.0025501273055738485
epoch 2300, loss -0.002451201004152404
epoch 2400, loss -0.002360093025079382
epoch 2500, loss -0.002275894460116

In [28]:
# 模型推理
idx = np.random.randint(len(X_test))
x = X_test[idx]
y = y_test[idx]
predict = np.round(forward(x, theta, bias))
print(f'y: {y}, predict: {predict}')



y: 0, predict: [0.]


## 2.调整学习率，样本数据拆分比率，观察训练结果；


In [29]:
lr = 0.1

In [30]:
# 模型训练
for i in range(epochs):
    # 前向计算
    y_hat = forward(X_train, theta, bias)
    # 计算损失
    l = loss(y_train, y_hat)
    # 计算梯度
    delta_theta, delta_bias = gradient(X_train, y_train, y_hat)
    # 更新参数
    theta -= lr * delta_theta
    bias -= lr * delta_bias
    if i % 100 == 0:
        print(f'epoch {i}, loss {np.mean(l)}') 

epoch 0, loss -0.0019350530728572042
epoch 100, loss -0.0014974712617710185
epoch 200, loss -0.0012272990759626449
epoch 300, loss -0.0010429752489183196
epoch 400, loss -0.000908748239393437
epoch 500, loss -0.0008063978590299138
epoch 600, loss -0.0007256329587433714
epoch 700, loss -0.0006601881155818504
epoch 800, loss -0.0006060239138900122
epoch 900, loss -0.0005604154978128373
epoch 1000, loss -0.0005214560643744021
epoch 1100, loss -0.0004877701183128751
epoch 1200, loss -0.00045833973480430507
epoch 1300, loss -0.0004323949854156507
epoch 1400, loss -0.00040934243634278654
epoch 1500, loss -0.00038871710265375796
epoch 1600, loss -0.00037014933012000074
epoch 1700, loss -0.00035334144983918106
epoch 1800, loss -0.000338050992366326
epoch 1900, loss -0.0003240784030573716
epoch 2000, loss -0.00031125790785777697
epoch 2100, loss -0.0002994506236619544
epoch 2200, loss -0.000288539293750388
epoch 2300, loss -0.0002784242170960967
epoch 2400, loss -0.00026902006652146403
epoch 25

In [31]:
# 模型推理
idx = np.random.randint(len(X_test))
x = X_test[idx]
y = y_test[idx]
predict = np.round(forward(x, theta, bias))
print(f'y: {y}, predict: {predict}')


y: 0, predict: [0.]


## 3. 训练后模型参数保存到文件，在另一个代码中加载参数实现预测功能；

In [32]:
np.save('array.npy', theta)

In [33]:
loaded_array = np.load('array.npy')

print(loaded_array)

[[-0.79958289 -4.4782756   6.06842245  2.19584681]]


In [34]:
idx = np.random.randint(len(X_test))
x = X_test[idx]
y = y_test[idx]
predict = np.round(forward(x, loaded_array, bias))
print(f'y: {y}, predict: {predict}')

y: 1, predict: [1.]


## 4.总结逻辑回归运算及训练相关知识点

## 1.概念

逻辑回归是一种线性模型，用于估计事件发生的概率。它通过一个逻辑函数（sigmoid函数）将线性回归的输出映射到0到1之间的概率值。

## 2.sigmoid函数

Sigmoid函数的公式为： $[ \sigma(z) = \frac{1}{1 + e^{-z}} ]$ 其中，$( z = \mathbf{w}^T \mathbf{x} + b )$，$(\mathbf{w})$是权重向量，$( \mathbf{x} )$是输入特征向量，$( b )$是偏置。

## 3. 损失函数

逻辑回归使用对数损失函数$（Log Loss）$来衡量模型的预测误差： $[ L(y, \hat{y}) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y_i \log(\hat{y}_i) + (1 - y_i) \log(1 - \hat{y}_i) \right] ]$ 其中，$( y_i )$是实际标签，$( \hat{y}_i )$是预测概率，$( m )$是样本数量。

## 4. 模型训练

逻辑回归模型的训练过程通常使用梯度下降法来最小化损失函数。主要步骤包括：

1. 初始化权重和偏置。
2. 计算预测值。
3. 计算损失函数的梯度。
4. 更新权重和偏置。