In [1]:
# 1) design model (input size, output size, forward pass)
# 2) construct loss and optimizer
# 3) training loop
#    - forward pass: compute predicgtion
#    - backward pass: gradients
#    - update weights
import torch
import torch.nn as nn # 用来获取网络模型
import numpy as np # 用来进行数据转换
from sklearn import datasets # 生成数据库
from sklearn.preprocessing import StandardScaler # 用于把 features 标量化
from sklearn.model_selection import train_test_split # 用于分割测试数据和训练数据
import matplotlib.pyplot as plt

In [2]:
# 0) prepare data
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

n_samples, n_features = X.shape # n_samples = 569, n_features = 30

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

# scale features 正则化，使 features 的均值为 0，方差为 1
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_text = sc.fit_transform(X_test)

# 转换成 tensor
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))

# 行向量转列向量
y_train = y_train.view(y_train.shape[0], 1)
y_test = y_test.view(y_test.shape[0], 1)

In [3]:
# 1) model
# f = wx + b, sigmoid at the end
class LogisticRegression(nn.Module):
    
    def __init__(self, n_input_features):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(n_input_features, 1)
        
    def forward(self, x):
        y_predicted = torch.sigmoid(self.linear(x))
        return y_predicted
    
model = LogisticRegression(n_features)

In [4]:
# 2) loss and optimizer
learning_rate = 0.01
criterion = nn.MSELoss() # Binary Cross Entropy
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

In [5]:
# 3) training loop
num_epochs = 200
for epoch in range(num_epochs):
    # forward pass and loss
    y_predicted = model(X_train)
    loss = criterion(y_predicted, y_train)
    
    # backward pass
    loss.backward()
    
    # update
    optimizer.step()
    
    # zero gradients
    optimizer.zero_grad()
    
    if (epoch + 1) % 10 == 0:
        print(f"epoch: {epoch + 1}, loss = {loss.item():.4f}")

epoch: 10, loss = 0.2179
epoch: 20, loss = 0.1875
epoch: 30, loss = 0.1650
epoch: 40, loss = 0.1481
epoch: 50, loss = 0.1351
epoch: 60, loss = 0.1247
epoch: 70, loss = 0.1163
epoch: 80, loss = 0.1093
epoch: 90, loss = 0.1035
epoch: 100, loss = 0.0985
epoch: 110, loss = 0.0942
epoch: 120, loss = 0.0904
epoch: 130, loss = 0.0871
epoch: 140, loss = 0.0841
epoch: 150, loss = 0.0814
epoch: 160, loss = 0.0790
epoch: 170, loss = 0.0768
epoch: 180, loss = 0.0748
epoch: 190, loss = 0.0729
epoch: 200, loss = 0.0712


In [6]:
# Evaluate
with torch.no_grad():
    y_predicted = model(X_test)
    y_predicted_cls = y_predicted.round() # 如果不使用 no_grad，这一部分会被计入 computation graph
    acc = y_predicted_cls.eq(y_test).sum() / float(y_test.shape[0])
    print(f"accuracy = {acc:.4f}")

accuracy = 0.3947
