In [1]:
import numpy as np
import torch
from sklearn import datasets
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch import nn

In [5]:
# Prepare data: load the breast-cancer dataset.
# The loader is called through its own imported name rather than as
# `datasets.load_breast_cancer()`: this cell rebinds `datasets` to the returned
# object, so going through the module name would raise AttributeError the second
# time the cell is executed.
# NOTE: after this line `datasets` shadows the `sklearn.datasets` module. The name
# is kept because the following cells read `datasets.data` / `datasets.target`.
datasets = load_breast_cancer()
dir(datasets)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [33]:
# Feature matrix and label vector taken from the loaded dataset.
X = datasets.data
y = datasets.target
samples, features = X.shape  # (number of rows, number of columns)

# Hold out 20% of the rows for testing. With random_state left unset the split is
# different on every run; passing a fixed value makes the train/test partition
# identical each time.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

In [35]:
# print(X_train)
# print(X_test)

In [37]:
# Standardize the features: the scaler is fitted on the training split only and
# the same fitted transform is then applied to the test split.
scalar = StandardScaler()
X_train = scalar.fit_transform(X_train)
X_test = scalar.transform(X_test)

# Convert all four arrays to float32 torch tensors.
X_train, X_test, y_train, y_test = (
    torch.from_numpy(arr.astype(np.float32))
    for arr in (X_train, X_test, y_train, y_test)
)

# Reshape the label vectors into (n, 1) columns so they line up with the
# single-column output of the model.
y_train = y_train.reshape(len(y_train), 1)
y_test = y_test.reshape(len(y_test), 1)

In [40]:
# Model
class Logistic(nn.Module):
    """Logistic-regression classifier: one linear layer followed by a sigmoid.

    Args:
        input_features: Number of input features per sample.
    """

    def __init__(self, input_features):
        super().__init__()
        # out_features is 1 because logistic regression solves a binary
        # classification problem: the model emits a single probability.
        layers = [nn.Linear(input_features, 1), nn.Sigmoid()]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the linear layer and the sigmoid to ``x`` and return the result."""
        return self.model(x)

# Seed torch's RNG before building the model: nn.Linear draws its initial weights
# at random, so without a fixed seed the loss values and the final accuracy change
# on every fresh run of the notebook.
torch.manual_seed(1234)
model = Logistic(features)

In [43]:
# Loss and optimizer
# Binary cross-entropy applied to the probabilities produced by the model,
# minimized with SGD at a fixed learning rate.
learning_rate = 0.01
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [45]:
# Train: every iteration runs the whole training set through the model once.
epoch = 100
for step in range(1, epoch + 1):
    # Forward pass and loss on the training data.
    output = model(X_train)
    loss = loss_fn(output, y_train)

    # Clear the gradients from the previous iteration, backpropagate, then
    # let the optimizer update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the loss every 10th iteration.
    if step % 10 == 0:
        print(f'---第{step}次训练的Loss是{loss}!---')

---第10次训练的Loss是0.24787487089633942!---
---第20次训练的Loss是0.23767127096652985!---
---第30次训练的Loss是0.2286403924226761!---
---第40次训练的Loss是0.22057881951332092!---
---第50次训练的Loss是0.21332913637161255!---
---第60次训练的Loss是0.20676735043525696!---
---第70次训练的Loss是0.20079419016838074!---
---第80次训练的Loss是0.1953292191028595!---
---第90次训练的Loss是0.1903064101934433!---
---第100次训练的Loss是0.18567104637622833!---


In [51]:
# Test: accuracy on the held-out split.
model.eval()
with torch.no_grad():  # without this context the results would carry a grad_fn
    y_predicted = model(X_test)
    # Round the probabilities to hard 0/1 predictions.
    y_predicted_round = torch.round(y_predicted)
    n_correct = y_predicted_round.eq(y_test).sum()
    acc = n_correct / len(y_test)
    print(f'Accuracy:{acc.item():.5f}')

Accuracy:0.90351
