In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt

# 多维度数据集的输入

## 补充资料

- [SkLearn](https://scikit-learn.org/stable/)

## 多维逻辑回归模型

$$ \hat y^{(i)} = \sigma(\sum^8_{n=1} x_n^{(i)} \cdot \omega_n + b) $$

## Mini-Batch Case

$$ 
\begin{bmatrix}
    \hat y^{(1)} \\
    \vdots \\
    \hat y^{(m)}
\end{bmatrix}
=
\begin{bmatrix}
    \sigma(z^{(1)}) \\
    \vdots \\
    \sigma(z^{(m)})
\end{bmatrix}
=
\sigma(
\begin{bmatrix}
    z^{(1)} \\
    \vdots \\
    z^{(m)}
\end{bmatrix}
)
$$

This indicates that the sigmoid function (provided in pytorch) is in an element-wise fasion.

## Step-1 Prepare Dataset

数据来源: [Kaggle](https://www.kaggle.com/datasets/uciml/pima-indians-diabetes-database)
原始数据: 美国国家糖尿病和消化和肾脏疾病研究所（NIDDK）

- 数据集包含768个样本，每个样本包含8个特征，以及一个标签


In [2]:
dataset = np.loadtxt('./dataset/diabetes/diabetes.csv', delimiter=',', dtype=np.float32, skiprows=1)

x_data = torch.from_numpy(dataset[:, 0:8])
y_data = torch.from_numpy(dataset[:, [8]])

## Step-2 Define Model

这里我们构建的是三层线性模型 8 -> 6 -> 4 -> 1

In [3]:
class Model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        x = self.sigmoid(self.linear3(x))
        return x
    
model = Model()

## Step-3 Construct Loss and Optimizer



In [4]:
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

## Step-4 Training

In [8]:
for epoch in range(100000):
    y_pred = model(x_data)          # Note that this program has not use the mini-batch for training.
    loss = criterion(y_pred, y_data)
    print(epoch, loss.item())

    optimizer.zero_grad()
    loss.backward()

    optimizer.step()



0 0.48835405707359314
1 0.4935561716556549
2 0.489612340927124
3 0.499696284532547
4 0.490575909614563
5 0.4990144670009613
6 0.49268004298210144
7 0.49243107438087463
8 0.49074408411979675
9 0.49072265625
10 0.4892822802066803
11 0.4928989112377167
12 0.49310991168022156
13 0.4901914596557617
14 0.48829129338264465
15 0.49480339884757996
16 0.48740148544311523
17 0.48811623454093933
18 0.4868526756763458
19 0.4893327057361603
20 0.49690595269203186
21 0.48919305205345154
22 0.4875166714191437
23 0.4920326769351959
24 0.48641112446784973
25 0.4897156059741974
26 0.49030932784080505
27 0.4986060559749603
28 0.49118533730506897
29 0.4918515682220459
30 0.48994335532188416
31 0.4920376241207123
32 0.49316561222076416
33 0.49100685119628906
34 0.48881006240844727
35 0.4922003448009491
36 0.4959634840488434
37 0.49130979180336
38 0.4885721206665039
39 0.4939030110836029
40 0.48806285858154297
41 0.48744311928749084
42 0.4863179624080658
43 0.4915963411331177
44 0.486701637506485
45 0.492594

In [17]:
cnt = 0
for i in range(len(y_data)):
    if y_pred[i].item() >= 0.6 and y_data[i] == 1:
        cnt += 1
    elif y_pred[i].item() < 0.4 and y_data[i] == 0:
        cnt += 1