In [81]:
import torch
import torch.nn as nn
import sklearn.datasets as dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

In [82]:
# prepare data

bc = dataset.load_breast_cancer()
X = bc.data
y = bc.target

# X.shape => (samples=569, features=30)
m, features = X.shape

# Hold out 20% of the samples for testing. Rows are shuffled before the split,
# and the fixed random_state makes that shuffle repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)


In [83]:
# Inspect the label array's shape; the recorded output is a 1-D (569,) vector.
y.shape

(569,)

In [84]:
# scale
# Standardize every feature to zero mean / unit variance. Features on very
# different scales make gradient descent on the logistic-regression weights
# slow and unstable.

sc = StandardScaler()
# Fit the scaler on the training split ONLY, then reuse those statistics for
# the test split. Re-fitting on the test data would leak test-set information
# into preprocessing and put train and test features on different scales.
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Cast to float32 before wrapping in tensors so the inputs and targets match
# the dtype of the model's parameters.
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))

# Reshape the 1-D label vectors into (n_samples, 1) columns so they line up
# with the model's single-output predictions when the loss is computed.
y_train = y_train.view(y_train.shape[0], 1)
y_test = y_test.view(y_test.shape[0], 1)

In [85]:
# model
# linear model => f = wx + b, sigmoid at the end

class Model(nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid.

    Args:
        in_features: number of input features per sample.
    """

    def __init__(self, in_features):
        super().__init__()
        # Single output unit: the probability of the positive class.
        self.linear = nn.Linear(in_features, 1)

    def forward(self, x):
        """Return sigmoid(linear(x)), one value in [0, 1] per input row."""
        logits = self.linear(x)
        return torch.sigmoid(logits)
    
# Seed PyTorch's RNG before building the model: nn.Linear initialises its
# weights randomly, so without a seed the loss curve and final accuracy change
# on every fresh run. 42 mirrors the random_state used for the data split.
torch.manual_seed(42)
model = Model(features)

In [86]:
# loss and optimizer

# Hyperparameters
n_epochs = 300
learning_rate = 0.01

# Binary cross-entropy; it takes probabilities, which the model's final
# sigmoid already produces.
criterion = nn.BCELoss()

# Plain SGD over every trainable parameter of the model.
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [87]:
# Training
# Full-batch gradient descent: each epoch runs the whole training set through
# the model once and applies a single parameter update.

for epoch in range(n_epochs):
    # Forward pass and loss on the complete training split.
    y_hat = model(X_train)
    loss = criterion(y_hat, y_train)

    # Backpropagate, update the parameters, then clear the gradients so they
    # do not accumulate into the next epoch's backward pass.
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # Log every 10th epoch, reported with 1-based numbering.
    if (epoch + 1) % 10 == 0:
        print(f'epoch: {epoch+1}, loss = {loss.item():.4f}')

    

epoch: 10, loss = 0.4179
epoch: 20, loss = 0.3790
epoch: 30, loss = 0.3493
epoch: 40, loss = 0.3259
epoch: 50, loss = 0.3067
epoch: 60, loss = 0.2908
epoch: 70, loss = 0.2772
epoch: 80, loss = 0.2655
epoch: 90, loss = 0.2552
epoch: 100, loss = 0.2462
epoch: 110, loss = 0.2381
epoch: 120, loss = 0.2308
epoch: 130, loss = 0.2242
epoch: 140, loss = 0.2182
epoch: 150, loss = 0.2127
epoch: 160, loss = 0.2076
epoch: 170, loss = 0.2029
epoch: 180, loss = 0.1986
epoch: 190, loss = 0.1945
epoch: 200, loss = 0.1908
epoch: 210, loss = 0.1872
epoch: 220, loss = 0.1839
epoch: 230, loss = 0.1808
epoch: 240, loss = 0.1779
epoch: 250, loss = 0.1751
epoch: 260, loss = 0.1725
epoch: 270, loss = 0.1700
epoch: 280, loss = 0.1677
epoch: 290, loss = 0.1654
epoch: 300, loss = 0.1633


In [88]:
# test
# Score the trained model on the held-out split. no_grad() disables autograd
# tracking, which is not needed for inference.

with torch.no_grad():
    y_hat = model(X_test)
    # Round the predicted probabilities to hard 0/1 class labels.
    y_hat_cls = torch.round(y_hat)
    # Accuracy = matching predictions / number of test samples.
    n_correct = torch.eq(y_hat_cls, y_test).sum()
    acc = n_correct / float(y_test.shape[0])
    print(f'accuracy: {acc.item():.4f}')

accuracy: 0.9737
