In [21]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [22]:
# 0. Data preparation
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

n_samples, n_features = X.shape
print(f"n_samples : {n_samples}, n_features : {n_features}")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

# Standardize the features only (the labels y are left untouched).
# The scaler is fitted on the training split alone and then *reused* for the
# test split: calling fit_transform on X_test would re-estimate mean/std from
# the test data, so train and test would be scaled differently and test-set
# statistics would leak into preprocessing.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Convert the NumPy arrays to float32 tensors.
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))

# Reshape the label vectors from (n,) to column vectors of shape (n, 1).
y_train = y_train.view(y_train.shape[0], 1)
y_test = y_test.view(y_test.shape[0], 1)

n_samples : 569, n_features : 30


In [23]:
# 1. Design model (input, output size, forward pass)
# f = wx + b, sigmoid at the end

class LogisticRegression(nn.Module):
    """Logistic regression: a single linear layer followed by a sigmoid.

    Computes sigmoid(w . x + b) and returns one value in (0, 1) per input row.
    """

    def __init__(self, n_input_features):
        """Create the linear layer mapping ``n_input_features`` inputs to 1 output."""
        super().__init__()
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        logits = self.linear(x)
        return torch.sigmoid(logits)
    
model = LogisticRegression(n_features) # one input per feature column (n_features = 30 for this dataset)

In [24]:
# 2. Construct loss and optimizer
criterion = nn.BCELoss()  # binary cross-entropy loss
learning_rate = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)  # plain SGD over all model parameters

In [25]:
# 3. Training loop
# Each epoch runs: forward pass -> backward pass -> weight update.

num_epochs = 100
for epoch in range(num_epochs):
    # Forward pass: predictions for the whole training set and their loss.
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)

    # Backward pass: compute gradients of the loss.
    loss.backward()

    # Update the weights, then clear the gradients so they do not
    # accumulate into the next epoch.
    optimizer.step()
    optimizer.zero_grad()

    # Only report progress on every 10th epoch.
    if (epoch + 1) % 10 != 0:
        continue
    print(f"epoch : {epoch + 1}, loss = {loss.item():.4f}")

epoch : 10, loss = 0.4937
epoch : 20, loss = 0.4202
epoch : 30, loss = 0.3709
epoch : 40, loss = 0.3355
epoch : 50, loss = 0.3087
epoch : 60, loss = 0.2876
epoch : 70, loss = 0.2706
epoch : 80, loss = 0.2563
epoch : 90, loss = 0.2442
epoch : 100, loss = 0.2338


In [26]:
# 4. Evaluation

with torch.no_grad():  # no gradient tracking is needed for evaluation
    y_pred = model(X_test)
    # Round the sigmoid outputs (values between 0 and 1) to hard 0/1 class labels.
    y_pred_class = torch.round(y_pred)
    # Accuracy = number of predictions equal to the label / total number of test samples.
    n_correct = (y_pred_class == y_test).sum()
    accuracy = n_correct / float(y_test.shape[0])
    print(f"accuracy = {accuracy:.4f}")

accuracy = 0.9123
