In [2]:
# 1) Define model (input, output size, forward pass)
# 2) Construct loss and optimizer
# 3) Training loop
#   - forward pass: compute prediction and loss
#   - backward pass: gradients
#   - update weights

import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [31]:
# 0) Prepare data

## Load the breast cancer dataset bundled with scikit-learn
bc = datasets.load_breast_cancer()

## Feature matrix X and target vector y
X, y = bc.data, bc.target

## Report the dimensions of X
n_samples, n_features = X.shape
print(f"# of samples: {n_samples}, # of features: {n_features}")

## Report the dimensions of y
print(f"# of samples target: {y.shape}")

## Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234
)

## Standardize the features: the scaler is fitted on the training split only
## and the same fitted statistics are then applied to the test split
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## Convert the numpy feature arrays into float32 torch tensors
X_train, X_test = (
    torch.from_numpy(arr.astype(np.float32)) for arr in (X_train, X_test)
)

## Same conversion for the targets, reshaped into column vectors of shape (n, 1)
y_train, y_test = (
    torch.from_numpy(arr.astype(np.float32)).view(arr.shape[0], 1)
    for arr in (y_train, y_test)
)

# 1) Model 
## f = wx + b, sigmoid at the end

class LogisticRegression(nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid."""

    def __init__(self, n_input_features):
        """Create the linear layer mapping ``n_input_features`` inputs to 1 output."""
        super().__init__()
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Return ``sigmoid(linear(x))``.

        ``x`` is a tensor whose last dimension has ``n_input_features``
        entries; the result has a last dimension of size 1 with values
        in the range [0, 1].
        """
        logits = self.linear(x)
        return torch.sigmoid(logits)

model = LogisticRegression(n_features)

# 2) Loss and optimizer

## step size applied by the optimizer on every parameter update
learning_rate = 1

## binary cross-entropy between the model output and the 0/1 targets
criterion = nn.BCELoss()

## plain stochastic gradient descent over all parameters of the model
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

# 3) Training loop
num_epochs = 10000
for epoch in range(num_epochs):
    # Forward: predictions for the whole training set, then the loss against the labels
    y_predicted = model(X_train)
    loss = criterion(y_predicted, y_train)

    # Backward: accumulate gradients of the loss w.r.t. the model parameters
    loss.backward()

    # Apply one optimizer step, then clear the gradients so they do not
    # accumulate into the next iteration
    optimizer.step()
    optimizer.zero_grad()

    # Report progress only on every 1000th epoch
    if (epoch+1) % 1000 != 0:
        continue
    print(f'epoch: {epoch+1}, loss = {loss.item():.4f}')
    
# 4) Evaluation
# Gradients are not needed for inference, so autograd tracking is disabled.
with torch.no_grad():
    y_predicted = model(X_test)  # model output for every test row

    # Threshold explicitly at 0.5 (output >= 0.5 -> class 1).
    # torch.round() rounds ties to the nearest even integer, so it would map
    # an output of exactly 0.5 to 0 rather than to 1.
    y_predicted_cls = (y_predicted >= 0.5).to(y_predicted.dtype)

    # accuracy = fraction of test rows whose predicted class equals the label
    acc = y_predicted_cls.eq(y_test).sum() / float(y_test.shape[0])
    print(f'accuracy = {acc:.4f}')



# of samples: 569, # of features: 30
# of samples target: (569,)
epoch: 1000, loss = 0.0221
epoch: 2000, loss = 0.0182
epoch: 3000, loss = 0.0158
epoch: 4000, loss = 0.0140
epoch: 5000, loss = 0.0126
epoch: 6000, loss = 0.0115
epoch: 7000, loss = 0.0105
epoch: 8000, loss = 0.0097
epoch: 9000, loss = 0.0090
epoch: 10000, loss = 0.0084
accuracy = 0.9649
