# PyTorch Tutorial 08 - Logistic Regression

https://www.youtube.com/watch?v=OGpQxIkR4ao&list=PLqnslRFeH2UrcDBWF5mfPGpqQDSta6VK4&index=8

In [1]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

## prepare data

In [2]:
# Load the scikit-learn breast-cancer dataset, then pull out the feature
# matrix (X) and the target vector (y) as separate names.
bc = datasets.load_breast_cancer()
X = bc.data
y = bc.target

# Preview the first sample together with its label.
X[:1], y[:1]

(array([[1.799e+01, 1.038e+01, 1.228e+02, 1.001e+03, 1.184e-01, 2.776e-01,
         3.001e-01, 1.471e-01, 2.419e-01, 7.871e-02, 1.095e+00, 9.053e-01,
         8.589e+00, 1.534e+02, 6.399e-03, 4.904e-02, 5.373e-02, 1.587e-02,
         3.003e-02, 6.193e-03, 2.538e+01, 1.733e+01, 1.846e+02, 2.019e+03,
         1.622e-01, 6.656e-01, 7.119e-01, 2.654e-01, 4.601e-01, 1.189e-01]]),
 array([0]))

In [3]:
# Display the dimensions of the feature matrix and of the target vector.
X.shape, y.shape

((569, 30), (569,))

In [4]:
# Rows of X are samples, columns are features; n_features later sizes the
# input of the linear layer.
n_samples, n_features = np.shape(X)

n_samples, n_features

(569, 30)

In [5]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2022,
)

#### scaler

In [6]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split, so the test data does not
# influence the scaling statistics.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# Cast every split to float32 and wrap it as a torch tensor.
X_train, X_test, y_train, y_test = (
    torch.from_numpy(split.astype(np.float32))
    for split in (X_train, X_test, y_train, y_test)
)

In [8]:
# Label tensor shapes before they are reshaped into column vectors.
y_train.shape, y_test.shape

(torch.Size([455]), torch.Size([114]))

In [9]:
# Turn the flat label vectors into column vectors of shape (n, 1) so they
# line up with the model output, which has a single column.
y_train = y_train.view(-1, 1)
y_test = y_test.view(-1, 1)

In [10]:
# Label tensor shapes after the reshape into column vectors.
y_train.shape, y_test.shape

(torch.Size([455, 1]), torch.Size([114, 1]))

## model

In [11]:
# f = sigmoid(w*x + b): a linear layer followed by a sigmoid

In [12]:
class LogisticRegression(nn.Module):
    """Logistic-regression model: a single linear layer followed by a sigmoid.

    Args:
        n_input_features: Number of features in each input sample.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # One output unit: the model emits a single value per sample.
        self.linear = nn.Linear(in_features=n_input_features, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

In [13]:
# Instantiate the model with one input per feature column of X.
model = LogisticRegression(n_features)

In [14]:
# Plain stochastic gradient descent over all model parameters, paired with
# binary cross-entropy computed on the model's sigmoid outputs.
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.BCELoss()

## training loop

In [15]:
num_epochs = 1000
for epoch in range(num_epochs):
    # Forward pass over the whole training set, followed by the loss.
    y_predicted = model(X_train)
    loss = criterion(y_predicted, y_train)

    # Backpropagate, take one optimizer step, then clear the gradients so
    # they do not accumulate into the next epoch's backward pass.
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # Only report every 50th epoch.
    if (epoch + 1) % 50 != 0:
        continue
    print(f'epoch {epoch+1}, loss = {loss.item():.4f}')

epoch 50, loss = 0.3489
epoch 100, loss = 0.2645
epoch 150, loss = 0.2232
epoch 200, loss = 0.1978
epoch 250, loss = 0.1803
epoch 300, loss = 0.1674
epoch 350, loss = 0.1574
epoch 400, loss = 0.1493
epoch 450, loss = 0.1427
epoch 500, loss = 0.1372
epoch 550, loss = 0.1325
epoch 600, loss = 0.1284
epoch 650, loss = 0.1248
epoch 700, loss = 0.1216
epoch 750, loss = 0.1187
epoch 800, loss = 0.1162
epoch 850, loss = 0.1139
epoch 900, loss = 0.1118
epoch 950, loss = 0.1099
epoch 1000, loss = 0.1081


In [16]:
# Gradient tracking is switched off: evaluation only needs forward values.
with torch.no_grad():
    y_predicted = model(X_test)
    # Round the sigmoid outputs to obtain a 0/1 class label per sample.
    y_predicted_cls = y_predicted.round()
    # Accuracy = number of matching labels divided by the test-set size.
    acc = (y_predicted_cls == y_test).sum() / float(len(y_test))
    print(f'accuracy = {acc:.4f}')

accuracy = 0.9825


# comparison with sklearn LogisticRegression

In [17]:
# Recreate the NumPy train/test split with the same test size and seed as
# before: the earlier split variables were overwritten with torch tensors.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2022
)

# Fit the scaler on the training split only, then scale both splits with it.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [18]:
# NOTE: this import rebinds the name `LogisticRegression`, shadowing the
# nn.Module subclass defined earlier in this file. Re-running the earlier
# `model = LogisticRegression(n_features)` cell after this import would
# use scikit-learn's class instead of the torch model.
from sklearn.linear_model import LogisticRegression

In [19]:
# scikit-learn estimator created with its default settings. This rebinds
# `model`, so the trained torch model is no longer reachable by that name.
model = LogisticRegression()

In [20]:
# Fit on the scaled training split, then predict labels for the test split.
y_pred = model.fit(X_train, y_train).predict(X_test)

In [21]:
from sklearn.metrics import accuracy_score

In [22]:
# Accuracy of the scikit-learn predictions on the held-out test split, for
# comparison with the torch model's accuracy printed earlier.
accuracy_score(y_test, y_pred)

0.9736842105263158