In [2]:
"""
We implement the typical PyTorch pipeline:
    1. Design the model (input_size, output_size, forward pass)
    2. Construct loss and optimizer
    3. Training loop:
        - Forward pass - compute prediction and loss
        - Backwards pass - gradients
        - Update weights
        
In this case, we will add one more layer to our model and add another loss function
"""


Collecting scipy
  Downloading scipy-1.9.3-cp38-cp38-win_amd64.whl (39.8 MB)
Installing collected packages: scipy
  Attempting uninstall: scipy
    Found existing installation: scipy 1.5.0
    Uninstalling scipy-1.5.0:
      Successfully uninstalled scipy-1.5.0
Successfully installed scipy-1.9.3


In [5]:
import torch
import torch.nn as nn # Neural network module
import numpy as np
from sklearn import datasets # To load binary classification dataset 
from sklearn.preprocessing import StandardScaler # Because we want to scale our features
from sklearn.model_selection import train_test_split # Because we want to have a seperation of 
# ... training and testing data

In [26]:
## 0. Prepare data

# The breast cancer dataset is a binary classification problem: predict the
# target label (0 or 1) from the input features
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

n_samples, n_features = X.shape # 569 x 30

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split the same on every run
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 1234)

# Standardise the features to zero mean and unit variance (recommended for
# logistic regression). The scaler is fitted on the training split only and
# then re-used on the test split, so no test statistics leak into training.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Convert the NumPy arrays to float32 torch tensors
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))

# Targets are additionally reshaped from a flat vector into a column vector
# of shape (n, 1), i.e. one row per sample and a single column
y_train = torch.from_numpy(y_train.astype(np.float32)).view(-1, 1)
y_test = torch.from_numpy(y_test.astype(np.float32)).view(-1, 1)


## 1. Model

# Our model is a linear combination of weights and a bias
# f = wx + b
# In the logistic regression case, we apply a sigmoid at the end

class LogisticRegression(nn.Module):
    """Logistic regression model: one linear layer followed by a sigmoid.

    Args:
        n_input_features: number of input features per sample.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # A single layer mapping n_input_features inputs to 1 output,
        # because there is only one class label to predict
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Run the forward pass on the input data ``x``.

        Applies the linear layer and then the sigmoid function, so each
        returned value lies between 0 and 1. Values above 0.5 are read as
        class 1, smaller values as class 0.
        """
        logits = self.linear(x)
        return torch.sigmoid(logits)
    
# Seed PyTorch's random number generator before building the model: the linear
# layer's weights are initialised randomly, so without a fixed seed the losses
# and accuracy printed below change on every Restart & Run All.
# The value matches the random_state used for the train/test split.
torch.manual_seed(1234)

model = LogisticRegression(n_features) # 30 input features, 1 output feature


## 2. Loss and Optimizer
learning_rate = 0.01

# Loss function (criterion) is different than in the linear regression case:
# binary cross entropy compares the predicted probabilities with the 0/1 labels
criterion = nn.BCELoss()

# Optimizer is the same: plain stochastic gradient descent over all model parameters
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

## 3. Training loop

num_epochs = 100

for epoch in range(num_epochs):
    # Forward pass over the whole training set, then compute the loss
    # between the predictions and the training labels
    y_predicted = model(X_train)
    loss = criterion(y_predicted, y_train)

    # Backward pass: PyTorch computes the gradients of the loss for us
    loss.backward()

    # Let the optimizer update the weights, then empty (zero) the gradients
    # so they do not accumulate into the next epoch's backward pass
    optimizer.step()
    optimizer.zero_grad()

    # Only print information on every 10th epoch
    if (epoch + 1) % 10 != 0:
        continue
    print(f'epoch: {epoch + 1}, loss = {loss.item():.4f}')

# Evaluate our model on the test data. Evaluation should not be part of the
# computational graph, so it runs with gradient tracking switched off.
with torch.no_grad():
    # Without no_grad, the operations below (including .round()) would be
    # recorded for gradient calculation, which is not needed once training is done

    # Predicted probabilities for the test samples
    y_predicted = model(X_test)

    # Convert to class labels (0 or 1) by rounding the sigmoid output
    y_predicted_class = y_predicted.round()

    # Accuracy = number of correct predictions / number of test samples
    n_correct = (y_predicted_class == y_test).sum()
    accuracy = n_correct / float(y_test.size(0))

    print(f'accuracy = {accuracy:.4f}')
    """
    0.8860 accuracy, good but not perfect
    Might want to play around with:
        > Number of iterations
        > Number of epochs
        > Learning rate
        > Different optimizer
    """
    
# This is the finished implementation of logistic regression in PyTorch

epoch: 10, loss = 0.5629
epoch: 20, loss = 0.4814
epoch: 30, loss = 0.4245
epoch: 40, loss = 0.3825
epoch: 50, loss = 0.3499
epoch: 60, loss = 0.3239
epoch: 70, loss = 0.3026
epoch: 80, loss = 0.2847
epoch: 90, loss = 0.2694
epoch: 100, loss = 0.2562
accuracy = 0.8860


torch.Size([114, 1])
