In [1]:
# 1) Design Model (input, output, forward pass)
# 2) Construct loss and optimizer
# 3) Training Loop
#  - forward pass: compute prediction
#  - backward pass: compute gradients
#  - update weights

In [2]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from pprint import pprint

## EDA and Data Preprocessing

In [3]:
# Load the breast-cancer dataset that ships with scikit-learn.
bc = datasets.load_breast_cancer()
# Last expression of the cell, so the notebook displays the available keys.
bc.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [4]:
# Show the dataset's own description text (same entry as bc['DESCR']).
pprint(bc.DESCR)

('.. _breast_cancer_dataset:\n'
 '\n'
 'Breast cancer wisconsin (diagnostic) dataset\n'
 '--------------------------------------------\n'
 '\n'
 '**Data Set Characteristics:**\n'
 '\n'
 '    :Number of Instances: 569\n'
 '\n'
 '    :Number of Attributes: 30 numeric, predictive attributes and the class\n'
 '\n'
 '    :Attribute Information:\n'
 '        - radius (mean of distances from center to points on the perimeter)\n'
 '        - texture (standard deviation of gray-scale values)\n'
 '        - perimeter\n'
 '        - area\n'
 '        - smoothness (local variation in radius lengths)\n'
 '        - compactness (perimeter^2 / area - 1.0)\n'
 '        - concavity (severity of concave portions of the contour)\n'
 '        - concave points (number of concave portions of the contour)\n'
 '        - symmetry\n'
 '        - fractal dimension ("coastline approximation" - 1)\n'
 '\n'
 '        The mean, standard error, and "worst" or largest (mean of the three\n'
 '        worst/largest val

In [5]:
def _print_section(heading, value):
    """Print a heading, the dashed rule beneath it, and then the value."""
    print(heading)
    print("---------------------")
    print(value)


# Every heading after the first starts with "\n" so the sections are
# separated by a blank line in the output.
_print_section("NUMBER OF FEATURES", len(bc["feature_names"]))
_print_section("\nFEATURE NAMES", list(bc["feature_names"]))
_print_section("\nCLASS NAMES", bc["target_names"])
_print_section("\nFIRST 10 TARGET/GROUNDTRUTH VALUES", list(bc["target"][:10]))
_print_section("\nFIRST ROW OF FEATURES", bc["data"][0])

NUMBER OF FEATURES
---------------------
30

FEATURE NAMES
---------------------
['mean radius', 'mean texture', 'mean perimeter', 'mean area', 'mean smoothness', 'mean compactness', 'mean concavity', 'mean concave points', 'mean symmetry', 'mean fractal dimension', 'radius error', 'texture error', 'perimeter error', 'area error', 'smoothness error', 'compactness error', 'concavity error', 'concave points error', 'symmetry error', 'fractal dimension error', 'worst radius', 'worst texture', 'worst perimeter', 'worst area', 'worst smoothness', 'worst compactness', 'worst concavity', 'worst concave points', 'worst symmetry', 'worst fractal dimension']

CLASS NAMES
---------------------
['malignant' 'benign']

FIRST 10 TARGET/GROUNDTRUTH VALUES
---------------------
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

FIRST ROW OF FEATURES
---------------------
[1.799e+01 1.038e+01 1.228e+02 1.001e+03 1.184e-01 2.776e-01 3.001e-01
 1.471e-01 2.419e-01 7.871e-02 1.095e+00 9.053e-01 8.589e+00 1.534e+02
 6.399e-0

In [6]:
# Feature matrix and label vector, pulled out of the dataset object.
X = bc.data
y = bc.target

In [7]:
# Unpack the two dimensions of X; n_features later sizes the model's input layer.
n_samples, n_features = X.shape
print(*X.shape)

569 30


In [8]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

In [9]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then applied unchanged to both splits, so nothing
# about the test data leaks into the preprocessing.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [10]:
# Convert every split to a float32 tensor.
#
# torch.as_tensor accepts both NumPy arrays and tensors, so this cell can be
# re-run safely: on the first run it converts the arrays, and on later runs
# (when the names already hold float32 tensors) it leaves them as they are.
# The previous `.astype(...)` form raised AttributeError on a re-run because
# tensors have no `astype` method.
X_train = torch.as_tensor(X_train, dtype=torch.float32)
X_test = torch.as_tensor(X_test, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.float32)
y_test = torch.as_tensor(y_test, dtype=torch.float32)

In [11]:
# Inspect the label tensor's shape before it is reshaped in the next cell.
print(y_train.size())

torch.Size([455])


In [12]:
# Turn each label vector into a single-column matrix (one row per sample);
# -1 lets PyTorch infer the number of rows.
y_train = y_train.view(-1, 1)
y_test = y_test.view(-1, 1)
print(y_train[:5], y_train.shape, sep="\n")

tensor([[1.],
        [1.],
        [1.],
        [0.],
        [1.]])
torch.Size([455, 1])


## Model

In [13]:
class LogisticRegression(nn.Module):
    """Logistic regression: a single linear layer followed by a sigmoid.

    Args:
        n_input_features: Number of features in each input row.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # One output unit is enough for a binary target.
        self.linear = nn.Linear(in_features=n_input_features, out_features=1)

    def forward(self, x):
        """Return sigmoid(linear(x)), i.e. one value in (0, 1) per input row."""
        logits = self.linear(x)
        return logits.sigmoid()

In [14]:
# Seed PyTorch's RNG before the model is built: nn.Linear draws its initial
# weights at random, so without a seed the loss curve and the final accuracy
# change on every Restart & Run All. The value matches the random_state used
# for the train/test split.
torch.manual_seed(1234)
model = LogisticRegression(n_features)

## Loss and Optimizer

In [15]:
# Binary cross-entropy on probabilities, averaged over all elements.
criterion = nn.BCELoss(reduction="mean")

In [16]:
# SGD over every parameter of the model, with a fixed learning rate.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.01,
)

## Training Loop

In [17]:
# Number of iterations of the training loop below; each iteration runs the
# model on the whole of X_train.
epochs: int = 100

In [18]:
for epoch in range(epochs):

    # Forward pass: predictions for the full training set, then the loss
    y_predicted = model(X_train)
    loss = criterion(y_predicted, y_train)

    # Backward pass: compute the gradient of the loss w.r.t. each parameter
    loss.backward()

    # Apply the update, then clear the gradients so they do not accumulate
    # into the next epoch
    optimizer.step()
    optimizer.zero_grad()

    # Report every tenth epoch, counting from 1
    if not (epoch + 1) % 10:
        print(f"Epoch {epoch + 1} - loss: {loss.item():.4f}")

Epoch 10 - loss: 0.6389
Epoch 20 - loss: 0.5151
Epoch 30 - loss: 0.4388
Epoch 40 - loss: 0.3871
Epoch 50 - loss: 0.3497
Epoch 60 - loss: 0.3211
Epoch 70 - loss: 0.2986
Epoch 80 - loss: 0.2802
Epoch 90 - loss: 0.2648
Epoch 100 - loss: 0.2518


In [20]:
# Evaluation only, so gradient tracking is switched off.
with torch.no_grad():
    y_predicted = model(X_test)
    # Round each predicted probability to a class label.
    y_predicted_cls = torch.round(y_predicted)
    # Accuracy = matching labels / number of test rows.
    n_correct = torch.eq(y_predicted_cls, y_test).sum()
    acc = n_correct / float(len(y_test))
    print(f"Accuracy = {acc:.4f}")

Accuracy = 0.9035
