### 1. Preparing Classification Data

In [150]:
import pandas as pd
import torch
# Load the dataset from a CSV file; the path is relative, so the file must sit in the working directory
Data_Cancer = pd.read_csv('bcancer_data.csv')

In [151]:
# Bare expression on the last line: shows the loaded frame as this cell's output
Data_Cancer

Unnamed: 0,Cl.thickness,Cell.size,Cell.shape,Marg.adhesion,Epith.c.size,Bare.nuclei,Bl.cromatin,Normal.nucleoli,Mitoses,Class
0,1,1,1,1,2,1,2,1,1,0
1,5,1,1,1,1,1,3,1,1,0
2,4,1,1,1,2,1,1,1,1,0
3,2,1,1,1,2,1,2,1,1,0
4,1,2,3,1,2,1,2,1,1,0
...,...,...,...,...,...,...,...,...,...,...
535,1,1,1,1,2,1,1,1,1,0
536,3,1,1,1,2,1,2,3,1,0
537,4,1,1,1,2,1,1,1,1,0
538,2,1,1,1,2,1,1,1,1,0


In [152]:
# Pick the four predictor columns and the label column out of the loaded frame
Features = Data_Cancer.loc[:, ['Bl.cromatin', 'Bare.nuclei', 'Normal.nucleoli', 'Mitoses']]
Target = Data_Cancer.loc[:, 'Class']

# Convert both to float32 tensors (BCEWithLogitsLoss needs floating-point targets)
X = torch.tensor(Features.to_numpy(), dtype=torch.float32)
y = torch.tensor(Target.to_numpy(), dtype=torch.float32)

# Peek at the first 5 samples and their labels
(X[:5], y[:5])


(tensor([[2., 1., 1., 1.],
         [3., 1., 1., 1.],
         [1., 1., 1., 1.],
         [2., 1., 1., 1.],
         [2., 1., 1., 1.]]),
 tensor([0., 0., 0., 0., 0.]))

In [153]:
# Hold out part of the data so the model can be evaluated on unseen samples
from sklearn.model_selection import train_test_split

# 80% training / 20% test; the fixed random_state keeps the shuffle repeatable
train_X, test_X, train_y, test_y = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report how many samples landed in each partition
print('The length of train is:', len(train_X))
print('The length of test is:', len(test_X))

The length of train is: 432
The length of test is: 108


### 2. Building the Classification Model

In [154]:
# Import standard PyTorch
from torch import nn

# Seed PyTorch's global random number generator so later random draws are repeatable between runs
torch.manual_seed(42)

<torch._C.Generator at 0x79f5bc2e6f70>

In [155]:
# Constructing a model class that subclasses nn.Module
class BreastCancerModel1(nn.Module):
    """Two-layer feed-forward binary classifier that returns one raw logit per sample.

    The network is Linear -> ReLU -> Linear. No sigmoid is applied in
    ``forward``; pair the output with a logit-based loss such as
    ``nn.BCEWithLogitsLoss`` or apply ``torch.sigmoid`` to get probabilities.

    Args:
        in_features: Number of input attributes per sample (default 4).
        hidden_units: Width of the hidden layer (default 10).
    """

    def __init__(self, in_features=4, hidden_units=10):
        super().__init__()
        # Layers are created in the same order as before so that, for a given
        # seed, the default model gets exactly the same initial weights.
        self.layer_1 = nn.Linear(in_features=in_features, out_features=hidden_units)  # X -> hidden
        self.output = nn.Linear(in_features=hidden_units, out_features=1)             # hidden -> single logit (y)

        # Non-linear activation (ReLU) applied to the hidden layer
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of shape (N, in_features) to logits of shape (N, 1)."""
        hidden = self.relu(self.layer_1(x))
        return self.output(hidden)


In [156]:
# Create an instance of the model; the bare name on the last line displays its layer summary
BCmodel_0 = BreastCancerModel1()
BCmodel_0

BreastCancerModel1(
  (layer_1): Linear(in_features=4, out_features=10, bias=True)
  (output): Linear(in_features=10, out_features=1, bias=True)
  (relu): ReLU()
)

In [157]:
# Loss: binary cross-entropy computed directly on the model's raw logits
loss_fn = nn.BCEWithLogitsLoss()
# Optimizer: plain SGD over every parameter of the model, learning rate 0.1
optimizer = torch.optim.SGD(params=BCmodel_0.parameters(), lr=0.1)

In [158]:
# Calculate accuracy in order to monitor training
def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of predictions that equal their labels.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted labels, same shape as ``y_true``.

    Returns:
        Accuracy as a float percentage; ``0.0`` when ``y_pred`` is empty.

    Raises:
        ValueError: If the two tensors differ in shape. ``torch.eq`` would
            otherwise broadcast them (e.g. ``(N,)`` against ``(N, 1)``) and
            silently count the wrong number of matches.
    """
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got "
            f"{tuple(y_true.shape)} and {tuple(y_pred.shape)}"
        )

    total = len(y_pred)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / total) * 100

In [159]:
# Fix the seed and determine the number of epochs
torch.manual_seed(42)
epochs = 100

# Construct evaluation and training loop
for epoch in range(epochs):
    # TRAINING THE MODEL
    # Switch back to training mode: the evaluation phase below puts the model
    # in eval mode at the end of every epoch, so without this call every epoch
    # after the first would train in eval mode.
    BCmodel_0.train()

    # 1 - Forward pass: raw logits -> probabilities -> hard 0/1 predictions
    y_logits = BCmodel_0(train_X).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits))

    # 2 - Calculate loss and accuracy
    loss = loss_fn(y_logits, train_y)  # BCEWithLogitsLoss takes the logits, not the rounded predictions
    acc = accuracy_fn(y_true=train_y,
                      y_pred=y_pred)

    # 3 - Optimizer zero grad (clear gradients left over from the previous step)
    optimizer.zero_grad()

    # 4 - Loss backward
    loss.backward()

    # 5 - Optimizer step
    optimizer.step()

    # TESTING THE MODEL
    BCmodel_0.eval()
    with torch.inference_mode():
        # 1 - Forward pass
        test_logits = BCmodel_0(test_X).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))
        # 2 - Calculate loss and accuracy
        test_loss = loss_fn(test_logits, test_y)
        test_acc = accuracy_fn(y_true=test_y,
                               y_pred=test_pred)

    # Print out what's happening every 10th epoch
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test Loss: {test_loss:.5f}, Test Accuracy: {test_acc:.2f}%")



Epoch: 0 | Loss: 1.01539, Accuracy: 54.86% | Test Loss: 0.75656, Test Accuracy: 40.74%
Epoch: 10 | Loss: 0.54787, Accuracy: 44.44% | Test Loss: 0.53087, Test Accuracy: 42.59%
Epoch: 20 | Loss: 0.46570, Accuracy: 89.12% | Test Loss: 0.43736, Test Accuracy: 92.59%
Epoch: 30 | Loss: 0.41162, Accuracy: 89.35% | Test Loss: 0.37192, Test Accuracy: 94.44%
Epoch: 40 | Loss: 0.37355, Accuracy: 90.28% | Test Loss: 0.32428, Test Accuracy: 94.44%
Epoch: 50 | Loss: 0.34464, Accuracy: 90.28% | Test Loss: 0.28824, Test Accuracy: 94.44%
Epoch: 60 | Loss: 0.32112, Accuracy: 91.20% | Test Loss: 0.25982, Test Accuracy: 96.30%
Epoch: 70 | Loss: 0.30094, Accuracy: 91.20% | Test Loss: 0.23650, Test Accuracy: 96.30%
Epoch: 80 | Loss: 0.28323, Accuracy: 91.44% | Test Loss: 0.21682, Test Accuracy: 96.30%
Epoch: 90 | Loss: 0.26740, Accuracy: 92.82% | Test Loss: 0.19990, Test Accuracy: 96.30%


### 3. Build Confusion Matrix

In [160]:
# Import evaluation tools
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score

In [161]:
# Building Confusion Matrix
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows index the
# true labels and columns the predicted labels. labels=[1, 0] puts the
# positive class first, so the layout is
#     [[TP, FN],
#      [FP, TN]]
conf_matrix = confusion_matrix(test_y, test_pred, labels=[1, 0])
True_Positive = conf_matrix[0, 0]   # true 1, predicted 1
False_Negative = conf_matrix[0, 1]  # true 1, predicted 0
False_Positive = conf_matrix[1, 0]  # true 0, predicted 1
True_Negative = conf_matrix[1, 1]   # true 0, predicted 0
print("True_Positive:", True_Positive, "False_Positive:", False_Positive, "False_Negative:", False_Negative, "True_Negative", True_Negative)

True_Positive: 42 False_Positive: 2 False_Negative: 2 True_Negative 62


### 4. Calculate Accuracy, Precision, and Recall.

In [162]:
# Precision: of the samples predicted positive, the fraction that truly are positive
precision = precision_score(test_y, test_pred)
print('Precision Score: %.3f' % precision)

Precision Score: 0.955


In [163]:
# Recall: of the truly positive samples, the fraction the model predicted positive
recall = recall_score(test_y, test_pred)
print('Recall Score: %.3f' % recall)

Recall Score: 0.955


In [164]:
# Accuracy: fraction of all test samples whose predicted label matches the true label
accuracy = accuracy_score(test_y, test_pred)
print('Accuracy Score: %.3f' % accuracy)

Accuracy Score: 0.963
