# Import library and dataset from sklearn

In [1]:
import torch, os
from torch import nn
import numpy as np
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.datasets import load_breast_cancer
dataset = load_breast_cancer()
X = dataset.data
y = dataset.target

# Change the dataset to tensor and split train and test dataset 

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X_a = torch.from_numpy(X.astype(np.float32)).to(device)
y_a = (torch.from_numpy(y.astype(np.float32))).view(-1,1).to(device)

dataset = TensorDataset(X_a, y_a)

test_ratio = 0.2  # 20% data split into test dataset
n_sample,n_feature = X_a.shape
test_size = int(n_sample * test_ratio)
train_size = n_sample - test_size
train_ds, test_ds = random_split(dataset,[train_size, test_size])
train_dataloader = DataLoader(train_ds, batch_size=train_size, shuffle=True)
test_dataloader = DataLoader(test_ds, batch_size=test_size)

# Create a NN model with 2 hidden layer
first layer = 10

second layer = 5

In [5]:
class LogisticNN(nn.Module):
    def __init__(self):
        super(LogisticNN, self).__init__()
        layer1_out = 10
        layer2_out = 5
        self.layer1 = nn.Linear(30,layer1_out,bias=True)
        self.layer2 = nn.Linear(layer1_out,layer2_out,bias=True)
        self.output = nn.Linear(layer2_out,1,bias=True)

    def forward(self, x):
        z = torch.relu(self.layer1(x))
        z = torch.relu(self.layer2(z))
        z = torch.sigmoid(self.output(z))
        return z

# Load, Train, and Save the model
loss model = Binary Cross Entropy

Optimizer = Adam optimizer

In [9]:
model = LogisticNN().to(device)
if os.path.exists('breast_cancer.pt'):
    model.load_state_dict(torch.load('breast_cancer.pt'))
    epoch_max = 100
    report = 10
else:
    epoch_max = 10000
    report = 500
loss = nn.BCELoss()
rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(),lr=rate,weight_decay=1e-3)
for i in range(epoch_max):
    for x_batch_train, y_batch_train in train_dataloader:
        z = model(x_batch_train)
        J = loss(z,y_batch_train)
        J.backward()
        optimizer.step()
        optimizer.zero_grad()
    if i % report  == 0:
        with torch.no_grad():
            acc = (1-(z.round()-y_batch_train).abs().mean().item())*100
            for x_batch_test, y_batch_test in test_dataloader:
                zx = model(x_batch_test)
                acc_test = (1-(zx-y_batch_test).abs().mean().item())*100
        print(f'Epoch {i}: Cost = {J.item():.4f}, train acc = {acc:.4f}%, test acc = {acc_test:.4f}%')

torch.save(model.state_dict(),'breast_cancer.pt')

Epoch 0: Cost = 0.1367, train acc = 97.5877%, test acc = 95.5154%
Epoch 10: Cost = 0.2344, train acc = 94.5175%, test acc = 98.2503%
Epoch 20: Cost = 0.1697, train acc = 96.4912%, test acc = 99.2701%
Epoch 30: Cost = 0.1265, train acc = 97.1491%, test acc = 99.2481%
Epoch 40: Cost = 0.1120, train acc = 97.1491%, test acc = 99.0231%
Epoch 50: Cost = 0.1134, train acc = 97.3684%, test acc = 99.1146%
Epoch 60: Cost = 0.1102, train acc = 97.5877%, test acc = 99.2165%
Epoch 70: Cost = 0.1090, train acc = 97.1491%, test acc = 99.1269%
Epoch 80: Cost = 0.1084, train acc = 97.3684%, test acc = 99.1576%
Epoch 90: Cost = 0.1078, train acc = 97.8070%, test acc = 99.1213%


# Report of the neural network training
createing confusion matrix, accuracy, recall, precision, and F1 score. 

In [117]:
confusion_matrix_val = np.zeros(y.shape).astype(int)

with torch.no_grad():
    # first change data from gpu memory to cpu memory 
    # Change to numpy, vectorize, rounding
    z = model(X_a).cpu().detach().numpy().flatten()

#change to interger with 0.5 as reference 
z = z>=0.5  
for i, val in enumerate(y):
    if y[i] == z[i]: #True value

        if y[i]:
            confusion_matrix_val[i] = 1 # True Positive
        else:
            confusion_matrix_val[i] = 4 # True Negative
    else:
        if y[i]:
            confusion_matrix_val[i] = 3 # False negative, predicted negative but true value positive
        else:
            confusion_matrix_val[i] = 2
        

confusion_mat = np.array([0,0,0,0]) # TP, FP, FN, TN
for i in range(4):
    confusion_mat[i] = np.count_nonzero((confusion_matrix_val==(i+1))*1)
precision = confusion_mat[0]/(confusion_mat[0]+confusion_mat[1])
recall = confusion_mat[0]/(confusion_mat[0]+confusion_mat[2])
F1 = 2*precision*recall/(precision+recall)
accuracy = (confusion_mat[0]+confusion_mat[3]) /confusion_mat.sum()

print(f'The result show for all value that\
    \n True positive = {confusion_mat[0]}\
    \n False positive = {confusion_mat[1]}\
    \n False Negative = {confusion_mat[2]}\
    \n True negative = {confusion_mat[3]}\
    \n\n Accuracy = {accuracy*100:.5f}%\
    \n Precision = {precision*100:.5f}%\
    \n Recall = {recall*100:.5f}%\
    \n F1 score = {F1:.4f}    ')

The result show for all value that    
 True positive = 353    
 False positive = 7    
 False Negative = 4    
 True negative = 205    

 Accuracy = 98.06678%    
 Precision = 98.05556%    
 Recall = 98.87955%    
 F1 score = 0.9847    
