In [None]:
import matplotlib.pyplot as plt
import os

# Function inputs arg 1: num_epochs --> The number of epochs (training iterations) over which the model is refined. 
# Function inputs arg 2: loss_array --> Array of size 1 x num_epochs. This array contains the BCE loss values calculated while refining the model with SGD. 
# Function inputs arg 3: save_plot --> True or False. When True, saves the plot as a PNG image file.  
# Function inputs arg 4: display_plot --> True or False. When True, displays the plot; otherwise the figure is closed. 
# Function output: Graph with the BCE loss per epoch.
def loss_graph(num_epochs, loss_array, save_plot, display_plot):
    """Plot the BCE loss recorded at each training epoch.

    Args:
        num_epochs: Number of epochs; the x-axis runs over 0 .. num_epochs - 1.
        loss_array: Sequence of loss values, expected to hold one value per
            epoch (it is plotted against the num_epochs x-values).
        save_plot: When true, the figure is written to
            'BCE_calculated_loss.png' (dpi=200).
        display_plot: When true, the figure is shown; otherwise it is closed.

    Returns:
        None. The function only draws, optionally saves, and then shows or
        closes the current matplotlib figure.
    """
    # Set the font size before any artist is created so that it applies to
    # everything drawn by this call. Note this updates the global rcParams.
    plt.rcParams.update({'font.size': 15})

    # Plot the BCE calculated loss per epoch.
    epochs = range(num_epochs)
    plt.plot(epochs, loss_array)
    plt.ylabel('BCE calculated loss', labelpad=10)  # labelpad sets the gap between the axis and its label.
    plt.xlabel('Epoch', labelpad=10)

    # Save the plot if the user desires it.
    if save_plot:
        # The output directory is the working directory with the package
        # folder name swapped for 'img'.
        image_directory = os.getcwd().replace('logistic-classification-package', 'img')
        # Create the directory if needed; savefig does not create it.
        os.makedirs(image_directory, exist_ok=True)
        file_path = os.path.join(image_directory, 'BCE_calculated_loss.png')
        plt.savefig(file_path, dpi=200, bbox_inches='tight')

    # Display the plot if the user desires it, otherwise release the figure.
    if display_plot:
        plt.show()
    else:
        plt.close()

In [None]:
import itertools
import numpy as np
import matplotlib.pyplot as plt

# Function inputs arg 1: cm --> The confusion matrix as generated by the function 'confusion_matrix()'.
# Function inputs arg 2: classes --> Tuple of strings to label class identities on the plot.  
# Function inputs arg 3: normalize --> True or False. When True, each row of the matrix is divided by its row sum, so the entries lie between 0 and 1.  
# Function inputs arg 4: title --> A string used as the plot title. 
# Function inputs arg 5: cmap --> The chosen colormap. 
# Function inputs arg 6: save_plot --> True or False. When True, saves the plot as a PNG image file.  
# Function inputs arg 7: display_plot --> True or False. When True, displays the plot; otherwise the figure is closed. 
# Function output: Figure with the confusion matrix. 
def plot_confusion_matrix(cm,
                          classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues,
                          save_plot=True,
                          display_plot=True):
    """Draw a confusion matrix as an annotated heat map.

    Args:
        cm: 2-D confusion matrix (array-like); rows are drawn on the y-axis
            labelled 'True', columns on the x-axis labelled 'Predicted'.
        classes: Sequence of class-name strings used as tick labels.
        normalize: When true, each row is divided by its row sum before
            plotting and cells are annotated with two decimals; otherwise
            cells are annotated as integers.
        title: Title placed above the plot.
        cmap: Matplotlib colormap used for the heat map.
        save_plot: When true, the figure is written to 'confusion_matrix.png'
            (dpi=200).
        display_plot: When true, the figure is shown; otherwise it is closed.

    Returns:
        None. The function only draws, optionally saves, and then shows or
        closes the current matplotlib figure.
    """
    # Accept any array-like input; the code below relies on ndarray methods.
    cm = np.asarray(cm)

    if normalize:
        # Divide each row by its total. Rows that sum to zero are left as
        # zeros rather than producing NaN from a 0/0 division.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Annotate every cell; use white text on cells above half the maximum
    # value so the number stays readable against the darker colour.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True')
    plt.xlabel('Predicted')
    # Run the layout pass after the axis labels exist so they are included.
    plt.tight_layout()

    # Save the plot if the user desires it.
    if save_plot:
        # Imported locally so this function does not rely on another
        # notebook cell having imported os.
        import os

        # The output directory is the working directory with the package
        # folder name swapped for 'img'.
        image_directory = os.getcwd().replace('logistic-classification-package', 'img')
        # Create the directory if needed; savefig does not create it.
        os.makedirs(image_directory, exist_ok=True)
        # Use a dedicated file name so the loss plot is not overwritten.
        file_path = os.path.join(image_directory, 'confusion_matrix.png')
        plt.savefig(file_path, dpi=200, bbox_inches='tight')

    # Display the plot if the user desires it, otherwise release the figure.
    if display_plot:
        plt.show()
    else:
        plt.close()

In [9]:
from sklearn import datasets 
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
import torch
import torch.nn as nn
import math
import numpy as np 

# A function which trains a neural network to predict whether a breast tumour is malignant or benign. 
def neural_network(num_epochs=10000, learning_rate=0.01):
    """Train and evaluate a small feed-forward classifier on the breast-cancer data.

    The data returned by sklearn's ``load_breast_cancer`` is split into
    training and testing sets (test_size=0.33, random_state=1234),
    standardised, and used to fit a three-layer sigmoid network with binary
    cross-entropy loss. The loss curve is plotted with ``loss_graph`` and a
    confusion matrix is drawn with ``plot_confusion_matrix``.

    Args:
        num_epochs: Number of passes over the full training set. Defaults to
            10000, the value that was previously hard-coded.
        learning_rate: Learning rate given to ``torch.optim.SGD``. Defaults to
            0.01, the value that was previously hard-coded.

    Returns:
        A tuple ``(model, accuracy, y_predicted_classes, y_testing)``:
        the trained ``nn.Module``, the fraction of test samples classified
        correctly, the rounded test predictions as a NumPy array, and the
        test targets as a NumPy array (both arrays have one column).
    """

    ##### (1) Load and prepare data.
    data = datasets.load_breast_cancer()
    x, y = data.data, data.target

    # Get data dimensions.
    _, num_features = x.shape

    # Split the data into training data and testing data.
    x_training, x_testing, y_training, y_testing = train_test_split(
        x, y, test_size=0.33, random_state=1234)

    # Scale the data. The scaler is fitted on the training set only and the
    # same mean/variance is then applied to the test set; fitting a second
    # scaler on the test set would put the two sets on different scales.
    scaler = StandardScaler()
    x_training = scaler.fit_transform(x_training)
    x_testing = scaler.transform(x_testing)

    # Convert data to float32 tensors; targets become column vectors so their
    # shape matches the single-column model output.
    x_training = torch.from_numpy(x_training.astype(np.float32))
    x_testing = torch.from_numpy(x_testing.astype(np.float32))
    y_training = torch.from_numpy(y_training.astype(np.float32).reshape(-1, 1))
    y_testing = torch.from_numpy(y_testing.astype(np.float32).reshape(-1, 1))

    ##### (2) Create our model.
    class NeuralNetwork(nn.Module):
        """Three linear layers, each followed by a sigmoid."""

        def __init__(self, num_features):
            super(NeuralNetwork, self).__init__()
            # Layer widths shrink from num_features to half, to a quarter, to 1.
            half = num_features // 2
            quarter = num_features // 4
            self.linear_1 = nn.Linear(num_features, half)
            self.linear_2 = nn.Linear(half, quarter)
            self.linear_3 = nn.Linear(quarter, 1)

            self.sigmoid = nn.Sigmoid()

        def forward(self, x):
            output_1 = self.sigmoid(self.linear_1(x))
            output_2 = self.sigmoid(self.linear_2(output_1))
            y_predicted = self.sigmoid(self.linear_3(output_2))
            return y_predicted

    # Create an instance of our model.
    model = NeuralNetwork(num_features)

    ##### (3) Establish the loss and the optimiser.
    calc_loss = nn.BCELoss()  # Built-in binary cross entropy loss from PyTorch.
    # The SGD optimiser is used, but every step below sees the whole training
    # set, so each update is a full-batch gradient descent step.
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    ##### (4) Training loop.
    loss_array = []
    for _ in range(num_epochs):
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass: compute the predictions and their loss.
        y_predicted = model(x_training)
        loss = calc_loss(y_predicted, y_training)

        # Log the loss per epoch as a plain Python float.
        loss_array.append(loss.item())

        # Backward pass: compute the gradients, then update the weights.
        loss.backward()
        optimizer.step()

    ##### (5) Test the model.
    with torch.no_grad():
        y_predicted = model(x_testing)
        # Round the sigmoid output to the nearest class label (0 or 1).
        y_predicted_classes = y_predicted.round()
        accuracy = y_predicted_classes.eq(y_testing).sum().detach().numpy() / float(y_testing.shape[0])

    ##### (6) Plot data associated with the model.

    # Plot the loss graph (not saved, displayed).
    loss_graph(num_epochs, loss_array, False, True)

    # Plot the confusion matrix.
    confusion = confusion_matrix(y_testing.detach().numpy(), y_predicted_classes.detach().numpy())
    # NOTE(review): the label order assumes target 0 is malignant and target 1
    # is benign - confirm against the dataset description.
    names = ('Malignant', 'Benign')
    plt.figure()
    # NOTE(review): this cell also runs
    # `from sklearn.metrics import plot_confusion_matrix`, which rebinds the
    # name to scikit-learn's function if it executes after the local helper is
    # defined. The keyword arguments below belong to the local helper -
    # confirm which one is in scope.
    # NOTE(review): with save_plot=False and display_plot=False the local
    # helper closes the figure without showing or saving it - confirm that is
    # intended.
    plot_confusion_matrix(confusion, names, save_plot=False, display_plot=False)

    ##### (7) Return data.
    y_predicted_classes = y_predicted_classes.detach().numpy()
    y_testing = y_testing.detach().numpy()

    return model, accuracy, y_predicted_classes, y_testing
    