In [None]:
##CS404 Artifical Intelligence Project
##Supplementary code to go with paper
##Adnan Silajdzic
##Edin Ziga
##Mirza Redzepovic

In [None]:
!pip install pillow
!pip install tqdm
!pip install numpy
!pip install tensorflow
!pip install pydot
!pip install pydot_ng 

In [34]:
import os
import numpy as np
import pydot
import graphviz
import random

from PIL import Image
from tqdm import tqdm

#TensorFlow implementation
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import plot_model

In [35]:
#LOAD IMAGES

# Parameters
imageSize = 48
# Set the desired size (in this case, 28x28)
size = (imageSize, imageSize)

#Directory of chest_xray\train\NORMAL
directory = r"D:\PythonProjects\PneumoniaImagesANN\chest_xray\train\NORMAL"

NormalTraining = []
for filename in tqdm(os.listdir(directory), desc = "Loading Normal Training Data"):
    # Load the image
    image = Image.open(os.path.join(directory, filename))
    image = image.convert('L')
    image = image.resize(size)
    pixels = list(image.getdata())
    pixels_normalized = [pixel/255.0 for pixel in pixels]
    NormalTraining.append(pixels_normalized)
    
#Directory of chest_xray\train\PNEUMONIA
directory = r"D:\PythonProjects\PneumoniaImagesANN\chest_xray\train\PNEUMONIA"

PneumoniaTraining = []
for filename in tqdm(os.listdir(directory), desc = "Loading Pneumonia Training Data"):
    image = Image.open(os.path.join(directory, filename))
    image = image.convert('L')
    image = image.resize(size)
    pixels = list(image.getdata())
    pixels_normalized = [pixel/255.0 for pixel in pixels]
    PneumoniaTraining.append(pixels_normalized)

#Directory of chest_xray\test\NORMAL
directory = r"D:\PythonProjects\PneumoniaImagesANN\chest_xray\test\NORMAL"  
    
NormalTesting = []
for filename in tqdm(os.listdir(directory), desc = "Loading Normal Testing Data"):
    image = Image.open(os.path.join(directory, filename))
    image = image.convert('L')
    image = image.resize(size)
    pixels = list(image.getdata())
    pixels_normalized = [pixel/255.0 for pixel in pixels]
    NormalTesting.append(pixels_normalized)

#Directory of chest_xray\test\PNEUMONIA
directory = r"D:\PythonProjects\PneumoniaImagesANN\chest_xray\test\PNEUMONIA"
    
PneumoniaTesting = []
for filename in tqdm(os.listdir(directory), desc = "Loading Pneumonia Testing Data"):
    # Load the image
    image = Image.open(os.path.join(directory, filename))
    image = image.convert('L')
    image = image.resize(size)
    pixels = list(image.getdata())
    pixels_normalized = [pixel/255.0 for pixel in pixels]
    PneumoniaTesting.append(pixels_normalized)
    
print("Images successfully loaded")
print(f"All images are grayscaled and are {imageSize} by {imageSize}")

training_data = NormalTraining + PneumoniaTraining
testing_data = NormalTesting + PneumoniaTesting

training_labels = [0]*len(NormalTraining) + [1]*len(PneumoniaTraining)
testing_labels = [0]*len(NormalTesting) + [1]*len(PneumoniaTesting)

print(f"Training data size - {len(training_data)}")
print(f"Testing data size - {len(testing_data)}")

Loading Normal Training Data: 100%|████████████████████████████████████████████████| 1341/1341 [00:16<00:00, 79.59it/s]
Loading Pneumonia Training Data: 100%|████████████████████████████████████████████| 3875/3875 [00:15<00:00, 252.80it/s]
Loading Normal Testing Data: 100%|██████████████████████████████████████████████████| 234/234 [00:02<00:00, 113.41it/s]
Loading Pneumonia Testing Data: 100%|███████████████████████████████████████████████| 390/390 [00:01<00:00, 298.02it/s]

Images successfully loaded
All images are grayscaled and are 48 by 48
Training data size - 5216
Testing data size - 624





In [36]:
# Dense layer
class Layer_Dense :
    # Layer initialization
    def __init__ ( self , n_inputs , n_neurons ,
                weight_regularizer_l1 = 0 , weight_regularizer_l2 = 0 ,
                bias_regularizer_l1 = 0 , bias_regularizer_l2 = 0 ):
        # Initialize weights and biases
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros(( 1 , n_neurons))
        # Set regularization strength
        self.weight_regularizer_l1 = weight_regularizer_l1
        self.weight_regularizer_l2 = weight_regularizer_l2
        self.bias_regularizer_l1 = bias_regularizer_l1
        self.bias_regularizer_l2 = bias_regularizer_l2
    # Forward pass
    def forward ( self , inputs ):
        # Remember input values
        self.inputs = inputs
        # Calculate output values from inputs, weights and biases
        self.output = np.dot(inputs, self.weights) + self.biases
    # Backward pass
    def backward ( self , dvalues ):
        # Gradients on parameters
        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dbiases = np.sum(dvalues, axis = 0 , keepdims = True )
        # Gradients on regularization
        # L1 on weights
        if self.weight_regularizer_l1 > 0 :
            dL1 = np.ones_like(self.weights)
            dL1[self.weights < 0 ] = - 1
            self.dweights += self.weight_regularizer_l1 * dL1
        # L2 on weights
        if self.weight_regularizer_l2 > 0 :
            self.dweights += 2 * self.weight_regularizer_l2 * \
            self.weights
        # L1 on biases
        if self.bias_regularizer_l1 > 0 :
            dL1 = np.ones_like(self.biases)
            dL1[self.biases < 0 ] = - 1
            self.dbiases += self.bias_regularizer_l1 * dL1
        # L2 on biases
        if self.bias_regularizer_l2 > 0 :
            self.dbiases += 2 * self.bias_regularizer_l2 * \
            self.biases
            # Gradient on values
        self.dinputs = np.dot(dvalues, self.weights.T)
        

In [37]:
# Dropout
class Layer_Dropout :
    # Init
    def __init__ ( self , rate ):
        # Store rate, we invert it as for example for dropout
        # of 0.1 we need success rate of 0.9
        self.rate = 1 - rate
    # Forward pass
    def forward ( self , inputs ):
        # Save input values
        self.inputs = inputs
        # Generate and save scaled mask
        self.binary_mask = np.random.binomial( 1 , self.rate,
        size = inputs.shape) / self.rate
        # Apply mask to output values
        self.output = inputs * self.binary_mask
    # Backward pass
    def backward ( self , dvalues ):
        # Gradient on values
        self.dinputs = dvalues * self.binary_mask

In [38]:
# ReLU activation
class Activation_ReLU :
    # Forward pass
    def forward ( self , inputs ):
        # Remember input values
        self.inputs = inputs
        # Calculate output values from inputs
        self.output = np.maximum( 0 , inputs)
    # Backward pass
    def backward ( self , dvalues ):
        # Since we need to modify original variable,
        # let's make a copy of values first
        self.dinputs = dvalues.copy()
        # Zero gradient where input values were negative
        self.dinputs[self.inputs <= 0 ] = 0

In [39]:
class Activation_TanH:
    # Forward pass
    def forward(self, inputs):
        self.output = np.tanh(inputs)
        
    # Backward pass
    def backward(self, dvalues):
        # Calculate derivative of tanh function
        derivative = 1 - self.output**2
        # Chain rule
        self.dinputs = dvalues * derivative


In [40]:
class Activation_Softmax :
    # Forward pass
    def forward ( self , inputs ):
        # Remember input values
        self.inputs = inputs
        # Get unnormalized probabilities
        exp_values = np.exp(inputs - np.max(inputs, axis = 1 ,
        keepdims = True ))
        # Normalize them for each sample
        probabilities = exp_values / np.sum(exp_values, axis = 1 ,
        keepdims = True )
        self.output = probabilities
    # Backward pass
    def backward ( self , dvalues ):
        # Create uninitialized array
        self.dinputs = np.empty_like(dvalues)
        # Enumerate outputs and gradients
        for index, (single_output, single_dvalues) in \
            enumerate ( zip (self.output, dvalues)):
            # Flatten output array
            single_output = single_output.reshape( - 1 , 1 )
            # Calculate Jacobian matrix of the output and
            jacobian_matrix = np.diagflat(single_output) - \
            np.dot(single_output, single_output.T)
            # Calculate sample-wise gradient
            # and add it to the array of sample gradients
            self.dinputs[index] = np.dot(jacobian_matrix,
            single_dvalues)

In [41]:
class Activation_Sigmoid :
    # Forward pass
    def forward ( self , inputs ):
        # Save input and calculate/save output
        # of the sigmoid function
        self.inputs = inputs
        self.output = 1 / ( 1 + np.exp( - inputs))
    # Backward pass
    def backward ( self , dvalues ):
        # Derivative - calculates from output of the sigmoid function
        self.dinputs = dvalues * ( 1 - self.output) * self.output

In [42]:
# SGD optimizer
class Optimizer_SGD :
    # Initialize optimizer - set settings,
    # learning rate of 1. is default for this optimizer
    def __init__ ( self , learning_rate = 1. , decay = 0. , momentum = 0. ):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0
        self.momentum = momentum
    # Call once before any parameter updates
    def pre_update_params ( self ):
        if self.decay:
            self.current_learning_rate = self.learning_rate * \
            ( 1. / ( 1. + self.decay * self.iterations))
            
    # Update parameters
    def update_params ( self , layer ):
        # If we use momentum
        if self.momentum:
            # If layer does not contain momentum arrays, create them
            # filled with zeros
            if not hasattr (layer, 'weight_momentums' ):
                layer.weight_momentums = np.zeros_like(layer.weights)
                # If there is no momentum array for weights
                # The array doesn't exist for biases yet either.
                layer.bias_momentums = np.zeros_like(layer.biases)
            # Build weight updates with momentum - take previous
            # updates multiplied by retain factor and update with
            # current gradients
            weight_updates = \
                self.momentum * layer.weight_momentums - \
                self.current_learning_rate * layer.dweights
            layer.weight_momentums = weight_updates
            # Build bias updates
            bias_updates = \
                self.momentum * layer.bias_momentums - \
                self.current_learning_rate * layer.dbiases
            layer.bias_momentums = bias_updates
            # Vanilla SGD updates (as before momentum update)
        else:
            weight_updates = - self.current_learning_rate * \
            layer.dweights
            bias_updates = - self.current_learning_rate * \
            layer.dbiases
        # Update weights and biases using either
        # vanilla or momentum updates
        layer.weights += weight_updates
        layer.biases += bias_updates
    # Call once after any parameter updates
    def post_update_params ( self ):
        self.iterations += 1

In [43]:
# Adagrad optimizer
class Optimizer_Adagrad :
    # Initialize optimizer - set settings
    def __init__ ( self , learning_rate = 1. , decay = 0. , epsilon = 1e-7 ):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0
        self.epsilon = epsilon
    # Call once before any parameter updates
    def pre_update_params ( self ):
        if self.decay:
            self.current_learning_rate = self.learning_rate * \
            ( 1. / ( 1. + self.decay * self.iterations))
    # Update parameters
    def update_params ( self , layer ):
        # If layer does not contain cache arrays,
        # create them filled with zeros
        if not hasattr (layer, 'weight_cache' ):
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_cache = np.zeros_like(layer.biases)
        # Update cache with squared current gradients
        layer.weight_cache += layer.dweights ** 2
        layer.bias_cache += layer.dbiases ** 2
        # Vanilla SGD parameter update + normalization
        # with square rooted cache
        layer.weights += - self.current_learning_rate * \
        layer.dweights / \
        (np.sqrt(layer.weight_cache) + self.epsilon)
        layer.biases += - self.current_learning_rate * \
        layer.dbiases / \
        (np.sqrt(layer.bias_cache) + self.epsilon)
        # Call once after any parameter updates
    def post_update_params ( self ):
        self.iterations += 1

In [44]:
# RMSprop optimizer
class Optimizer_RMSprop :
    # Initialize optimizer - set settings
    def __init__ ( self , learning_rate = 0.001 , decay = 0. , epsilon = 1e-7 ,
    rho = 0.9 ):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0
        self.epsilon = epsilon
        self.rho = rho
    # Call once before any parameter updates
    def pre_update_params ( self ):
        if self.decay:
            self.current_learning_rate = self.learning_rate * \
            ( 1. / ( 1. + self.decay * self.iterations))
    # Update parameters
    def update_params ( self , layer ):
        # If layer does not contain cache arrays,
        # create them filled with zeros
        if not hasattr (layer, 'weight_cache' ):
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_cache = np.zeros_like(layer.biases)
        # Update cache with squared current gradients
        layer.weight_cache = self.rho * layer.weight_cache + \
        ( 1 - self.rho) * layer.dweights ** 2
        layer.bias_cache = self.rho * layer.bias_cache + \
        ( 1 - self.rho) * layer.dbiases ** 2
        # Vanilla SGD parameter update + normalization
        # with square rooted cache
        layer.weights += - self.current_learning_rate * \
        layer.dweights / \
        (np.sqrt(layer.weight_cache) + self.epsilon)
        layer.biases += - self.current_learning_rate * \
        layer.dbiases / \
        (np.sqrt(layer.bias_cache) + self.epsilon)
        # Call once after any parameter updates
    def post_update_params ( self ):
        self.iterations += 1

In [45]:
# Adam optimizer
class Optimizer_Adam :
    # Initialize optimizer - set settings
    def __init__ ( self , learning_rate = 0.001 , decay = 0. , epsilon = 1e-7 ,
    beta_1 = 0.9 , beta_2 = 0.999 ):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0
        self.epsilon = epsilon
        self.beta_1 = beta_1
        self.beta_2 = beta_2
    # Call once before any parameter updates
    def pre_update_params ( self ):
        if self.decay:
            self.current_learning_rate = self.learning_rate * \
            ( 1. / ( 1. + self.decay * self.iterations))
    # Update parameters
    def update_params ( self , layer ):
        # If layer does not contain cache arrays,
        # create them filled with zeros
        if not hasattr (layer, 'weight_cache' ):
            layer.weight_momentums = np.zeros_like(layer.weights)
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_momentums = np.zeros_like(layer.biases)
            layer.bias_cache = np.zeros_like(layer.biases)
        # Update momentum with current gradients
        layer.weight_momentums = self.beta_1 * \
            layer.weight_momentums + \
            ( 1 - self.beta_1) * layer.dweights
        layer.bias_momentums = self.beta_1 * \
            layer.bias_momentums + \
            ( 1 - self.beta_1) * layer.dbiases
        # Get corrected momentum
        # self.iteration is 0 at first pass
        # and we need to start with 1 here
        weight_momentums_corrected = layer.weight_momentums / \
            ( 1 - self.beta_1 ** (self.iterations + 1 ))
        bias_momentums_corrected = layer.bias_momentums / \
            ( 1 - self.beta_1 ** (self.iterations + 1 ))
        # Update cache with squared current gradients
        layer.weight_cache = self.beta_2 * layer.weight_cache + \
            ( 1 - self.beta_2) * layer.dweights ** 2
        layer.bias_cache = self.beta_2 * layer.bias_cache + \
            ( 1 - self.beta_2) * layer.dbiases ** 2
        # Get corrected cache
        weight_cache_corrected = layer.weight_cache / \
            ( 1 - self.beta_2 ** (self.iterations + 1 ))
        bias_cache_corrected = layer.bias_cache / \
            ( 1 - self.beta_2 ** (self.iterations + 1 ))
        # Vanilla SGD parameter update + normalization
        # with square rooted cache
        layer.weights += - self.current_learning_rate * \
            weight_momentums_corrected / \
            (np.sqrt(weight_cache_corrected) +
            self.epsilon)
        layer.biases += - self.current_learning_rate * \
                bias_momentums_corrected / \
                (np.sqrt(bias_cache_corrected) +
                self.epsilon)
    # Call once after any parameter updates
    def post_update_params ( self ):
        self.iterations += 1

In [46]:
# Common loss class
class Loss :
    # Regularization loss calculation
    def regularization_loss ( self , layer ):
        # 0 by default
        regularization_loss = 0
        # L1 regularization - weights
        # calculate only when factor greater than 0
        if layer.weight_regularizer_l1 > 0 :
            regularization_loss += layer.weight_regularizer_l1 * \
            np.sum(np.abs(layer.weights))
        # L2 regularization - weights
        if layer.weight_regularizer_l2 > 0 :
            regularization_loss += layer.weight_regularizer_l2 * \
            np.sum(layer.weights * \
            layer.weights)
        # L1 regularization - biases
        # calculate only when factor greater than 0
        if layer.bias_regularizer_l1 > 0 :
            regularization_loss += layer.bias_regularizer_l1 * \
            np.sum(np.abs(layer.biases))
        # L2 regularization - biases
        if layer.bias_regularizer_l2 > 0 :
            regularization_loss += layer.bias_regularizer_l2 * \
            np.sum(layer.biases * \
            layer.biases)
        return regularization_loss
    # Calculates the data and regularization losses
    # given model output and ground truth values
    def calculate ( self , output , y ):
        # Calculate sample losses
        sample_losses = self.forward(output, y)
        # Calculate mean loss
        data_loss = np.mean(sample_losses)
        # Return loss
        return data_loss

In [47]:
# Cross-entropy loss
class Loss_CategoricalCrossentropy ( Loss ):
    # Forward pass
    def forward ( self , y_pred , y_true ):
        # Number of samples in a batch
        samples = len (y_pred)
        # Clip data to prevent division by 0
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7 , 1 - 1e-7 )
        # Probabilities for target values -
        # only if categorical labels
        if len (y_true.shape) == 1 :
            correct_confidences = y_pred_clipped[
            range (samples),
            y_true
            ]
        # Mask values - only for one-hot encoded labels
        elif len (y_true.shape) == 2 :
            correct_confidences = np.sum(
            y_pred_clipped * y_true,
            axis = 1
            )
            # Losses
        negative_log_likelihoods = - np.log(correct_confidences)
        return negative_log_likelihoods

    # Backward pass
    def backward ( self , dvalues , y_true ):
        # Number of samples
        samples = len (dvalues)
        # Number of labels in every sample
        # We'll use the first sample to count them
        labels = len (dvalues[ 0 ])
        # If labels are sparse, turn them into one-hot vector
        if len (y_true.shape) == 1 :
            y_true = np.eye(labels)[y_true]
        # Calculate gradient
        self.dinputs = - y_true / dvalues
        # Normalize gradient
        self.dinputs = self.dinputs / samples

In [48]:
# Softmax classifier - combined Softmax activation
# and cross-entropy loss for faster backward step
class Activation_Softmax_Loss_CategoricalCrossentropy ():
    # Creates activation and loss function objects
    def __init__ ( self ):
        self.activation = Activation_Softmax()
        self.loss = Loss_CategoricalCrossentropy()
    # Forward pass
    def forward ( self , inputs , y_true ):
        # Output layer's activation function
        self.activation.forward(inputs)
        # Set the output
        self.output = self.activation.output
        # Calculate and return loss value
        return self.loss.calculate(self.output, y_true)
    # Backward pass
    def backward ( self , dvalues , y_true ):
        # Number of samples
        samples = len (dvalues)
        # If labels are one-hot encoded,
        # turn them into discrete values
        if len (y_true.shape) == 2 :
            y_true = np.argmax(y_true, axis = 1 )
        # Copy so we can safely modify
        self.dinputs = dvalues.copy()
        # Calculate gradient
        self.dinputs[ range (samples), y_true] -= 1
        # Normalize gradient
        self.dinputs = self.dinputs / samples

In [49]:
# Binary cross-entropy loss
class Loss_BinaryCrossentropy ( Loss ):
    # Forward pass
    def forward ( self , y_pred , y_true ):
        # Clip data to prevent division by 0
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7 , 1 - 1e-7 )
        # Calculate sample-wise loss
        sample_losses = - (y_true * np.log(y_pred_clipped) +
        ( 1 - y_true) * np.log( 1 - y_pred_clipped))
        sample_losses = np.mean(sample_losses, axis =- 1 )
        # Return losses
        return sample_losses
    # Backward pass
    def backward ( self , dvalues , y_true ):
        # Number of samples
        samples = len (dvalues)
        # Number of outputs in every sample
        # We'll use the first sample to count them
        outputs = len (dvalues[ 0 ])
        # Clip data to prevent division by 0
        # Clip both sides to not drag mean towards any value
        clipped_dvalues = np.clip(dvalues, 1e-7 , 1 - 1e-7 )
        # Calculate gradient
        self.dinputs = - (y_true / clipped_dvalues -
        ( 1 - y_true) / ( 1 - clipped_dvalues)) / outputs
        # Normalize gradient
        self.dinputs = self.dinputs / samples

In [88]:
#RANDOM SAMPLING WITH REPLACEMENT
print(f'Training data full size - {len(training_data)}')
print(f'Testing data full size - {len(testing_data)}')

#Parameters
random.seed(10)
trainingDataSize = 1800
testingDataSize = 200
noEpochs = 40

print(f'Training data sample size - {trainingDataSize}')
print(f'Testing data sample size - {testingDataSize}')
print(f'No. epochs selected - {noEpochs}')

training_data_sample=[]
training_labels_sample=[]

testing_data_sample=[]
testing_labels_sample=[]

for i in tqdm(range(trainingDataSize), desc = "Loading Training Data"):
    randNum = random.randint(0, 1)
    if randNum == 0:
        randData = random.randint(0, len(NormalTraining)-1)
        training_data_sample.append(NormalTraining[randData])
        training_labels_sample.append(0)
    else:
        randData = random.randint(0, len(PneumoniaTraining)-1)
        training_data_sample.append(PneumoniaTraining[randData])
        training_labels_sample.append(1)
        
        
for i in tqdm(range(testingDataSize), desc = "Loading Testing Data"):
    randNum = random.randint(0, 1)
    if randNum == 0:
        randData = random.randint(0, len(NormalTesting)-1)
        testing_data_sample.append(NormalTesting[randData])
        testing_labels_sample.append(0)
    else:
        randData = random.randint(0, len(PneumoniaTesting)-1)
        testing_data_sample.append(PneumoniaTesting[randData])
        testing_labels_sample.append(1)
        
y=np.array(training_labels_sample)
y = y.reshape( - 1 , 1 )

dense1 = Layer_Dense( imageSize*imageSize , 1024 , weight_regularizer_l2 = 5e-5 ,
                    bias_regularizer_l2 = 5e-5 )
dense2 = Layer_Dense( 1024 , 512 )
dense3 = Layer_Dense( 512 , 256 )
dense4 = Layer_Dense( 256 , 1 )

activation1 = Activation_TanH()
activation2 = Activation_TanH()
activation3 = Activation_TanH()
activation4 = Activation_Sigmoid()

# Create loss function
loss_function = Loss_BinaryCrossentropy()
# Create optimizer
optimizer = Optimizer_Adam( learning_rate = 0.001 , decay = 0.0001 )

for epoch in tqdm(range(1,noEpochs+1), desc = "Training Network"):
    dense1.forward(np.array(training_data_sample))
    activation1.forward(dense1.output)
    
    dense2.forward(activation1.output)
    activation2.forward(dense2.output)
    
    dense3.forward(activation2.output)
    activation3.forward(dense3.output)
    
    dense4.forward(activation3.output)
    activation4.forward(dense4.output)
    
    data_loss = loss_function.calculate(activation4.output, y)
    regularization_loss = \
        loss_function.regularization_loss(dense1) + \
        loss_function.regularization_loss(dense2)
    loss = data_loss + regularization_loss
    
    predictions = (activation2.output > 0.5 ) * 1
    accuracy = np.mean(predictions == y)
    
    if not epoch % 5 :
        print ( f'epoch: {epoch} , ' +
        f'acc: {accuracy:.3f} , ' +
        f'loss: {loss:.3f} (' +
        f'data_loss: {data_loss:.3f} , ' +
        f'reg_loss: {regularization_loss:.3f} ), ' +
        f'lr: {optimizer.current_learning_rate} ' )
        
        
    # Backward pass
    loss_function.backward(activation4.output, y)
    activation4.backward(loss_function.dinputs)
    dense4.backward(activation4.dinputs)
    
    activation3.backward(dense4.dinputs)
    dense3.backward(activation3.dinputs)
    
    activation2.backward(dense3.dinputs)
    dense2.backward(activation2.dinputs)
    
    activation1.backward(dense2.dinputs)
    dense1.backward(np.array(activation1.dinputs))
    
    optimizer.pre_update_params()
    optimizer.update_params(dense1)
    optimizer.update_params(dense2)
    optimizer.update_params(dense3)
    optimizer.update_params(dense4)
    optimizer.post_update_params()
    

    
y=np.array(testing_labels_sample)
y = y.reshape( - 1 , 1 )
dense1.forward(testing_data_sample)
activation1.forward(dense1.output)
    
dense2.forward(activation1.output)
activation2.forward(dense2.output)
    
dense3.forward(activation2.output)
activation3.forward(dense3.output)
    
dense4.forward(activation3.output)
activation4.forward(dense4.output)
    
loss = loss_function.calculate(activation4.output, y)

predictions = (activation4.output > 0.5 ) * 1
accuracy = np.mean(predictions == y)
print ( f'Testing accuracy: {accuracy: .3f} , ' +
            f'Testing loss: {loss: .3f} ' )    

Training data full size - 5216
Testing data full size - 624
Training data sample size - 1800
Testing data sample size - 200
No. epochs selected - 40


Loading Training Data: 100%|███████████████████████████████████████████████████| 1800/1800 [00:00<00:00, 300248.45it/s]
Loading Testing Data: 100%|██████████████████████████████████████████████████████████████████| 200/200 [00:00<?, ?it/s]
Training Network:  10%|██████▌                                                          | 4/40 [00:03<00:29,  1.24it/s]

epoch: 5 , acc: 0.528 , loss: 0.585 (data_loss: 0.574 , reg_loss: 0.011 ), lr: 0.000999700089973008 


Training Network:  22%|██████████████▋                                                  | 9/40 [00:07<00:25,  1.24it/s]

epoch: 10 , acc: 0.531 , loss: 0.675 (data_loss: 0.664 , reg_loss: 0.011 ), lr: 0.0009992006394884093 


Training Network:  35%|██████████████████████▍                                         | 14/40 [00:11<00:20,  1.25it/s]

epoch: 15 , acc: 0.543 , loss: 0.492 (data_loss: 0.480 , reg_loss: 0.012 ), lr: 0.0009987016878058523 


Training Network:  48%|██████████████████████████████▍                                 | 19/40 [00:15<00:17,  1.17it/s]

epoch: 20 , acc: 0.458 , loss: 0.369 (data_loss: 0.356 , reg_loss: 0.012 ), lr: 0.0009982032341784787 


Training Network:  60%|██████████████████████████████████████▍                         | 24/40 [00:19<00:13,  1.20it/s]

epoch: 25 , acc: 0.507 , loss: 0.332 (data_loss: 0.319 , reg_loss: 0.013 ), lr: 0.0009977052778609198 


Training Network:  72%|██████████████████████████████████████████████▍                 | 29/40 [00:23<00:09,  1.20it/s]

epoch: 30 , acc: 0.455 , loss: 0.279 (data_loss: 0.267 , reg_loss: 0.012 ), lr: 0.000997207818109294 


Training Network:  85%|██████████████████████████████████████████████████████▍         | 34/40 [00:28<00:04,  1.21it/s]

epoch: 35 , acc: 0.473 , loss: 0.221 (data_loss: 0.209 , reg_loss: 0.012 ), lr: 0.000996710854181202 


Training Network:  98%|██████████████████████████████████████████████████████████████▍ | 39/40 [00:32<00:00,  1.23it/s]

epoch: 40 , acc: 0.483 , loss: 0.189 (data_loss: 0.177 , reg_loss: 0.012 ), lr: 0.0009962143853357243 


Training Network: 100%|████████████████████████████████████████████████████████████████| 40/40 [00:32<00:00,  1.21it/s]

Testing accuracy:  0.800 , Testing loss:  0.417 





In [90]:
#RANDOM SAMPLING WITHOUT REPLACEMENT
print(f'Training data full size - {len(training_data)}')
print(f'Testing data full size - {len(testing_data)}')

#Parameters
random.seed(10)
trainingDataSize = 1800
testingDataSize = 200
noEpochs = 40

print(f'Training data sample size - {trainingDataSize}')
print(f'Testing data sample size - {testingDataSize}')
print(f'No. epochs selected - {noEpochs}')

training_data_sample=[]
training_labels_sample=[]

testing_data_sample=[]
testing_labels_sample=[]

# Training data
normal_indices = list(range(len(NormalTraining)))
pneumonia_indices = list(range(len(PneumoniaTraining)))

for i in tqdm(range(trainingDataSize), desc="Loading Training Data"):
    randNum = random.randint(0, 1)
    if randNum == 0 and len(normal_indices) > 0:
        randData = random.sample(normal_indices, 1)[0]
        normal_indices.remove(randData)
        training_data_sample.append(NormalTraining[randData])
        training_labels_sample.append(0)
    elif randNum == 1 and len(pneumonia_indices) > 0:
        randData = random.sample(pneumonia_indices, 1)[0]
        pneumonia_indices.remove(randData)
        training_data_sample.append(PneumoniaTraining[randData])
        training_labels_sample.append(1)
        
# Testing data
normal_indices = list(range(len(NormalTesting)))
pneumonia_indices = list(range(len(PneumoniaTesting)))

for i in tqdm(range(testingDataSize), desc="Loading Testing Data"):
    randNum = random.randint(0, 1)
    if randNum == 0 and len(normal_indices) > 0:
        randData = random.sample(normal_indices, 1)[0]
        normal_indices.remove(randData)
        testing_data_sample.append(NormalTesting[randData])
        testing_labels_sample.append(0)
    elif randNum == 1 and len(pneumonia_indices) > 0:
        randData = random.sample(pneumonia_indices, 1)[0]
        pneumonia_indices.remove(randData)
        testing_data_sample.append(PneumoniaTesting[randData])
        testing_labels_sample.append(1)
        
y=np.array(training_labels_sample)
y = y.reshape( - 1 , 1 )

dense1 = Layer_Dense( imageSize*imageSize , 1024 , weight_regularizer_l2 = 5e-5 ,
                    bias_regularizer_l2 = 5e-5 )
dense2 = Layer_Dense( 1024 , 512 )
dense3 = Layer_Dense( 512 , 256 )
dense4 = Layer_Dense( 256 , 1 )

activation1 = Activation_TanH()
activation2 = Activation_TanH()
activation3 = Activation_TanH()
activation4 = Activation_Sigmoid()

# Create loss function
loss_function = Loss_BinaryCrossentropy()
# Create optimizer
optimizer = Optimizer_Adam( learning_rate = 0.001 , decay = 0.0001 )

for epoch in tqdm(range(1,noEpochs+1), desc = "Training Network"):
    dense1.forward(np.array(training_data_sample))
    activation1.forward(dense1.output)
    
    dense2.forward(activation1.output)
    activation2.forward(dense2.output)
    
    dense3.forward(activation2.output)
    activation3.forward(dense3.output)
    
    dense4.forward(activation3.output)
    activation4.forward(dense4.output)
    
    data_loss = loss_function.calculate(activation4.output, y)
    regularization_loss = \
        loss_function.regularization_loss(dense1) + \
        loss_function.regularization_loss(dense2)
    loss = data_loss + regularization_loss
    
    predictions = (activation2.output > 0.5 ) * 1
    accuracy = np.mean(predictions == y)
    
    if not epoch % 5 :
        print ( f'epoch: {epoch} , ' +
        f'acc: {accuracy:.3f} , ' +
        f'loss: {loss:.3f} (' +
        f'data_loss: {data_loss:.3f} , ' +
        f'reg_loss: {regularization_loss:.3f} ), ' +
        f'lr: {optimizer.current_learning_rate} ' )
        
        
    # Backward pass
    loss_function.backward(activation4.output, y)
    activation4.backward(loss_function.dinputs)
    dense4.backward(activation4.dinputs)
    
    activation3.backward(dense4.dinputs)
    dense3.backward(activation3.dinputs)
    
    activation2.backward(dense3.dinputs)
    dense2.backward(activation2.dinputs)
    
    activation1.backward(dense2.dinputs)
    dense1.backward(np.array(activation1.dinputs))
    
    optimizer.pre_update_params()
    optimizer.update_params(dense1)
    optimizer.update_params(dense2)
    optimizer.update_params(dense3)
    optimizer.update_params(dense4)
    optimizer.post_update_params()
    

    
y=np.array(testing_labels_sample)
y = y.reshape( - 1 , 1 )
dense1.forward(testing_data_sample)
activation1.forward(dense1.output)
    
dense2.forward(activation1.output)
activation2.forward(dense2.output)
    
dense3.forward(activation2.output)
activation3.forward(dense3.output)
    
dense4.forward(activation3.output)
activation4.forward(dense4.output)
    
loss = loss_function.calculate(activation4.output, y)

predictions = (activation4.output > 0.5 ) * 1
accuracy = np.mean(predictions == y)
print ( f'Testing accuracy: {accuracy: .3f} , ' +
            f'Testing loss: {loss: .3f} ' )    

Training data full size - 5216
Testing data full size - 624
Training data sample size - 1800
Testing data sample size - 200
No. epochs selected - 40


Loading Training Data: 100%|████████████████████████████████████████████████████| 1800/1800 [00:00<00:00, 60401.36it/s]
Loading Testing Data: 100%|██████████████████████████████████████████████████████| 200/200 [00:00<00:00, 200062.20it/s]
Training Network:  10%|██████▌                                                          | 4/40 [00:03<00:29,  1.24it/s]

epoch: 5 , acc: 0.498 , loss: 0.646 (data_loss: 0.635 , reg_loss: 0.011 ), lr: 0.000999700089973008 


Training Network:  22%|██████████████▋                                                  | 9/40 [00:07<00:25,  1.20it/s]

epoch: 10 , acc: 0.529 , loss: 0.559 (data_loss: 0.548 , reg_loss: 0.011 ), lr: 0.0009992006394884093 


Training Network:  35%|██████████████████████▍                                         | 14/40 [00:12<00:24,  1.06it/s]

epoch: 15 , acc: 0.467 , loss: 0.384 (data_loss: 0.373 , reg_loss: 0.011 ), lr: 0.0009987016878058523 


Training Network:  48%|██████████████████████████████▍                                 | 19/40 [00:16<00:18,  1.16it/s]

epoch: 20 , acc: 0.584 , loss: 0.401 (data_loss: 0.390 , reg_loss: 0.011 ), lr: 0.0009982032341784787 


Training Network:  60%|██████████████████████████████████████▍                         | 24/40 [00:20<00:12,  1.24it/s]

epoch: 25 , acc: 0.485 , loss: 0.301 (data_loss: 0.289 , reg_loss: 0.011 ), lr: 0.0009977052778609198 


Training Network:  72%|██████████████████████████████████████████████▍                 | 29/40 [00:24<00:08,  1.25it/s]

epoch: 30 , acc: 0.515 , loss: 0.238 (data_loss: 0.227 , reg_loss: 0.011 ), lr: 0.000997207818109294 


Training Network:  85%|██████████████████████████████████████████████████████▍         | 34/40 [00:28<00:04,  1.24it/s]

epoch: 35 , acc: 0.544 , loss: 0.203 (data_loss: 0.193 , reg_loss: 0.011 ), lr: 0.000996710854181202 


Training Network:  98%|██████████████████████████████████████████████████████████████▍ | 39/40 [00:32<00:00,  1.20it/s]

epoch: 40 , acc: 0.558 , loss: 0.183 (data_loss: 0.173 , reg_loss: 0.011 ), lr: 0.0009962143853357243 


Training Network: 100%|████████████████████████████████████████████████████████████████| 40/40 [00:33<00:00,  1.20it/s]

Testing accuracy:  0.805 , Testing loss:  0.470 



