IMPORT THE NECESSARY STUFF

In [1]:
import numpy as np
from sklearn.utils import check_random_state
import os
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from sklearn.manifold import TSNE
from PIL import Image, ImageDraw, ImageFont

PREPROCESSING FOR MNIST DATA

In [2]:
class MNISTpreprocessing:
    def __init__(self):
        pass
        
    def normalize_images(self, images):
        normalized_images = images/255
        return normalized_images
    
    def encode_digits(self, digits, num_classes):
        n = digits.shape[0]
        encoded_digits = np.zeros((n, num_classes))
        encoded_digits[np.arange(n), digits] = 1
        return encoded_digits    
    
    def split_data(self, images, digits, test_ratio, seed=None):
        sep_index = images.shape[1]
        data = np.column_stack((images, digits))
        random_fixed = check_random_state(seed)
        random_fixed.shuffle(data)
        split_index = int(data.shape[0] * test_ratio)
        train_images, train_digits = data[split_index:, :sep_index], data[split_index:, sep_index:]
        test_images, test_digits = data[:split_index, :sep_index], data[:split_index, sep_index:]
        return train_images, test_images, train_digits, test_digits

In [3]:
file_name = 'mnist_train.csv'
file_path = 'path'
mnist_data = pd.read_csv(os.path.join(file_path, file_name))

In [4]:
images = np.array(mnist_data.drop(columns=['label']))
digits = np.array(mnist_data['label'])
print('Printing shapes for sanity check')
print('Images array:', images.shape, '\nDigits array:', digits.shape)
normalized_images = MNISTpreprocessing().normalize_images(images=images)
encoded_digits = MNISTpreprocessing().encode_digits(digits=digits, num_classes=10)
print('Original Digits array:', digits.shape, '\nDigits array after One-hot Encoding', encoded_digits.shape)
train_images, val_images, train_digits, val_digits = MNISTpreprocessing().split_data(normalized_images, encoded_digits, 1/6, 3)
print('Training Images array: ', train_images.shape, '\nValidation Images array: ', val_images.shape, '\nTraining Digits array: ', train_digits.shape, '\nValidation Digits array: ', val_digits.shape)

Printing shapes for sanity check
Images array: (60000, 784) 
Digits array: (60000,)
Original Digits array: (60000,) 
Digits array after One-hot Encoding (60000, 10)
Training Images array:  (50000, 784) 
Validation Images array:  (10000, 784) 
Training Digits array:  (50000, 10) 
Validation Digits array:  (10000, 10)


WEIGHT-BIAS INITIALIZERS FROM SCRATCH

In [5]:
class ZeroConstantInitializer:
    def __init__(
        self,
        constant=0.1,
        ):
        self.constant = constant
        
    def create_tensor(
        self,
        tensor_shape,
        seed=None
        ):
        n1, n2 = tensor_shape[:2]
        tensor = np.zeros((n1, n2)) + self.constant
        return tensor
    
    
class RandomUniformInitializer():
    def __init__(
        self,
        low=0,
        high=1,
        ):
        self.low = low
        self.high = high
        
    def create_tensor(
        self,
        tensor_shape,
        seed=None
        ):
        random_fixed = check_random_state(seed)        
        n1, n2 = tensor_shape[:2]    
        tensor = random_fixed.uniform(self.low, self.high, (n1, n2))
        return tensor                
    
    
class RandomNormalInitializer():
    def __init__(
        self,
        mean=0,
        std=1,
        ):
        self.mean = mean
        self.std = std            
        
    def create_tensor(
        self,
        tensor_shape,
        seed=None
        ):
        random_fixed = check_random_state(seed)
        n1, n2 = tensor_shape[:2]
        tensor = random_fixed.normal(self.mean, self.std, (n1, n2))
        return tensor


class XavierUniformInitializer():
    def __init__(
        self,
        ):
        pass                
        
    def create_tensor(
        self,
        tensor_shape,
        seed=None
        ):
        random_fixed = check_random_state(seed)
        n1, n2, n_in, n_out = tensor_shape
        bound = np.sqrt(6/(n_in+n_out))
        tensor = random_fixed.uniform(-bound, bound, (n1, n2))
        return tensor
        
        
class XavierNormalInitializer():
    def __init__(
        self,
        mean=0,
        ):
        self.mean = mean                       
        
    def create_tensor(
        self,
        tensor_shape,
        seed=None
        ):
        random_fixed = check_random_state(seed)
        n1, n2, n_in, n_out = tensor_shape        
        sigma = np.sqrt(2/(n_in+n_out))
        tensor = random_fixed.normal(self.mean, sigma, (n1, n2))
        return tensor
        
        
class HeUniformInitializer():
    def __init__(
        self,
        ):
        pass                
        
    def create_tensor(
        self,
        tensor_shape,
        seed=None
        ):
        random_fixed = check_random_state(seed)
        n1, n2, n_in = tensor_shape[:3]
        bound = np.sqrt(3/n_in)
        tensor = random_fixed.uniform(-bound, bound, (n1, n2))
        return tensor
        
        
class HeNormalInitializer():
    def __init__(
        self,
        mean=0,
        ):
        self.mean = mean                               
        
    def create_tensor(
        self,
        tensor_shape,
        seed=None
        ):
        random_fixed = check_random_state(seed)
        n1, n2, n_in = tensor_shape[:3]
        sigma = np.sqrt(2/n_in)
        tensor = random_fixed.normal(self.mean, sigma, (n1, n2))
        return tensor
        
        
class NetworkParams:        
    def __init__(
        self,      
        bias_initializer='zero_constant',
        weight_initializer='xavier_normal', 
        seed=None,
        ):
        self.bias_initializer = bias_initializer
        self.weight_initializer = weight_initializer
        self.seed = seed
        
    def generate_learnables(
        self,
        neurons_per_layer
        ):
        num_layers = len(neurons_per_layer)
        biases = [0]*(num_layers-1)
        weights = [0]*(num_layers-1)
        for i in range(num_layers-1):
            n1, n2 = neurons_per_layer[i], neurons_per_layer[i+1]
            n_in = neurons_per_layer[i]
            try:
                n_out = neurons_per_layer[i+2]
            except:
                n_out = 0
            bias_shape = (n2, 1, n_in, n_out)
            weight_shape = (n2, n1, n_in, n_out)
            bias_vector = Initializer().initialization_mapper(
                initialize_method=self.bias_initializer
                ).create_tensor(
                    tensor_shape=bias_shape,
                    seed=self.seed
                    )
            weight_matrix = Initializer().initialization_mapper(
                initialize_method=self.weight_initializer
                ).create_tensor(
                    tensor_shape=weight_shape,
                    seed=self.seed
                    )
            biases[i], weights[i] = bias_vector, weight_matrix
        return biases, weights


class Initializer:    
    def initialization_mapper(
        self,
        initialize_method
        ):
        initialize_dict = {
            'zero_constant' : ZeroConstantInitializer(),
            'random_uniform' : RandomUniformInitializer(),
            'random_normal' : RandomNormalInitializer(),
            'xavier_uniform' : XavierUniformInitializer(),
            'xavier_normal' : XavierNormalInitializer(),
            'he_uniform' : HeUniformInitializer(),
            'he_normal' : HeNormalInitializer(),
            }
        if type(initialize_method) == str:
            if initialize_method in initialize_dict:
                return initialize_dict[initialize_method]
            else:
                raise ValueError(
                    f"Unsupported initialization: {initialize_method}. Please choose from the following:\n{', '.join(initialize_dict.keys())}\nor pass the class instance as the value of the method variable."
                    )
        else:
            return initialize_method

HANDFUL ACTIVATION FUNCTIONS PLUS THEIR DERIVATIVES

In [6]:
class Sigmoid:
    def __init__(
        self,
        ):
        pass
        
    def calculate(
        self,
        logit,
        derivative=False
        ):
        value = (1+np.exp(-logit))**(-1)
        if derivative == True:
            derivative_value = np.multiply(value, (1-value))
            return derivative_value
        else:
            return value
        
class ReLU:
    def __init__(
        self,
        leak=0,
        ):
        pass
        self.leak = leak                
        
    def calculate(
        self,
        logit,
        derivative=False
        ):
        if derivative == True:
            return np.where(logit >= 0, 1, self.leak)
        else:
            return np.maximum(self.leak*logit, logit)
        

class Tanh:
    def __init__(
        self,
        ):
        pass
        
    def calculate(
        self,
        logit,
        derivative=False
        ):
        value = (np.exp(logit) - np.exp(-logit))/(np.exp(logit) + np.exp(-logit))
        if derivative == True:
            derivative_value = 1 - value**2
            return derivative_value
        else:
            return value    
        

class Softmax:
    def __init__(
        self,
        ):
        pass
        
    def calculate(
        self,
        logit,
        derivative=False
        ):
        value = np.exp(logit - np.max(logit, axis=0, keepdims=True)) / np.sum(np.exp(logit - np.max(logit, axis=0, keepdims=True)), axis=0, keepdims=True)
        if derivative == True:
            n = value.shape[0]
            derivative_value = np.zeros((n, n))
            for i in range(n):
                for j in range(n):
                    if i == j:
                        derivative_value[i, j] = value[i] * (1 - value[j])
                    else:
                        derivative_value[i, j] = value[i] * (-value[j])
            return derivative_value
        else:
            return value

class Activation:
    def activation_mapper(
        self,
        activation_choice
        ):
        activation_dict = {
            'sigmoid' : Sigmoid(),
            'relu' : ReLU(),
            'tanh' : Tanh(),
            'softmax' : Softmax(),
            }
        if type(activation_choice) == str:
            if activation_choice in activation_dict:
                return activation_dict[activation_choice]
            else:
                raise ValueError(
                    f"Unsupported activation: {activation_choice}. Please choose from the following:\n{', '.join(activation_dict.keys())}\nor pass the class instance as the value of the choice variable."
                    )
        else:
            return activation_choice

FORWARD PROPAGATION

In [7]:
class ForwardPropagation:
    def __init__(
        self,
        ):
        pass
    
    def perform(
        self,        
        input_batch,
        biases,
        weights,
        activation_function,
        num_classes
        ):
        logit_history, activation_history = [0]*(len(biases)), [0]*(len(biases))
        last_activation = input_batch.T        
        for i in range(len(biases)-1):
            logit = np.dot(weights[i], last_activation) + biases[i]
            activation = Activation().activation_mapper(activation_function).calculate(logit, derivative=False)
            logit_history[i], activation_history[i] = logit, activation
            last_activation = activation
        final_logit = np.dot(weights[-1], last_activation) + biases[-1]
        if num_classes >=3 :
            final_activation_function = 'softmax'
        elif num_classes == 1:
            final_activation_function = 'sigmoid'
        final_activation = Activation().activation_mapper(final_activation_function).calculate(final_logit, derivative=False)
        logit_history[-1], activation_history[-1] = final_logit, final_activation
        return logit_history, [input_batch.T]+activation_history

ENTROPY LOSS FUNCTION

In [8]:
class BinaryCrossEntropy:
    def __init__(
        self,
        epsilon=1e-15,
        ):
        self.epsilon = epsilon
        
    def calculate(
        self,
        y_true,
        y_pred,
        derivative=False
        ):
        y_pred += self.epsilon
        if derivative == True:
            loss_derivative = -(y_true - y_pred)
            return loss_derivative
        else:
            loss = -np.mean(y_true*np.log(y_pred) + (1-y_true)*np.log(y_pred), axis=0)[0]
            return loss
        
class CategoricalCrossEntropy:
    def __init__(
        self,
        epsilon=1e-15,
        ):
        self.epsilon = epsilon
        
    def calculate(
        self,
        y_true,
        y_pred,
        derivative=False
        ):
        y_pred += self.epsilon
        if derivative == True:
            loss_derivative = -(y_true - y_pred)
            return loss_derivative
        else:
            loss = -np.mean(np.sum(y_true*np.log(y_pred), axis=0), axis=0)
            return loss
            
    
class Loss:
    def score(
        self,
        input_batch,
        output_batch,
        biases,
        weights,
        activation_function,
        num_classes
        ):
        _, activations_history = ForwardPropagation().perform(
            input_batch=input_batch,
            biases=biases,
            weights=weights,
            activation_function=activation_function,
            num_classes=num_classes,
            )
        if num_classes >= 3:
            loss_type = CategoricalCrossEntropy()
        elif num_classes == 1:
            loss_type = BinaryCrossEntropy()
        return loss_type.calculate(
            y_true=output_batch.T,
            y_pred=activations_history[-1],
            derivative=False
            )

THE DADDY STEP: BACKPROPAGATION

In [9]:
class BackwardPropagation:
    def __init__(
        self,    
        ):
        pass
        
    def perform(
        self,
        output_batch,
        weights,
        logit_history,
        activation_history,
        activation_function,
        num_classes
        ):        
        grad_biases, grad_weights = [0]*len(logit_history), [0]*len(logit_history)
        output_reshaped = output_batch.T
        batch_size = output_batch.shape[0]
        if num_classes >= 3:
            delta = CategoricalCrossEntropy().calculate(
                y_true=output_reshaped,
                y_pred=activation_history[-1],
                derivative=True
                )
        elif num_classes == 1:
            delta = BinaryCrossEntropy().calculate(
                y_true=output_reshaped,
                y_pred=activation_history[-1],
                derivative=True
                )
        grad_biases[-1], grad_weights[-1] = np.mean(delta, axis=1, keepdims=True), np.dot(delta, activation_history[-2].T)/batch_size
        for i in range(2, len(logit_history)+1):
            delta = np.multiply(np.dot(weights[-i+1].T, delta), Activation().activation_mapper(
                activation_choice=activation_function
                ).calculate(
                    logit=logit_history[-i],
                    derivative=True
                    )
                )          
            grad_biases[-i], grad_weights[-i] = np.mean(delta, axis=1, keepdims=True), np.dot(delta, activation_history[-i-1].T)/batch_size
        return grad_biases, grad_weights

MINI-BATCH MOMENTUM-GRADIENT DESCENT (EXPLOITING STOCHASTICITY + FAST MATRIX COMPUTATIONS ALONGWITH ACCELERATED OPTIMIZATION)

In [10]:
class GradientDescent:
    def __init__(
        self,
        biases,
        weights,
        velocity_biases,
        velocity_weights,
        ):
        self.biases = biases
        self.weights = weights
        self.velocity_biases = velocity_biases
        self.velocity_weights = velocity_weights
        
    def perform(self,
                input_batch,
                output_batch,
                num_classes,
                activation_function,
                lr,
                momentum,
                batch_size
                ):       
        if batch_size != None:
            indices = np.random.choice(
                a=input_batch.shape[0],
                size=batch_size,
                replace=False
                )
            input_batch, output_batch = input_batch[indices], output_batch[indices]            
        logit_history, activation_history = ForwardPropagation().perform(
            input_batch=input_batch,
            biases=self.biases,
            weights=self.weights,
            activation_function=activation_function,
            num_classes=num_classes,
            )
        grad_biases, grad_weights = BackwardPropagation().perform(
            output_batch=output_batch,
            weights=self.weights,
            logit_history=logit_history,
            activation_history=activation_history,
            activation_function=activation_function,
            num_classes=num_classes,
            )        
        self.velocity_biases = [momentum*velocity_bias + (1-momentum)*grad_bias for velocity_bias, grad_bias in zip(self.velocity_biases, grad_biases)]    
        self.velocity_weights = [momentum*velocity_weight + (1-momentum)*grad_weight for velocity_weight, grad_weight in zip(self.velocity_weights, grad_weights)]    
        self.biases = [bias_vector - lr*velocity_bias for bias_vector, velocity_bias in zip(self.biases, self.velocity_biases)]
        self.weights = [weight_matrix - lr*velocity_weight for weight_matrix, velocity_weight in zip(self.weights, self.velocity_weights)]
        return self.biases, self.weights, self.velocity_biases, self.velocity_weights

ACCURACY METRIC 

In [11]:
class Accuracy:
    def score(
        self,
        input_batch,
        output_batch,
        biases,
        weights,
        activation_function,
        num_classes
        ):
        _, activations_history = ForwardPropagation().perform(
            input_batch=input_batch,
            biases=biases,
            weights=weights,
            activation_function=activation_function,
            num_classes=num_classes,
            )
        y_true = np.argmax(
            a=output_batch,
            axis=1
            )
        y_pred = np.argmax(
            a=activations_history[-1],
            axis=0
            )
        return np.mean(y_true == y_pred)

BRINGING ALL PARTS TOGETHER

In [12]:
class MLP:
    def __init__(
        self,
        neurons_per_layer,
        bias_initializer='zero_constant',
        weight_initializer='xavier_normal',
        activation_function='tanh',
        seed=None
        ):
        self.neurons_per_layer = neurons_per_layer
        self.num_classes = self.neurons_per_layer[-1]
        self.activation_function = activation_function
        self.seed = seed
        self.biases, self.weights = NetworkParams(
            bias_initializer=bias_initializer,
            weight_initializer=weight_initializer,
            seed=self.seed
            ).generate_learnables(
                neurons_per_layer=self.neurons_per_layer
                )
        self.train_accuracies, self.train_losses = [], []
        self.validation_accuracies, self.validation_losses = [], []
        
    def fit(
        self,
        training_data,
        epochs,
        validation_data=None,
        evaluation_interval=10,
        lr=0.001,
        momentum=0.9,
        batch_size=128,
        dont_print=False
        ):        
        input_batch, output_batch = training_data
        velocity_biases, velocity_weights = [np.zeros_like(i) for i in self.biases], [np.zeros_like(i) for i in self.weights]
        train_acc = Accuracy().score(
            input_batch=input_batch,
            output_batch=output_batch,
            biases=self.biases,
            weights=self.weights,
            activation_function=self.activation_function,
            num_classes=self.num_classes
            )
        train_loss = Loss().score(
            input_batch=input_batch,
            output_batch=output_batch,
            biases=self.biases,
            weights=self.weights,
            activation_function=self.activation_function,
            num_classes=self.num_classes
            )        
        self.train_accuracies.append(train_acc)
        self.train_losses.append(train_loss)
        if validation_data is not None:
            val_acc = Accuracy().score(validation_data[0], validation_data[1], self.biases, self.weights, self.activation_function, self.num_classes)
            val_loss = Loss().score(validation_data[0], validation_data[1], self.biases, self.weights, self.activation_function, self.num_classes)
            self.validation_accuracies.append(val_acc)
            self.validation_losses.append(val_loss)
            if dont_print is not True:
                print(f'Epoch: {0}/{epochs} - train_acc: {round(train_acc, 4)} - train_loss: {round(train_loss, 4)} - val_acc: {round(val_acc, 4)} - val_loss: {round(val_loss, 4)}')
        else:
            if dont_print is not True:
                print(f'Epoch: {0}/{epochs} - train_acc: {round(train_acc, 4)} - train_loss: {round(train_loss, 4)}')
        for i in range(epochs):
            self.biases, self.weights, velocity_biases, velocity_weights = GradientDescent(
                biases=self.biases,
                weights=self.weights,
                velocity_biases=velocity_biases,
                velocity_weights=velocity_weights
                ).perform(
                    input_batch=input_batch,
                    output_batch=output_batch,
                    num_classes=self.num_classes,
                    activation_function=self.activation_function,
                    lr=lr,
                    momentum=momentum,
                    batch_size=batch_size,
                    )
            if (i + 1) % evaluation_interval == 0:
                train_acc = Accuracy().score(
                    input_batch=input_batch,
                    output_batch=output_batch,
                    biases=self.biases,
                    weights=self.weights,
                    activation_function=self.activation_function,
                    num_classes=self.num_classes
                    )
                train_loss = Loss().score(
                    input_batch=input_batch,
                    output_batch=output_batch,
                    biases=self.biases,
                    weights=self.weights,
                    activation_function=self.activation_function,
                    num_classes=self.num_classes
                    )
                self.train_accuracies.append(train_acc)
                self.train_losses.append(train_loss)
                if validation_data is not None:
                    val_acc = Accuracy().score(
                        input_batch=validation_data[0],
                        output_batch=validation_data[1],
                        biases=self.biases,
                        weights=self.weights,
                        activation_function=self.activation_function,
                        num_classes=self.num_classes
                        )
                    val_loss = Loss().score(
                        input_batch=validation_data[0],
                        output_batch=validation_data[1],
                        biases=self.biases,
                        weights=self.weights,
                        activation_function=self.activation_function,
                        num_classes=self.num_classes
                        )
                    self.validation_accuracies.append(val_acc)
                    self.validation_losses.append(val_loss)
                    if dont_print is not True:
                        print(f'Epoch: {i + 1}/{epochs} - train_acc: {round(train_acc, 4)} - train_loss: {round(train_loss, 4)} - val_acc: {round(val_acc, 4)} - val_loss: {round(val_loss, 4)}')
                else:
                    if dont_print is not True:
                        print(f'Epoch: {i + 1}/{epochs} - train_acc: {round(train_acc, 4)} - train_loss: {round(train_loss, 4)}')
        return self.biases, self.weights
    
    def predict(
        self,
        input_batch
        ):
        _, activations_history = ForwardPropagation().perform(
            input_batch=input_batch,
            biases=self.biases,
            weights=self.weights,
            activation_function=self.activation_function,
            num_classes=self.num_classes,
            )
        return np.argmax(activations_history[-1], axis=0)

TESTING THE MODEL ON MNIST

In [16]:
model = MLP(
    neurons_per_layer=[784, 40, 30, 20, 10],
    weight_initializer='he_normal',
    activation_function='relu',
    seed=36
    )
biases, weights = model.fit(
    training_data=(train_images, train_digits),
    epochs=5000,
    validation_data=(val_images, val_digits),
    evaluation_interval=50,
    lr=0.009,
    momentum=0.9,
    batch_size=64,
    dont_print=False
)

Epoch: 0/5000 - train_acc: 0.1075 - train_loss: 2.5228 - val_acc: 0.1018 - val_loss: 2.526
Epoch: 50/5000 - train_acc: 0.3035 - train_loss: 2.0444 - val_acc: 0.3055 - val_loss: 2.0465
Epoch: 100/5000 - train_acc: 0.5033 - train_loss: 1.7065 - val_acc: 0.507 - val_loss: 1.7043
Epoch: 150/5000 - train_acc: 0.5888 - train_loss: 1.421 - val_acc: 0.5925 - val_loss: 1.4172
Epoch: 200/5000 - train_acc: 0.6591 - train_loss: 1.1895 - val_acc: 0.6607 - val_loss: 1.1871
Epoch: 250/5000 - train_acc: 0.7182 - train_loss: 1.017 - val_acc: 0.7208 - val_loss: 1.0159
Epoch: 300/5000 - train_acc: 0.7584 - train_loss: 0.8916 - val_acc: 0.7542 - val_loss: 0.8909
Epoch: 350/5000 - train_acc: 0.7817 - train_loss: 0.795 - val_acc: 0.7783 - val_loss: 0.7957
Epoch: 400/5000 - train_acc: 0.8017 - train_loss: 0.7251 - val_acc: 0.7987 - val_loss: 0.7273
Epoch: 450/5000 - train_acc: 0.8166 - train_loss: 0.6724 - val_acc: 0.8107 - val_loss: 0.6733
Epoch: 500/5000 - train_acc: 0.8308 - train_loss: 0.6238 - val_acc: 

PLOTTING THE MODEL METRICS

In [14]:
epochs = 5000
evaluation_interval = 50
skip_index_till = 6
width = 1
x = [i for i in range(0, epochs + 1, evaluation_interval)]
train_accuracy, validation_accuracy = model.train_accuracies, model.validation_accuracies
train_loss, validation_loss = model.train_losses, model.validation_losses
fig = make_subplots(rows=1, cols=2, subplot_titles=("Accuracy", "Loss"))
fig.add_trace(go.Scatter(x=x[skip_index_till:], y=train_accuracy[skip_index_till:], mode='lines', name='Train', line=dict(color='red', width=width)), row=1, col=1)
fig.add_trace(go.Scatter(x=x[skip_index_till:], y=validation_accuracy[skip_index_till:], mode='lines', name='Validation', line=dict(color='green', width=width)), row=1, col=1)
fig.add_trace(go.Scatter(x=x[skip_index_till:], y=train_loss[skip_index_till:], mode='lines', name='Train', line=dict(color='red', width=width), showlegend=False), row=1, col=2)
fig.add_trace(go.Scatter(x=x[skip_index_till:], y=validation_loss[skip_index_till:], mode='lines', name='Validation', line=dict(color='green', width=width), showlegend=False), row=1, col=2)
fig.update_layout(
    title='Training Metrics',
    width=1300,
    height=550,
    template='plotly_dark'
    )
fig.update_xaxes(title_text='Epochs', showline=True, linewidth=0.5, linecolor='white', mirror=True)
fig.update_yaxes(showline=True, linewidth=0.5, linecolor='white', mirror=True)
fig.show()

CREATING ANIMATED METRICS PLOT

In [15]:
frames = []
for i in range(len(x)):
    frame = go.Frame(data=[
        go.Scatter(x=x[skip_index_till:i], y=train_accuracy[skip_index_till:i], mode='lines', name='Train', line=dict(color='red', width=width)),
        go.Scatter(x=x[skip_index_till:i], y=validation_accuracy[skip_index_till:i], mode='lines', name='Validation', line=dict(color='green', width=width)),
        go.Scatter(x=x[skip_index_till:i], y=train_loss[skip_index_till:i], mode='lines', name='Train', line=dict(color='red', width=width), showlegend=False),
        go.Scatter(x=x[skip_index_till:i], y=validation_loss[skip_index_till:i], mode='lines', name='Validation', line=dict(color='green', width=width), showlegend=False)
        ])
    frames.append(frame)

fig.frames = frames

animation_settings = {
    'frame': {'duration': 100, 'redraw': False},
    'fromcurrent': True,
    'mode': 'immediate',
    }
fig.update_layout(
    updatemenus=[{
        'buttons': [
            {
                'args': [None, animation_settings],
                'label': 'Play',
                'method': 'animate'
                },
            {
                'args': [[None], animation_settings],
                'label': 'Pause',
                'method': 'animate'
                }
            ],
        'direction': 'left',
        'pad': {'r': 10, 't': 87},
        'showactive': False,
        'type': 'buttons',
        'x': 0.1,
        'xanchor': 'right',
        'y': 0.1,
        'yanchor': 'top'
        }]
    )
fig.write_html('metric-animation.html')
fig.show()

PREPARING SUBSET OF MNIST (3 CLASSES IN 2 DIMENSIONS) FOR VISUALIZING LEARNT PARAMETERS

In [19]:
bool_ind = (mnist_data['label']==1) | (mnist_data['label']==4) | (mnist_data['label']==8)
sub_images_normalized = MNISTpreprocessing().normalize_images(mnist_data[bool_ind].drop(columns=['label']))
sub_digits = mnist_data[bool_ind]['label']
tsne = TSNE(
    n_components=2,
    random_state=10
    )
images_reduced = tsne.fit_transform(sub_images_normalized)
df = pd.DataFrame(
    {
        'd1': images_reduced[:, 0],
        'd2': images_reduced[:, 1],
        'digit': sub_digits
        }
    )
fig = px.scatter(
    df,
    x='d1',
    y='d2',
    color='digit',
    hover_data=['digit'],
    )
fig.update_layout(
    width=900,
    height=700,
    template='plotly_dark'
    )
fig.update_coloraxes(showscale=False)
fig.show()

CREATING NEURAL NETWORK LEARNING ANIMATION

In [27]:
sub_images_reduced_normalized = df[['d1', 'd2']].to_numpy()
sub_digits_encoded = pd.get_dummies(df['digit'], dtype=int).to_numpy()
train_sub_images, val_sub_images, train_sub_digits, val_sub_digits = MNISTpreprocessing().split_data(sub_images_reduced_normalized, sub_digits_encoded, 1/6, 3)
d1max = max(sub_images_reduced_normalized[:, 0])
d1min = min(sub_images_reduced_normalized[:, 0])
d2max = max(sub_images_reduced_normalized[:, 1])
d2min = min(sub_images_reduced_normalized[:, 1])
xx, yy = np.meshgrid(np.arange(d1min - 1, d1max + 1, 0.1), np.arange(d2min - 1, d2max + 1, 0.1))
z = np.array([xx.ravel(), yy.ravel()]).T
model = MLP(
    neurons_per_layer=[2,3,4,5,3],
    weight_initializer='xavier_normal',
    activation_function='tanh',
    seed=10
    )
def update_fig(predictions, epoch):
    fig = px.scatter(
        df,
        x='d1',
        y='d2',
        color='digit',
        hover_data=['digit'],
        )
    fig.update_traces(
        marker={
            'size': 4
            }
        )
    fig.update_layout(
        width=900,
        height=700,
        template='plotly_dark',
        yaxis_range=[d2min, d2max],
        xaxis_range=[d1min, d1max]
    )
    fig.update_coloraxes(showscale=False)
    fig.update_xaxes(visible=False)
    fig.update_yaxes(visible=False)
    fig.add_contour(
        x=xx.flatten(),
        y=yy.flatten(),
        z=predictions,
        opacity=0.5,
        showscale=False
        )
    fig.write_image(f'images3/{epoch+1}.png')
epochs = 500
evaluation_interval = 1
for i in range(0, epochs):
    biases, weights = model.fit(
        training_data=(train_sub_images, train_sub_digits),
        epochs=1,
        validation_data=(val_sub_images, val_sub_digits),
        evaluation_interval=1,
        lr=0.0003,
        momentum=0.5,
        batch_size=512,
        dont_print=True
    )
    predictions = model.predict(z)
    update_fig(predictions, i)
model = MLP(
    neurons_per_layer=[2,3,4,5,3],
    weight_initializer='xavier_normal',
    activation_function='tanh',
    seed=10
    )
biases, weights = model.fit(
    training_data=(train_sub_images, train_sub_digits),
    epochs=epochs,
    validation_data=(val_sub_images, val_sub_digits),
    evaluation_interval=1,
    lr=0.0003,
    momentum=0.5,
    batch_size=512,
    dont_print=True
    )
history = [round(i, 4) for i in model.validation_accuracies]

In [None]:
def create_animation(image_folder, output_path, frame_duration=100, loop=True):
    images = []
    font_size = 20
    for i in range(0, epochs):
        filename = f'{i+1}.png'
        image_path = os.path.join(image_folder, filename)
        img = Image.open(image_path)
        draw = ImageDraw.Draw(img)
        font = ImageFont.truetype("/usr/share/fonts/truetype/freefont/FreeMono.ttf", size=font_size)
        draw.text((90, 70), f'Epoch: {i+1}/{epochs} \nAccuracy: {history[i]}', fill='white', font=font)
        images.append(img)
    images[0].save(output_path, save_all=True, append_images=images[1:], duration=frame_duration, loop=0 if loop else 1)

if __name__ == "__main__":
    folder_path = "path"
    output_gif_path = "name"
    frame_duration_ms = 50
    loop_animation = False
    create_animation(folder_path, output_gif_path, frame_duration_ms, loop_animation)