# Imports and Variable Declarations

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import requests
import pickle

# Library

## Functions

### Activation Functions

In [2]:
class Sigmoid:
    def __init__(self):
        pass
    def __call__(self, x):
        return 1 / (1 + np.exp(-x))
    def derivative(self, x):
        return x * (1 - x)
    
class Tanh:
    def __init__(self):
        pass
    def __call__(self, x):
        return np.tanh(x)
    def derivative(self, x):
        return 1 - x ** 2
    
class ReLU:
    def __init__(self):
        pass
    def __call__(self, x):
        return np.maximum(0, x)
    def derivative(self, x):
        return np.where(x > 0, 1, 0)

class LeakyReLU:
    def __init__(self, alpha=0.01):
        self.alpha = alpha
    def __call__(self, x):
        return np.where(x > 0, x, x * self.alpha)
    def derivative(self, x):
        return np.where(x > 0, 1, self.alpha)
    
class Linear:
    def __init__(self):
        pass
    def __call__(self, x):
        return x
    def derivative(self, x):
        return 1
    
class SoftMax:
    def __init__(self):
        pass
    def __call__(self, x):
        exp = np.exp(x)
        return exp / np.sum(exp, axis=1, keepdims=True)
    def derivative(self, x):
        return x * (1 - x)

### Loss Functions

In [3]:
class Loss:
    def __init__(self):
        pass
    def __call__(self, y_true, y_pred):
        self.true, self.pred = y_true, y_pred
        return self
    def __str__(self):
        return str(self.value())
    def __repr__(self) -> str:
        return str(self.value())
    
class SquaredSumError(Loss):
    def __init__(self):
        super()
    def value(self):
        return 0.5*np.sum(np.square(self.true - self.pred))
    def derivative(self):
        return np.mean(self.pred - self.true, axis=0, keepdims=True)
    
class MeanSquaredError(Loss):
    def __init__(self):
        super()
    def value(self):
        return np.mean(np.square(self.true - self.pred))
    def derivative(self):
        return np.mean(self.pred - self.true, axis=0, keepdims=True)
    
class BinaryCrossEntropy(Loss):
    def __init__(self):
        super()
    def value(self):
        return -np.mean(self.true*np.log(self.pred) + (1-self.true)*np.log(1-self.pred))
    def derivative(self):
        return np.mean((self.pred - self.true) / (self.pred * (1.0 - self.pred)), axis=0, keepdims=True)
    
# class CategoricalCrossEntropy(Loss):
#     def __init__(self):
#         super()
#     def value(self):
#         print('Before')
#         value = np.mean(-np.sum(self.true * np.log(self.pred), axis=1), axis=0)
#         print('After')
#         return value
#         return np.mean(-np.sum(self.true * np.log(self.pred), axis=1), axis=0)
#         return -np.sum(self.true * np.log(self.pred), axis=1, keepdims=True)
#     def derivative(self):
#         return self.pred - self.true

class CategoricalCrossEntropy(Loss):
    def __init__(self):
        super()
    def value(self):
        epsilon = 1e-15  # Small constant to prevent numerical instability
        y_pred = np.clip(self.pred, epsilon, 1 - epsilon)  # Clip values to avoid log(0)
        loss = -np.sum(self.true * np.log(y_pred))
        return loss

    def derivative(self):
        epsilon = 1e-15  # Small constant to prevent numerical instability
        y_pred = np.clip(self.pred, epsilon, 1 - epsilon)  # Clip values to avoid log(0)
        return -self.true / y_pred

### Weight Initialization

In [4]:
class He:
    def __init__(self):
        pass
    def __call__(self, shape):
        return np.random.randn(*shape) * np.sqrt(2/shape[-1])
    
class Xavier:
    def __init__(self):
        pass
    def __call__(self, shape):
        return np.random.randn(*shape) * np.sqrt(1/shape[-1])
    
class Uniform:
    def __init__(self, low=-1, high=1):
        self.low, self.high = low, high
    def __call__(self, shape):
        return np.random.uniform(self.low, self.high, shape)
    
class Normal:
    def __init__(self, mean=0, std=1):
        self.mean, self.std = mean, std
    def __call__(self, shape):
        return np.random.randn(*shape)*self.std + self.mean

### Optimizers

In [5]:
class SGD:
    def __init__(self, model, lr=0.01):
        self.model = model
        self.lr = lr
    def step(self):
        gradients = self.model.get_gradients()
        weights = self.model.get_weights()
        new_weights = [None] * len(weights)
        for layer in range(len(weights)):
            new_weights[layer] = weights[layer] - self.lr * gradients[layer]
        self.model.update_weights(new_weights)
        
    
class ADAM:
    def __init__(self, model, lr=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.model = model
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.M = [np.zeros_like(layer) for layer in model.get_weights()]
        self.V = [np.zeros_like(layer) for layer in model.get_weights()]
        self.t = 0

    def step(self):
        self.t += 1
        gradients = self.model.get_gradients()
        weights = self.model.get_weights()
        new_weights = [None] * len(weights)
        for layer in range(len(weights)):
            self.M[layer] = self.beta1 * self.M[layer] + (1 - self.beta1) * gradients[layer]
            self.V[layer] = self.beta2 * self.V[layer] + (1 - self.beta2) * np.square(gradients[layer])
            m_hat = self.M[layer] / (1 - self.beta1**self.t)
            v_hat = self.V[layer] / (1 - self.beta2**self.t)
            new_weights[layer] = weights[layer] - self.lr * m_hat / (np.sqrt(v_hat) + self.epsilon)
        self.model.update_weights(new_weights)

### Metrics

In [6]:
class R2:
    def __init__(self):
        pass
    def __call__(self, y_true, y_pred):
        return 1 - np.sum(np.square(y_true - y_pred)) / np.sum(np.square(y_true - np.mean(y_true)))
    def name(self):
        return 'R2'

class Accuracy:
    def __init__(self):
        pass
    def __call__(self, y_true, y_pred):
        return np.mean(np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1))
    def name(self):
        return 'Accuracy'
    
class Precision:
    def __init__(self):
        pass
    def __call__(self, y_true, y_pred):
        tp = np.sum(np.argmax(y_true, axis=1) & np.argmax(y_pred, axis=1))
        fp = np.sum((1-(np.argmax(y_true, axis=1)) & np.argmax(y_pred, axis=1)))
        return tp / (tp + fp)
    def name(self):
        return 'Precision'

class Recall:
    def __init__(self):
        pass
    def __call__(self, y_true, y_pred):
        tp = np.sum(np.argmax(y_true, axis=1) & np.argmax(y_pred, axis=1))
        fn = np.sum(np.argmax(y_true, axis=1) & (1-np.argmax(y_pred, axis=1)))
        return tp / (tp + fn)
    def name(self):
        return 'Recall'
    
class F1:
    def __init__(self):
        pass
    def __call__(self, y_true, y_pred):
        precision = Precision()(y_true, y_pred)
        recall = Recall()(y_true, y_pred)
        return 2 * precision * recall / (precision + recall)
    def name(self):
        return 'F1'

## MLP

In [7]:
class MLP:
    def print_weights(self):
        [print(weights) for weights in self.W]
        print()
    def print_num_layers(self):
        print(self.layers)
        print()
    def print_weights_shapes(self):
        [print(weights.shape) for weights in self.W]
        print()
    def print_outputs(self):
        [print(output) for output in self.O]
        print()
    def print_outputs_shapes(self):
        [print(output.shape) for output in self.O]
        print()
    def print_inputs(self):
        [print(input) for input in self.I]
        print()
    def print_inputs_shapes(self):
        [print(input.shape) for input in self.I]
        print()
    def print_gradients(self):
        [print(gradient) for gradient in self.gradients]
        print()
    def print_gradients_shapes(self):
        [print(gradient.shape) for gradient in self.gradients]
        print()
    
    def get_weights(self):
        return self.W
    def get_gradients(self):
        return self.gradients
    def update_weights(self, weights):
        self.W = weights

    def __init__(self, layers=[], initialization=He(), activations=None, activations_type=None, input_size=2, bias=True):
        self.bias = bias
        self.layers = list(range(len(layers)))

        self.I = [None]*(len(layers))
        self.O = [None]*(len(layers))

        activations_type = activations_type or ReLU()
        self.activations = activations or [activations_type]*len(layers)

        self.W = [initialization((input_size+bias, layers[0]))]
        self.W.extend([initialization((layers[i]+bias, layers[i+1])) for i in range(len(layers)-1)])

        self.gradients = [np.zeros_like(layer) for layer in self.W]

        self.reset_history()

    
    def forward(self, x):
        for i in self.layers:
            x = np.concatenate((x, np.ones((x.shape[0],1))), axis=1) if self.bias else x
            self.I[i] = np.mean(x, axis=0, keepdims=True)
            x = np.matmul(x,self.W[i])
            x = self.activations[i](x)
            self.O[i] = np.mean(x, axis=0, keepdims=True)
        return x


    def backward(self, loss):
        # Calculate Loss Derivative
        loss_der = loss.derivative()

        # Calculate Deltas
        delta = loss_der * self.activations[-1].derivative(self.O[-1])
        self.gradients[-1] = np.matmul(self.I[-1].T,delta)
        for i in reversed(self.layers[:-1]):
            if self.bias:
                delta = np.matmul(delta,self.W[i+1][:-1].T) * self.activations[i].derivative(self.O[i])
            else:
                delta = np.matmul(delta,self.W[i+1].T) * self.activations[i].derivative(self.O[i])
                
            self.gradients[i] = np.matmul(self.I[i].T,delta)
        self.grad_history = [np.concatenate([self.grad_history[i], self.gradients[i]]) for i in range(len(self.gradients))] if self.history_steps != 0 else self.gradients
        self.O_history    = [np.concatenate([self.O_history[i], self.O[i]]) for i in range(len(self.O))] if self.history_steps != 0 else self.gradients
        # self.grad_history = [self.grad_history[i] + self.gradients[i] for i in range(len(self.gradients))]
        # self.O_history    = [self.O_history[i] + self.O[i] for i in range(len(self.O))]
        self.history_steps += 1

    
    def fit(self, train_data, test_data=None, epochs=100, criteria=MeanSquaredError(), optimizer=None, batch_size=1, metrics=None, 
            plot_gradients=False, plot_activations=False, verbose=1, barmode='group'):
        
        optimizer = optimizer or ADAM(self)
        X_train, Y_train = train_data
        num_sections = np.ceil(X_train.shape[0] / batch_size)

        loss_history = []
        loss_val_history = []
        metrics_history = []
        metrics_val_history = []

        if test_data:
            X_test, Y_test = test_data


        for epoch in range(epochs):
            batches = np.array_split(np.random.permutation(X_train.shape[0]), num_sections)
            epoch_predictions = []
            epoch_targets = []
            epoch_losses = []

            for i, batch in enumerate(batches):
                self.reset_history()
                pred = self.forward(X_train[batch] )
                loss = criteria(Y_train[batch], pred)
                self.backward(loss)
                optimizer.step()

                epoch_predictions.append(pred)
                epoch_targets.append(Y_train[batch])
                epoch_losses.append(loss.value())
                
                if verbose == 2:
                    print_string = f'Epoch: {epoch+1}({i}/{num_sections}), '
                    for metric in metrics:
                        print_string += f'{metric.name}: {metric(Y_train[batch], pred)}, ' 
                    print_string += f'Loss: {loss}'
                    print(print_string)
            
            loss_history.append(np.mean(epoch_losses))
            predictions, targets = np.concatenate(epoch_predictions, axis=0), np.concatenate(epoch_targets, axis=0)
            metrics_history.append([metric(targets, predictions) for metric in metrics]) if metrics else None



            if plot_activations:
                self.plot_activations(barmode=barmode)
            if plot_gradients:
                self.plot_gradients(barmode=barmode)

            if test_data:
                pred = self.forward(X_test)
                val_loss = criteria(Y_test, pred)
                loss_val_history.append(val_loss.value())
                metrics_val_history.append([metric(Y_test, pred) for metric in metrics]) if metrics else None
            
            if verbose == 1:
                print_string = f'Epoch: {epoch+1:3d}, '
                if test_data and metrics:
                    for metric, metric_val, metric_type in zip(metrics_history[-1], metrics_val_history[-1], metrics):
                        print_string += f'{metric_type.name()}: {metric:.3f}, {metric_type.name()}_val: {metric_val:.3f}, '
                elif metrics:
                    for metric, metric_type in zip(metrics_history[-1], metrics):
                        print_string += f'{metric_type.name()}: {metric}, '
                print_string += f'Loss: {loss_history[-1]:.5f}'
                print(print_string)

        history = {'Loss': loss_history}
        metrics_history = np.array(metrics_history)
        metrics_val_history = np.array(metrics_val_history)
        loss_val_history = np.array(loss_val_history).flatten()
        if test_data and metrics:
            for i, metrics_type in enumerate(metrics):
                history[f'{metrics_type.name()}'] = metrics_history[:,i]
                history[f'{metrics_type.name()} (val)'] = metrics_val_history[:,i]
            history['Loss (val)'] = loss_val_history
        elif metrics:
            for i, metrics_type in enumerate(metrics):
                history[f'{metrics_type.name()}'] = metrics_history[:,i]
        elif test_data:
            history['Loss (val)'] = loss_val_history
        
        return dict(sorted(history.items(), key=lambda item: item[0]))


    def reset_history(self):
        self.grad_history  = [np.zeros_like(layer) for layer in self.W]
        self.O_history     = [np.zeros_like(layer) for layer in self.O]
        self.grad_history  = []
        self.O_history     = []
        self.history_steps = 0
    
    def plot_gradients(self, barmode='group'):
        gradients_hist   = np.concatenate([layer.flatten() for layer in self.grad_history])
        gradients_hist   = np.concatenate([gradients_hist, gradients_hist])
        gradients_colors = np.concatenate([np.full(layer.shape, i).flatten() for i, layer in enumerate(self.grad_history)])
        gradients_colors = np.concatenate([gradients_colors.astype(str), np.full(gradients_colors.shape, 'Global')])
        plot1 = px.histogram(gradients_hist, color=gradients_colors, title='Gradients', nbins=100, barmode=barmode, histnorm='percent'
                             ).update_traces(visible="legendonly", selector=lambda t: not t.name in ["Global"])
        plot1.update_yaxes(exponentformat = 'E')
        plot1.show()
        
    def plot_activations(self, barmode='group'):
        outputs_hist   = np.concatenate([(layer/self.history_steps).flatten() for layer in self.O_history])
        outputs_hist   = np.concatenate([layer.flatten() for layer in self.O_history])
        outputs_hist   = np.concatenate([outputs_hist, outputs_hist])
        outputs_colors = np.concatenate([np.full(layer.shape, i).flatten() for i, layer in enumerate(self.O_history)])
        outputs_colors = np.concatenate([outputs_colors.astype(str), np.full(outputs_colors.shape, 'Global')])
        plot2 = px.histogram(outputs_hist, color=outputs_colors, title='Activations', nbins=100, barmode=barmode, histnorm='percent'
                             ).update_traces(visible="legendonly", selector=lambda t: not t.name in ["Global"])

        plot2.update_yaxes(exponentformat = 'E')
        plot2.show()

In [8]:
def plot(hist):
    fig = px.line(hist)
    fig.update_layout(dict(updatemenus=[dict(
                                type = "buttons", direction = "left",
                                buttons=list([
                                    dict(args=["visible", "legendonly"], label="Deselect All", method="restyle"
                                    ),
                                    dict(args=["visible", True], label="Select All", method="restyle"
                                    )
                                ]),
                                pad={"r": 10, "t": 10}, showactive=False, x=1, xanchor="right", y=1.1, yanchor="top"
                            ),
                        ]
                ))
    fig.show()

# Ovefiting

In [9]:
weight1 = np.genfromtxt('weights1.csv', delimiter=',')
weight2 = np.expand_dims(np.genfromtxt('weights2.csv', delimiter=','), axis=0)
inputs   = np.genfromtxt('inputs.csv', delimiter=',')[:-1]
targets  = np.genfromtxt('inputs.csv', delimiter=',')[-1:]
np.genfromtxt('inputs.csv', delimiter=',').shape

targets = np.array([[1,0],[0,1],[0,0]])

criteria = SquaredSumError()
criteria = MeanSquaredError()
# criteria = BinaryCrossEntropy()
# criteria = CategoricalCrossEntropy()
activation = Sigmoid()
# activation = Tanh() # Fuck
# activation = ReLU() # Fuck
activation = LeakyReLU()
# activation = Linear()
model = MLP(layers=[100,3], input_size=2, activations_type=activation, initialization=He(), bias=True)
# model = MLP(layers=[100,3], input_size=2, activations=[activation, SoftMax()], initialization=He(), bias=True)
optimizer = SGD(model)
optimizer = ADAM(model)

hist = model.fit((inputs.T, targets.T), criteria=criteria, optimizer=optimizer, metrics=[Accuracy()])

np.concatenate([targets.T, model.forward(inputs.T)], axis=1)
pred = (model.forward(inputs.T) > 0.5).astype(np.int64)
# pred = model.forward(inputs.T)
np.concatenate([targets.T.flatten().reshape(-1,1), pred.flatten().reshape(-1,1)], axis=1)

Epoch:   1, Accuracy: 0.5, Loss: 0.37625
Epoch:   2, Accuracy: 0.5, Loss: 0.33875
Epoch:   3, Accuracy: 1.0, Loss: 0.33775
Epoch:   4, Accuracy: 1.0, Loss: 0.33741
Epoch:   5, Accuracy: 1.0, Loss: 0.33743
Epoch:   6, Accuracy: 0.5, Loss: 0.26622
Epoch:   7, Accuracy: 0.5, Loss: 0.20845
Epoch:   8, Accuracy: 0.5, Loss: 0.18523
Epoch:   9, Accuracy: 0.5, Loss: 0.22089
Epoch:  10, Accuracy: 0.5, Loss: 0.19902
Epoch:  11, Accuracy: 0.5, Loss: 0.17019
Epoch:  12, Accuracy: 0.5, Loss: 0.18074
Epoch:  13, Accuracy: 0.5, Loss: 0.18401
Epoch:  14, Accuracy: 0.5, Loss: 0.17876
Epoch:  15, Accuracy: 0.5, Loss: 0.16965
Epoch:  16, Accuracy: 0.5, Loss: 0.17350
Epoch:  17, Accuracy: 0.5, Loss: 0.17808
Epoch:  18, Accuracy: 0.5, Loss: 0.17414
Epoch:  19, Accuracy: 0.5, Loss: 0.16894
Epoch:  20, Accuracy: 0.5, Loss: 0.16925
Epoch:  21, Accuracy: 0.5, Loss: 0.17149
Epoch:  22, Accuracy: 0.5, Loss: 0.17030
Epoch:  23, Accuracy: 0.5, Loss: 0.16797
Epoch:  24, Accuracy: 1.0, Loss: 0.16793
Epoch:  25, Accu

array([[1, 1],
       [0, 0],
       [0, 0],
       [0, 0],
       [1, 1],
       [0, 0]])

In [10]:
px.line(hist).show()
# len(hist['loss_history']), len(hist['metrics_history'])
pd.DataFrame(hist)
# for key in hist.keys():
#     print(key, hist[key], end='\n\n\n')

Unnamed: 0,Accuracy,Loss
0,0.5,0.376252
1,0.5,0.338749
2,1.0,0.337747
3,1.0,0.337405
4,1.0,0.337425
...,...,...
95,1.0,0.000018
96,1.0,0.000018
97,1.0,0.000018
98,1.0,0.000018


In [11]:
weight1 = np.genfromtxt('weights1.csv', delimiter=',')
weight2 = np.expand_dims(np.genfromtxt('weights2.csv', delimiter=','), axis=0)
inputs   = np.genfromtxt('inputs.csv', delimiter=',')[:-1]
targets  = np.genfromtxt('inputs.csv', delimiter=',')[-1:]
np.genfromtxt('inputs.csv', delimiter=',').shape

targets = np.array([[1,0],[0,1],[0,0]])

criteria = SquaredSumError()
criteria = MeanSquaredError()
# criteria = BinaryCrossEntropy()
# criteria = CategoricalCrossEntropy()
activation = Sigmoid()
# activation = Tanh() # Fuck
# activation = ReLU() # Fuck
activation = LeakyReLU()
# activation = Linear()
model = MLP(layers=[100,3], input_size=2, activations_type=activation, initialization=He(), bias=True)
# model = MLP(layers=[100,3], input_size=2, activations=[activation, SoftMax()], initialization=He(), bias=True)
optimizer = SGD(model)
optimizer = ADAM(model)

num_iterations = 2**7
for epoch in range(num_iterations+1):
    indexes = np.arange(len(inputs))
    np.random.shuffle(indexes)
    for i in indexes:
        input = inputs.T[i].reshape(1,-1)
        target = targets.T[i].reshape(1,-1)
        
        pred = model.forward(input)
        loss = criteria(target, pred)
        model.backward(loss)
        optimizer.step()
    
    # if (epoch+1) % (num_iterations/10) == 0 or not epoch:
    if (epoch+1) & (epoch) == 0:
        pred = model.forward(inputs)
        loss = criteria(targets.T, pred)
        print("Epoch: %d, Loss: %.5f" % (epoch+1, loss.value()))

np.concatenate([targets.T, model.forward(inputs.T)], axis=1)
pred = (model.forward(inputs.T) > 0.5).astype(np.int64)
# pred = model.forward(inputs.T)
np.concatenate([targets.T.flatten().reshape(-1,1), pred.flatten().reshape(-1,1)], axis=1)

Epoch: 1, Loss: 0.17217
Epoch: 2, Loss: 0.16878
Epoch: 4, Loss: 0.18201
Epoch: 8, Loss: 0.03812
Epoch: 16, Loss: 0.02666
Epoch: 32, Loss: 0.00991
Epoch: 64, Loss: 0.01109
Epoch: 128, Loss: 0.01065


array([[1, 1],
       [0, 0],
       [0, 0],
       [0, 0],
       [1, 1],
       [0, 0]])

# Iris

In [12]:
# Load Iris with sklearn
iris = load_iris()

# Extract the features and target from the dataset
X_iris = iris.data
y_iris = iris.target

In [13]:
# Split the dataset into training and test set
X_train, X_test, Y_train, Y_test = train_test_split(X_iris, y_iris, test_size=0.2, random_state=42)

# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert the target to one-hot encoding
Y_train = np.eye(3)[Y_train]
Y_test = np.eye(3)[Y_test]

# Create the model
model = MLP(layers=[100,3], input_size=4, activations=[ReLU(), Sigmoid()], initialization=He(), bias=True)
optimizer = ADAM(model)
criteria = CategoricalCrossEntropy()
criteria = BinaryCrossEntropy()
hist = model.fit((X_train, Y_train), (X_test, Y_test), epochs=10, criteria=criteria, optimizer=optimizer, 
                 metrics=[Accuracy(), Precision(), Recall(), F1()], verbose=1, plot_activations=False, plot_gradients=False)


Epoch:   1, Accuracy: 0.883, Accuracy_val: 0.967, Precision: 0.575, Precision_val: 0.577, Recall: 0.539, Recall_val: 0.566, F1: 0.556, F1_val: 0.571, Loss: 0.23355
Epoch:   2, Accuracy: 0.942, Accuracy_val: 0.900, Precision: 0.591, Precision_val: 0.568, Recall: 0.582, Recall_val: 0.610, F1: 0.587, F1_val: 0.588, Loss: 0.10867
Epoch:   3, Accuracy: 0.917, Accuracy_val: 1.000, Precision: 0.589, Precision_val: 0.585, Recall: 0.595, Recall_val: 0.585, F1: 0.592, F1_val: 0.585, Loss: 0.12376
Epoch:   4, Accuracy: 0.958, Accuracy_val: 0.967, Precision: 0.596, Precision_val: 0.580, Recall: 0.593, Recall_val: 0.592, F1: 0.594, F1_val: 0.586, Loss: 0.06876
Epoch:   5, Accuracy: 0.942, Accuracy_val: 0.967, Precision: 0.592, Precision_val: 0.577, Recall: 0.589, Recall_val: 0.566, F1: 0.591, F1_val: 0.571, Loss: 0.07509
Epoch:   6, Accuracy: 0.967, Accuracy_val: 0.967, Precision: 0.598, Precision_val: 0.577, Recall: 0.598, Recall_val: 0.566, F1: 0.598, F1_val: 0.571, Loss: 0.05555
Epoch:   7, Accu

In [14]:
plot(hist)

# MNIST1d

In [15]:
# url = 'https://github.com/greydanus/mnist1d/raw/master/mnist1d_data.pkl'
# r = requests.get(url, allow_redirects=True)
# open('./mnist1d_data.pkl', 'wb').write(r.content)

In [16]:
np.float64
np.log(np.finfo(np.float64).max), np.exp(709), np.exp(709.5), np.exp(709.7), np.exp(709.8)
f'{float(np.float64(0.1234567890)):2f}'
f'{0.1234567890:.2f}'


overflow encountered in exp



'0.12'

In [17]:
with open('./mnist1d_data.pkl', 'rb') as handle:
    mnist1d = pickle.load(handle)

X_mnist1d, Y_mnist1d = mnist1d['x'], np.eye(10)[mnist1d['y']]
X_mnist1d_test, Y_mnist1d_test = mnist1d['x_test'], np.eye(10)[mnist1d['y_test']]
# dict_keys(['x', 'x_test', 'y', 'y_test', 't', 'templates'])  # these are NumPy arrays


In [27]:
# Create the model
model = MLP(layers=[300,10], input_size=40, activations=[ReLU(), Sigmoid()], initialization=He(), bias=True)
# model = MLP(layers=[300,10], input_size=40, activations=[ReLU(), SoftMax()], initialization=He(), bias=True)
optimizer = SGD(model)
criteria = CategoricalCrossEntropy()
criteria = BinaryCrossEntropy()
# hist = model.fit((X_mnist1d, Y_mnist1d), (X_mnist1d_test, Y_mnist1d_test), epochs=10, criteria=criteria, optimizer=optimizer, metrics=[Accuracy(), Precision(), Recall(), F1()], verbose=1)
hist = model.fit((X_mnist1d, Y_mnist1d), (X_mnist1d_test, Y_mnist1d_test), epochs=10, criteria=criteria, optimizer=optimizer, metrics=[Accuracy(), Precision()], verbose=1)


Epoch:   1, Accuracy: 0.322, Accuracy_val: 0.423, Precision: 0.462, Precision_val: 0.458, Loss: 0.38465
Epoch:   2, Accuracy: 0.511, Accuracy_val: 0.483, Precision: 0.508, Precision_val: 0.507, Loss: 0.22723
Epoch:   3, Accuracy: 0.582, Accuracy_val: 0.542, Precision: 0.521, Precision_val: 0.516, Loss: 0.19164
Epoch:   4, Accuracy: 0.645, Accuracy_val: 0.550, Precision: 0.536, Precision_val: 0.517, Loss: 0.16953
Epoch:   5, Accuracy: 0.691, Accuracy_val: 0.582, Precision: 0.542, Precision_val: 0.527, Loss: 0.15629
Epoch:   6, Accuracy: 0.712, Accuracy_val: 0.578, Precision: 0.544, Precision_val: 0.516, Loss: 0.14524
Epoch:   7, Accuracy: 0.743, Accuracy_val: 0.563, Precision: 0.551, Precision_val: 0.530, Loss: 0.13720
Epoch:   8, Accuracy: 0.758, Accuracy_val: 0.596, Precision: 0.550, Precision_val: 0.532, Loss: 0.12938
Epoch:   9, Accuracy: 0.780, Accuracy_val: 0.584, Precision: 0.557, Precision_val: 0.538, Loss: 0.12244
Epoch:  10, Accuracy: 0.799, Accuracy_val: 0.591, Precision: 0.5

In [28]:
# Create the model
model = MLP(layers=[100,80,60,40,20,10], input_size=40, activations=[Sigmoid()]*6, initialization=He(), bias=True)
# model = MLP(layers=[300,10], input_size=40, activations=[ReLU(), SoftMax()], initialization=He(), bias=True)
optimizer = SGD(model)
criteria = CategoricalCrossEntropy()
criteria = BinaryCrossEntropy()
# hist = model.fit((X_mnist1d, Y_mnist1d), (X_mnist1d_test, Y_mnist1d_test), epochs=10, criteria=criteria, optimizer=optimizer, metrics=[Accuracy(), Precision(), Recall(), F1()], verbose=1)
hist = model.fit((X_mnist1d, Y_mnist1d), (X_mnist1d_test, Y_mnist1d_test), epochs=1, criteria=criteria, optimizer=optimizer, 
                 metrics=[Accuracy(), Precision()], verbose=1, plot_activations=False, plot_gradients=True, barmode='overlay')


Epoch:   1, Accuracy: 0.099, Accuracy_val: 0.106, Precision: 0.398, Precision_val: 0.502, Loss: 0.33030


In [41]:
# Create the model
model = MLP(layers=[100,80,60,40,20,10], input_size=40, activations=[LeakyReLU()]*5+[Sigmoid()], initialization=Normal(), bias=True)
# model = MLP(layers=[300,10], input_size=40, activations=[ReLU(), SoftMax()], initialization=He(), bias=True)
optimizer = SGD(model)
criteria = CategoricalCrossEntropy()
criteria = BinaryCrossEntropy()
# hist = model.fit((X_mnist1d, Y_mnist1d), (X_mnist1d_test, Y_mnist1d_test), epochs=10, criteria=criteria, optimizer=optimizer, metrics=[Accuracy(), Precision(), Recall(), F1()], verbose=1)
hist = model.fit((X_mnist1d, Y_mnist1d), (X_mnist1d_test, Y_mnist1d_test), epochs=1, criteria=criteria, optimizer=optimizer, 
                 metrics=[Accuracy(), Precision()], verbose=1, plot_activations=False, plot_gradients=True, barmode='overlay')



overflow encountered in exp


divide by zero encountered in true_divide


invalid value encountered in true_divide


invalid value encountered in multiply


divide by zero encountered in log


invalid value encountered in multiply


invalid value encountered in long_scalars



Epoch:   1, Accuracy: 0.100, Accuracy_val: 0.102, Precision: nan, Precision_val: nan, Loss: nan



invalid value encountered in long_scalars



In [21]:
# for hash in iris.keys():
#     if hash not in ['data', 'target']:
#         print(f'"{hash}"\n', iris[hash], end='\n\n\n')

In [22]:
raise Exception
[(x/model.history_steps).flatten() for x in model.grad_history]

Exception: 

In [None]:

# # gradients_hist   = np.concatenate([(layer/model.history_steps).flatten() for layer in model.grad_history])
# # gradients_hist   = np.concatenate([gradients_hist, gradients_hist])
# # gradients_colors = np.concatenate([np.full(layer.shape, i).flatten() for i, layer in enumerate(model.grad_history)])
# # gradients_colors = np.concatenate([gradients_colors.astype(str), np.full(gradients_colors.shape, 'Global')])


# gradients_hist             = {i:(layer/model.history_steps).flatten() for i, layer in enumerate(model.grad_history)}
# gradients_hist['Global']   = np.concatenate([(layer/model.history_steps).flatten() for layer in model.grad_history])
# gradients_colors           = {i+1:np.full(layer.shape, i).flatten() for i, layer in enumerate(model.grad_history)}
# gradients_colors['Global'] = np.zeros(sum([layer.size for layer in model.grad_history]))

# plot1 = go.Histogram(gradients_hist, color=gradients_colors, title='Gradients', nbinsx=100, barmode='group')

# # plot1 = px.histogram(gradients_hist, color=gradients_colors, title='Gradients', nbins=100, barmode='group',
# #                         ).update_traces(visible="legendonly", selector=lambda t: not t.name in ["Global"])

# # outputs_hist   = np.concatenate([(layer/model.history_steps).flatten() for layer in model.O_history])
# # outputs_hist   = np.concatenate([outputs_hist, outputs_hist])
# # outputs_colors = np.concatenate([np.full(layer.shape, i).flatten() for i, layer in enumerate(model.O_history)])
# # outputs_colors = np.concatenate([outputs_colors.astype(str), np.full(outputs_colors.shape, 'Global')])
# # plot2 = px.histogram(outputs_hist, color=outputs_colors, title='Activations', nbins=100, barmode='group',
# #                         ).update_traces(visible="legendonly", selector=lambda t: not t.name in ["Global"])

# # # for trace in plot1["data"]:
# # #     if trace["type"] == "histogram":
# # #         trace["nbinsx"] = 100


# # fig = make_subplots(rows=1, cols=2) 
# # [fig.append_trace(traces, row=1, col=1) for traces in plot1["data"]]
# # [fig.append_trace(traces, row=1, col=2) for traces in plot2["data"]]
# # fig.update_traces(nbinsx=100, row=1, col=1)
# # fig.update_traces(nbinsx=100, row=1, col=2)
# # # fig.update_traces(nbinsx=dict(size=100), row=1, col=1)
# # fig.show()

# # Edit number of bins in figure row 1 col 1
# # fig.update_traces(nbinsx=100, nbinsy=100, selector=dict(type='histogram2d'))

In [None]:

# fig = go.Figure()
# fig.add_trace(go.Histogram(dict(x=[1,2,3,4,5], y=[1,2,3,4,5])))
# fig.show()

In [None]:
# model.plot()
# gradients_hist   = np.concatenate([(layer/model.history_steps).flatten() for layer in model.grad_history])
# gradients_hist   = np.concatenate([gradients_hist, gradients_hist])
# gradients_colors = np.concatenate([np.full(layer.shape, i).flatten() for i, layer in enumerate(model.grad_history)])
# gradients_colors = np.concatenate([gradients_colors.astype(str), np.full(gradients_colors.shape, 'Global')])
# px.histogram(gradients_hist, color=gradients_colors, title='Gradients', nbins=100, barmode='group',
#             #  color_discrete_sequence=px.colors.qualitative.D3
#              ).update_traces(visible="legendonly", selector=lambda t: not t.name in ["Global"])


gradients_hist   = np.concatenate([(layer/model.history_steps).flatten() for layer in model.grad_history])
gradients_hist   = np.concatenate([gradients_hist, gradients_hist])
gradients_colors = np.concatenate([np.full(layer.shape, i).flatten() for i, layer in enumerate(model.grad_history)])
gradients_colors = np.concatenate([gradients_colors.astype(str), np.full(gradients_colors.shape, 'Global')])
# go.Histogram(gradients_hist, color=gradients_colors, title='Gradients', nbins=100, barmode='group',
#             #  color_discrete_sequence=px.colors.qualitative.D3
#              ).update_traces(visible="legendonly", selector=lambda t: not t.name in ["Global"])

In [None]:
# import dash_core_components as dcc
from dash import dcc
import plotly.express as px
import plotly.subplots as sp


figure1 = px.histogram(gradients_hist, color=gradients_colors, title='Gradients', nbins=100, barmode='group',).update_traces(visible="legendonly", selector=lambda t: not t.name in ["Global"])
figure2 = px.histogram(gradients_hist, color=gradients_colors, title='Gradients', nbins=100, barmode='group',).update_traces(visible="legendonly", selector=lambda t: not t.name in ["Global"])



# For as many traces that exist per Express figure, get the traces from each plot and store them in an array.
# This is essentially breaking down the Express fig into it's traces
# figure1_traces = []
# figure2_traces = []
# for trace in range(len(figure1["data"])):
#     figure1_traces.append(figure1["data"][trace])
# for trace in range(len(figure2["data"])):
#     figure2_traces.append(figure2["data"][trace])
figure1_traces = [trace for trace in figure1["data"]]
figure2_traces = [trace for trace in figure2["data"]]

#Create a 1x2 subplot
this_figure = sp.make_subplots(rows=1, cols=2) 

# Get the Express fig broken down as traces and add the traces to the proper plot within in the subplot
# for traces in figure1["data"]:
#     this_figure.append_trace(traces, row=1, col=1)
# for traces in figure2["data"]:
#     this_figure.append_trace(traces, row=1, col=2)
[this_figure.append_trace(traces, row=1, col=1) for traces in figure1["data"]]
[this_figure.append_trace(traces, row=1, col=2) for traces in figure2["data"]]

#the subplot as shown in the above image
final_graph = dcc.Graph(figure=this_figure)
this_figure.show()


In [None]:
# final_graph.figure.show()

In [None]:
# import plotly.graph_objects as go

# histogram_traces = []

# gradients_colors +=1
# # for i, (hist, color) in enumerate(zip(gradients_hist, gradients_colors)):
# # for i in range(len(gradients_hist)):
# #     hist = gradients_hist[i]
# #     color = gradients_colors[i]
# #     trace = go.Histogram(x=hist, name=f'Hist {i}', opacity=1, marker=dict(color=color))
# #     histogram_traces.append(trace)

# trace = go.Histogram(x=gradients_hist, name=f'Hist {i}', opacity=1, marker=dict(color=gradients_colors))

# fig = go.Figure(data=trace)
# # fig = go.Figure(data=histogram_traces)

# # Update layout properties as needed
# fig.update_layout(
#     title='Gradients',
#     barmode='group',
# )

# # Make all traces except "Global" legendonly
# for trace in fig.data:
#     if trace.name != "Global":
#         trace.visible = "legendonly"

# fig.show()