In [64]:
from autograd import grad
from autograd import elementwise_grad
import autograd.numpy as np

In [65]:
import neuralnetwork_w_softder
from neuralnetwork_w_softder import Sigmoid
from neuralnetwork_w_softder import RELU
from neuralnetwork_w_softder import LRELU
from neuralnetwork_w_softder import Softmax

from neuralnetwork_w_softder import MSE
from neuralnetwork_w_softder import BCE_logloss
from neuralnetwork_w_softder import MCE_multiclass
import math
import sys
import warnings
from random import random, seed
from copy import deepcopy, copy
from typing import Tuple, Callable
from sklearn.utils import resample
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

Comparing autograd with the manually written derivatives

In [66]:
# Sample points at which the activation-function derivatives are evaluated,
# stored as a (5, 1) column vector.
inputs = np.array([0.2, 0.5, 0.8, 1.0, 1.5])[:, np.newaxis]

# Targets and predictions used to evaluate the cost-function derivatives,
# both stored as (4, 1) column vectors.
targets = np.array([0.25, 0.5, 0.75, 1.0])[:, np.newaxis]
predict = np.array([0.22, 0.54, 0.77, 1.1])[:, np.newaxis]

These are the derivatives computed with the manually written code:

In [67]:
# Evaluate the hand-written derivative of every cost function on the same
# (predict, targets) pair and store each result as one column of cost_store.
cost_list = [MSE(), BCE_logloss(), MCE_multiclass()]

# One row per prediction, one column per cost function.
cost_store = np.zeros((predict.size, len(cost_list)))
for i, cost_func in enumerate(cost_list):
    # `derive = True` presumably switches the cost object from returning its
    # value to returning its derivative (see neuralnetwork_w_softder).
    cost_func.derive = True
    cost_store[:, i] = cost_func(predict, targets, weights=None, lmd=0).ravel()

print(cost_store)

[[-0.015      -0.04370629 -0.28409091]
 [ 0.02        0.04025765 -0.23148148]
 [ 0.01        0.02823264 -0.24350649]
 [ 0.05       -0.22727273 -0.22727273]]


Autograd (using standalone function versions of the cost functions, since we weren't able to apply autograd to the classes)

In [68]:
# Repeated import: autograd's NumPy wrapper is already imported as `np` in the first cell.
import autograd.numpy as np
# Seed the RNG; note that none of the cells shown in this notebook draw random numbers.
np.random.seed(123)
def CostOLS(target):
    """Build the ordinary-least-squares cost for a fixed ``target``.

    Returns a function ``func(X)`` of the prediction ``X`` only, so that it
    can be handed directly to an automatic-differentiation routine. The cost
    is the sum of squared residuals divided by ``target.shape[0]``.
    """

    def func(X):
        n_rows = target.shape[0]
        squared_residuals = (target - X) ** 2
        return np.sum(squared_residuals) * (1.0 / n_rows)

    return func


def CostLogReg(target):
    """Build the binary cross-entropy (log-loss) cost for a fixed ``target``.

    Returns a function ``func(X)`` of the prediction ``X`` only. The cost is
    the negated sum of ``target*log(X) + (1-target)*log(1-X)`` divided by
    ``target.shape[0]``; a small offset is added inside each logarithm.
    """

    def func(X):
        # Offset added inside the logarithms (10e-10 == 1e-9).
        eps = 10e-10
        positive_term = target * np.log(X + eps)
        negative_term = (1 - target) * np.log(1 - X + eps)
        return -(1.0 / target.shape[0]) * np.sum(positive_term + negative_term)

    return func


def CostCrossEntropy(target):
    """Build the multi-class cross-entropy cost for a fixed ``target``.

    Returns a function ``func(X)`` of the prediction ``X`` only. The cost is
    the negated sum of ``target * log(X)`` divided by ``target.size`` (the
    total number of elements, not the number of rows); a small offset is
    added inside the logarithm.
    """

    def func(X):
        # Offset added inside the logarithm (10e-10 == 1e-9).
        eps = 10e-10
        weighted_log = target * np.log(X + eps)
        return -(1.0 / target.size) * np.sum(weighted_log)

    return func

In [69]:
# Differentiate each autograd-compatible cost with respect to the prediction
# and store the gradient as one column of auto_store.
func_list = [CostOLS, CostLogReg, CostCrossEntropy]

# One row per prediction, one column per cost function.
auto_store = np.zeros((predict.size, len(func_list)))
for i, cost_factory in enumerate(func_list):
    # cost_factory(targets) returns a function of the prediction only, which
    # is what grad differentiates. A separate loop name is used so that
    # func_list keeps referring to the list after the cell has run.
    func_der = grad(cost_factory(targets))
    auto_store[:, i] = func_der(predict).ravel()

print(auto_store)

[[-0.015      -0.04370629 -0.28409091]
 [ 0.02        0.04025765 -0.23148148]
 [ 0.01        0.02823264 -0.24350649]
 [ 0.05       -0.22727273 -0.22727273]]


  return f_raw(*args, **kwargs)


Here we check whether the manually written derivatives of the cost functions agree with the autograd derivatives. `np.allclose` is used with its default tolerances (`rtol=1e-05`, `atol=1e-08`); since every value has a magnitude larger than 0.01, this amounts to agreement to roughly five significant digits, which is good enough.

In [70]:
# Compare the hand-written cost derivatives with the autograd ones
# (np.allclose default tolerances) and report the outcome.
cost_derivatives_match = np.allclose(cost_store, auto_store)
if not cost_derivatives_match:
    print('Buhu! The derivatives are not equal:(')
else:
    print('Juhu! The derivatives are correct compared with autograd:)')

Juhu! The derivatives are correct compared with autograd:)


Onto the activation functions:

(Not softmax as we are using the combined derivative of softmax and cross-entropy)

Manual code:

In [71]:
# Evaluate the hand-written derivative of each activation class on `inputs`
# and store each result as one column of act_der.
#
# The classes are referenced through the module rather than by their bare
# names: a later cell defines plain functions called RELU and LRELU, which
# rebinds those names, so the bare names would break this cell on a re-run.
activations_list = [
    neuralnetwork_w_softder.Sigmoid(),
    neuralnetwork_w_softder.RELU(),
    neuralnetwork_w_softder.LRELU(),
]

# One row per input point, one column per activation function.
act_der = np.zeros((inputs.size, len(activations_list)))
for i, activation in enumerate(activations_list):
    # `derive = True` presumably switches the activation object from returning
    # its value to returning its derivative (see neuralnetwork_w_softder).
    activation.derive = True
    act_der[:, i] = activation(inputs).ravel()

print(act_der)

[[0.24751657 1.         1.        ]
 [0.23500371 1.         1.        ]
 [0.2139097  1.         1.        ]
 [0.19661193 1.         1.        ]
 [0.14914645 1.         1.        ]]


Autograd:

In [72]:
def sigmoid(X):
    """Logistic sigmoid ``1 / (1 + exp(-X))`` applied element-wise.

    If evaluating the expression raises ``FloatingPointError``, a hard step
    is returned instead: 1 where ``X > 0`` and 0 elsewhere.
    """
    try:
        denominator = 1 + np.exp(-X)
        return 1.0 / denominator
    except FloatingPointError:
        zeros = np.zeros(X.shape)
        ones = np.ones(X.shape)
        return np.where(X > zeros, ones, zeros)


def softmax(X):
    """Softmax along the last axis.

    The maximum along the last axis is subtracted before exponentiating, and
    a small constant is added to the normalising sum.
    """
    # Constant added to the denominator (10e-10 == 1e-9).
    delta = 10e-10
    shifted = X - np.max(X, axis=-1, keepdims=True)
    exponentials = np.exp(shifted)
    normaliser = np.sum(exponentials, axis=-1, keepdims=True) + delta
    return exponentials / normaliser


def RELU(X):
    """Rectified linear unit: ``X`` where ``X > 0``, otherwise 0.

    NOTE: this function has the same name as the ``RELU`` class imported from
    ``neuralnetwork_w_softder`` at the top of the notebook, so defining it
    rebinds that name.
    """
    zeros = np.zeros(X.shape)
    return np.where(X > zeros, X, zeros)


def LRELU(X):
    """Leaky rectified linear unit: ``X`` where ``X > 0``, otherwise ``delta * X``.

    NOTE: this function has the same name as the ``LRELU`` class imported from
    ``neuralnetwork_w_softder`` at the top of the notebook, so defining it
    rebinds that name.
    """
    # Slope used for non-positive inputs (10e-4 == 1e-3).
    delta = 10e-4
    leaky_branch = delta * X
    is_positive = X > np.zeros(X.shape)
    return np.where(is_positive, X, leaky_branch)


In [73]:
# Differentiate each function-style activation element-wise with autograd
# and store the derivative at `inputs` as one column of act_store.
auto_act = [sigmoid, RELU, LRELU]
act_store = np.zeros((5,3))
for column, activation_fn in enumerate(auto_act):
    derivative_at_inputs = elementwise_grad(activation_fn)(inputs)
    act_store[:, column] = derivative_at_inputs.ravel()

print(act_store)

[[0.24751657 1.         1.        ]
 [0.23500371 1.         1.        ]
 [0.2139097  1.         1.        ]
 [0.19661193 1.         1.        ]
 [0.14914645 1.         1.        ]]


Here we check whether the manually written derivatives of the activation functions agree with the autograd derivatives. `np.allclose` is used with its default tolerances (`rtol=1e-05`, `atol=1e-08`); since every value is larger than 0.1, this amounts to agreement to roughly five significant digits, which is good enough.

In [74]:
# Compare the hand-written activation derivatives with the autograd ones
# (np.allclose default tolerances) and report the outcome.
activation_derivatives_match = np.allclose(act_der, act_store)
if not activation_derivatives_match:
    print('Buhu! The derivatives are not equal:(')
else:
    print('Juhu! The derivatives are correct compared with autograd:)')

Juhu! The derivatives are correct compared with autograd:)
