In [21]:
import numpy as np

In [22]:
class NN():
    
    def __init__(self, layers, act_funcs):
        
        self.epsilon = 1e-6
        self.layers = layers
        self.act_funcs = act_funcs
        self.act_func_map = {'relu':self.ReLU, 'sigmoid':self.sigmoid, 'tanh':self.tanh, 'none':self.none, 'softmax':self.softmax}
        self.act_diff_map = {'relu':self.ReLU_diff, 'sigmoid':self.sigmoid_diff, 'tanh':self.tanh_diff, 'softmax':self.softmax_diff}
        self.loss_error_map = {'mse':self.mse_error, 'binary_crossentropy':self.bincross_error, 'categorical_crossentropy':self.catcross_error}
        self.metric_map = {'mse':self.calc_mse, 'binary_crossentropy':self.calc_bincross, 'categorical_crossentropy':self.calc_catcross, 'accuracy':self.calc_accuracy}
        
        self.weights = [ [] for _ in range(len(self.layers)) ]
        self.biases = [ [] for _ in range(len(self.layers)) ]  
        self.activations = [ [] for _ in range(len(self.layers)) ]
        
        for i in range(1, len(layers)):
            
            self.weights[i] = np.random.randn(layers[i-1], layers[i]) * np.sqrt(2/layers[i])
            self.biases[i] = np.zeros(layers[i])
            
    def tanh(self, X):
        return np.divide( (np.exp(X) - np.exp(-X)), (np.exp(X) + np.exp(-X) + self.epsilon) )
    
    def none(self, X):
        return X
    
    def sigmoid(self, X):
        f = np.vectorize(self.stable_sigmoid)
        return f(X)
        
    def stable_sigmoid(self, x):
        if x >= 0:
            z = np.exp(-x)
            return 1 / (1 + z)
        else:
            z = np.exp(x)
            return z / (1 + z)
    
    def ReLU(self, X):
        return np.maximum(X, 0)
    
    def softmax(self, X):
        b = np.mean(X)
        return np.exp(X - b)/np.sum(np.exp(X - b))
    
    def tanh_diff(self, X):
        return 1 - np.square(self.tanh(X))
    
    def sigmoid_diff(self, X):
        return np.multiply(self.sigmoid(X), (1-self.sigmoid(X)))
    
    def ReLU_diff(self, X):
        X[X<=0] = 0
        X[X>0] = 1
        return X
    
    def softmax_diff(self, X):
        return np.multiply(self.softmax(X), (1-self.softmax(X)))
        
    def fit(self, X, y, **kwargs):  
        
        if len(y.shape) == 1:
            y = np.expand_dims(y, 1)
          
        self.epochs = kwargs.get('epochs', None)    
        self.eta = kwargs.get('learning_rate', None)    
        self.valid = kwargs.get('validation_data', None)  
        self.batch_size = kwargs.get('batch_size', self.epochs)  
        
        self.history = {}
        
        for metric in self.metrics:
            self.history[metric] = [] 
            if self.valid != None:
                self.history[f'val_{metric}'] = []   
                if len(self.valid[1].shape) == 1:
                    self.valid[1] = np.expand_dims(self.valid[1], 1)
            
        for epoch in range(1, self.epochs+1):
            
            n_batches = np.ceil(self.epochs/self.batch_size).astype(int)       
            i = 0     
            
            for _ in range(n_batches):  
                
                if i+self.batch_size > len(X):        
                    batch_inp = X[i:]  
                    batch_targets = y[i:]
                else:  
                    batch_inp = X[i:i+self.batch_size]
                    batch_targets = y[i:i+self.batch_size]
                    
                self.forward_pass(batch_inp)
                self.back_propagation(batch_targets)
                i += self.batch_size
            
            for metric in self.metrics:
                self.history[metric].append(self.metric_map[metric](batch_inp, batch_targets))
                if self.valid != None:
                    self.history[f'val_{metric}'].append(self.metric_map[metric](self.valid[0], self.valid[1]))
            
            print(f'\nEpoch {epoch} ==> ', end='')
            for k,v in self.history.items():
                print(f'{k}: {v[-1]}', end=' ')
            
    def forward_pass(self, X):  
        self.activations[0] = X
        for i in range(1, len(self.layers)):
            act_func = self.act_func_map[self.act_funcs[i]]
            self.new = np.dot(self.activations[i-1], self.weights[i]) + self.biases[i]
            self.activations[i] = act_func(np.dot(self.activations[i-1], self.weights[i]) + self.biases[i])
            
    def back_propagation(self, y):
        
        y_hat = self.activations[-1]
        e = self.loss_func_error(y, y_hat)
        
        for i in range(len(self.weights)-1, 0, -1):
            
            if i<len(self.weights)-1:
                e = np.dot(e, self.weights[i+1].T)
                
            if self.act_funcs[i] != 'none' :
                act_diff = self.act_diff_map[self.act_funcs[i]](self.activations[i])
                e = np.multiply(e, act_diff)
            
            bias_gradients = self.clip_gradients(np.sum(e, axis=0))
            weight_gradients = self.clip_gradients(np.dot(self.activations[i-1].T, e))
            self.weights[i] = self.weights[i] - self.eta*weight_gradients
            self.biases[i] = self.biases[i] - self.eta*bias_gradients
        
    def compile_network(self, loss, *args):
        
        if len(args) == 1:
            self.metrics = args[0]
        else:
            self.metrics = []
        self.metrics.insert(0, loss)
        self.loss_func_error = self.loss_error_map[loss]
        self.loss = loss
        
    def predict(self, X):
        self.forward_pass(X)
        if self.loss == 'binary_crossentropy':
            return (self.activations[-1] > 0.5).astype(int)
        else:
            return self.activations[-1]
        
    def clip_gradients(self, grad):
        grad[grad>1] = 1
        grad[grad<-1] = -1
        return grad
        
    def mse_error(self, y, y_hat):      
        n_samples = y.shape[0]
        e = -(2/n_samples)*(y-y_hat)
        return e
            
    def bincross_error(self, y, y_hat):     
        n_samples = y.shape[0]
        e = (1/n_samples)*( -np.divide(y, y_hat+ self.epsilon) + np.divide((1-y), (1-y_hat+ self.epsilon)) )   
        return e
    
    def catcross_error(self, y, y_hat):     
        n_samples = y.shape[0]
        n_classes = y.shape[1]
        e = (1/n_samples*n_classes)*( -np.divide(y, y_hat+ self.epsilon) )   
        return e
    
    def calc_mse(self, X, y):
        n_samples = y.shape[0]
        y_hat = self.predict(X)
        return np.sum(np.square(y - y_hat))/n_samples
    
    def calc_bincross(self, X, y):
        n_samples = y.shape[0]
        self.forward_pass(X)
        y_hat = self.activations[-1]
        return np.sum(np.multiply(y, np.log(y_hat + self.epsilon)) + np.multiply((1-y), np.log(1-y_hat + self.epsilon)) ) / n_samples
    
    def calc_catcross(self, X, y):
        n_samples = y.shape[0]
        n_classes = y.shape[1]
        self.forward_pass(X)
        y_hat = self.activations[-1]
        return np.sum(np.multiply(y, np.log(y_hat + self.epsilon)))/(n_samples*n_classes)
    
    def calc_rmse(self, X, y):
        return np.sqrt(self.calc_mse(X, y))    
        
    def calc_r2(self, X, y):
        n_samples = y.shape[0]
        y_hat = self.predict(X)
        r2 = 1 - (np.sum(np.square(y - y_hat))) / (np.sum(np.square(y - np.mean(y))))
        return r2
    
    def calc_accuracy(self, X, y):
        n_samples = y.shape[0]
        if self.loss == 'binary_crossentropy':
            y_hat = (self.predict(X) > 0.5).astype(int)
            return np.sum(y == y_hat) / n_samples
        elif self.loss == 'categorical_crossentropy':
            y_hat = np.argmax(self.predict(X))
            return np.sum(np.argmax(y) == y_hat) / n_samples
        
    def evaluate(self, X, y):
        
        if len(y.shape) == 1:
            y = np.expand_dims(y, 1)
        
        if self.loss == 'mse':
            print(f"Root mean square error: {self.calc_rmse(X, y)}")
            print(f"R2 score: {self.calc_r2(X, y)}")
            
        else:
            print(f"Accuracy: {self.calc_accuracy(X, y)}")

In [23]:
#Regression

#X = np.random.rand(15000, 5)
#y = 4*np.square(X[:, 0]) + 1.4*X[:, 1] + 3.5*np.sqrt(np.abs(X[:, 2])) + 0.5*X[:, 3] - 2.7*(X[:, 4])

In [24]:
# Binary Classification

def sigmoid(x):
    return 1/(1+np.exp(-x))

X = np.random.rand(15000, 5)
y = sigmoid(0.4*np.square(X[:, 0]) + 0.7*X[:, 1] + 0.5*np.sqrt(np.abs(X[:, 2])) + 0.25*X[:, 3] - 0.17*(X[:, 4]))
y = (y>0.7).astype(int)

In [25]:
# Multi-Class classification

# from tensorflow.keras.datasets import mnist
# (X_train, y_train), _ = mnist.load_data()

# X = X_train.reshape(X_train.shape[0], -1)
# X = X[:15000]
# X = X/255.0

# y = y_train[:15000]
# z = np.zeros((y.shape[0], 10))
# for i in range(len(y)):
#     z[i, y[i]] = 1
# y = z

In [26]:
model = NN([5, 10, 7, 1], ['none', 'relu', 'relu', 'none'])
# model = NN([5, 10, 7, 1], ['none', 'relu', 'relu', 'sigmoid'])
# model = NN([784, 1200, 550, 10], ['none', 'relu', 'relu', 'softmax'])

In [27]:
model.compile_network('mse')
# model.compile_network('binary_crossentropy', ['accuracy'])
# model.compile_network('categorical_crossentropy', ['accuracy'])

In [28]:
model.fit(X[:10000], y[:10000], epochs=100, batch_size=100, learning_rate=0.1, validation_data=[X[10000:], y[10000:]])
# model.fit(X[:10000], y[:10000], epochs=100, batch_size=100, learning_rate=0.1, validation_data=[X[10000:], y[10000:]])
# model.fit(X[:10000], y[:10000], epochs=100, batch_size=100, learning_rate=0.001, validation_data=[X[10000:], y[10000:]])


Epoch 1 ==> mse: 0.41116456737737656 val_mse: 0.4226586282304884 
Epoch 2 ==> mse: 0.3352310917097101 val_mse: 0.3773214144481722 
Epoch 3 ==> mse: 0.3021346355300285 val_mse: 0.3182416456637003 
Epoch 4 ==> mse: 0.2712493985740437 val_mse: 0.3040422957095154 
Epoch 5 ==> mse: 0.2470639251090455 val_mse: 0.2691861422605654 
Epoch 6 ==> mse: 0.23107720920681438 val_mse: 0.2583362265859489 
Epoch 7 ==> mse: 0.21904000336761562 val_mse: 0.24273881630802097 
Epoch 8 ==> mse: 0.21011168880111109 val_mse: 0.2339996362574395 
Epoch 9 ==> mse: 0.20265519519818145 val_mse: 0.22568030504375228 
Epoch 10 ==> mse: 0.19671856124149062 val_mse: 0.21744626224100475 
Epoch 11 ==> mse: 0.19170766254039662 val_mse: 0.21175598973652981 
Epoch 12 ==> mse: 0.18758316833537506 val_mse: 0.20557252026883038 
Epoch 13 ==> mse: 0.18398935735657315 val_mse: 0.2017323312108726 
Epoch 14 ==> mse: 0.18079435072303077 val_mse: 0.1969771481590587 
Epoch 15 ==> mse: 0.17831450050941214 val_mse: 0.1939604371332297 
Ep

In [29]:
model.predict(X[10000:])

array([[ 0.7474967 ],
       [ 0.74395809],
       [ 0.37299054],
       ...,
       [-0.18000952],
       [-0.13340807],
       [ 0.56683151]])

In [30]:
model.evaluate(X[10000:], y[10000:])

Root mean square error: 0.3134586957348973
R2 score: 0.6069441460273768
