In [1]:
import numpy as np


In [2]:
np.random.seed(42)
W1 = np.random.randn(4,3)*0.001

In [3]:
W1

array([[ 0.00049671, -0.00013826,  0.00064769],
       [ 0.00152303, -0.00023415, -0.00023414],
       [ 0.00157921,  0.00076743, -0.00046947],
       [ 0.00054256, -0.00046342, -0.00046573]])

4-neurons in hidden layer , 3 input features 

W2 = 4 neurons and 4 input features - output - 1 

In [4]:
W2 = np.random.randn(1,4)*0.001

In [5]:
W2

array([[ 0.00024196, -0.00191328, -0.00172492, -0.00056229]])

In [6]:
b1 = np.zeros((4,1))

In [7]:
b2 = np.zeros((1,1))

In [8]:
X = np.array([[1,1,0],[1,0,1],[1,0,1]])
y = np.array([1,0,1])

forward pass 1 , input - hidden layer 1

In [9]:
Z1 = W1@X.T+b1 
A1 = np.maximum(0, Z1)

In [10]:
A1

array([[3.58449852e-04, 1.14440269e-03, 1.14440269e-03],
       [1.28887648e-03, 1.28889290e-03, 1.28889290e-03],
       [2.34664754e-03, 1.10973843e-03, 1.10973843e-03],
       [7.91423508e-05, 7.68302900e-05, 7.68302900e-05]])

A1 ~ input for the 2nd hidden layer

In [11]:
Z2 = W2@A1 + b2 

In [12]:
Z2

array([[-6.47152552e-06, -4.14651937e-06, -4.14651937e-06]])

In [13]:
def sigmoid(z):
    
    return 1 / (1 + np.exp(-z))

In [14]:
def ReLu(z):
    return np.maximum(0,z)

In [15]:
A2 = sigmoid(Z2)

In [16]:
A2

array([[0.49999838, 0.49999896, 0.49999896]])

In [17]:
def sigmoid_derivative(a):
    return a*(1-a)

In [18]:
def ReLu_derivative(z):
    return (z>0).astype(int)

In [19]:
def cross_entropy_loss(y_true,y_predict):
    los = -(y_true*np.log(y_predict+1e-7) + (1-y_true)*np.log(1-y_predict+1e-7))
    return np.mean(los)

In [20]:
def cross_entropy_loss_derivative(y,a):
    one = 1-y/1-a
    two = y/a
    return one-two

In [21]:
def forward_pass(cache):
    x,w1,b1,w2,b2 = cache
    
    
    z1 = x@w1.T + b1.T
    
    a1 = ReLu(z1)
    z2 = a1@w2.T+b2
    a2 = sigmoid(z2)
    return x,z1,a1,z2,a2
    

In [25]:
def backprop(cache,y,w2):
    x,z1,a1,z2,a2 = cache
    
    
    erro_at_two  =  a2-y
    
    Dw2 = (erro_at_two.T@a1)/x.shape[0]
    
    error_at_one = (erro_at_two@w2)*(ReLu_derivative(z1))
    
    
    Dw1 = (error_at_one.T@x)/x.shape[0]
   
    Db2 = np.sum(erro_at_two, axis=0, keepdims=True).T / x.shape[0]
    Db1 = np.sum(error_at_one, axis=0, keepdims=True).T/x.shape[0]
    
   
    return Dw1,Db1,Dw2,Db2
    
    
    

In [26]:
def update_params(w1,b1,w2,b2,dw1,db1,dw2,db2,lr=1e-2):
    w1 -= lr*dw1
    w2 -= lr*dw2
    b1 -= lr*db1
    b2 -= lr*db2
    return w1,b1,w2,b2

In [27]:
def neural_net(x,y,W1,b1,W2,b2):
    for i in range(1000):
        cache = x,W1,b1,W2,b2
        cache2 = forward_pass(cache)
        
        dw1,db1,dw2,db2 = backprop(cache2,y,W2)
        W1,b1,W2,b2 = update_params(W1,b1,W2,b2,dw1,db1,dw2,db2)
        

In [28]:
y1 = y.reshape(1,-1).T

In [39]:
neural_net(Xn,y1,W1,b1,W2,b2)

In [40]:
W1

array([[ 4.96714153e-04, -1.01967852e-04,  6.11392089e-04],
       [ 1.52302986e-03, -1.33250350e-03,  8.64213167e-04],
       [ 1.57921282e-03,  2.70453237e-04,  2.75071065e-05],
       [ 5.42560044e-04, -4.64743019e-04, -4.64404427e-04]])

In [41]:
cache = X,W1,b1,W2,b2

In [42]:
cache2 = forward_pass(cache)

In [43]:
probs = cache2[-1]

In [44]:
probs

array([[0.65119191],
       [0.65119053],
       [0.65119053]])

In [45]:
(probs>0.5).astype(int)

array([[1],
       [1],
       [1]])

In [35]:
from sklearn.preprocessing import StandardScaler

In [36]:
sc = StandardScaler()

In [37]:
Xn = sc.fit_transform(X)

In [38]:
Xn

array([[ 0.        ,  1.41421356, -1.41421356],
       [ 0.        , -0.70710678,  0.70710678],
       [ 0.        , -0.70710678,  0.70710678]])

In [10]:
class Neuron:
    def __init__(self,w=None,b=0.0,n_inputs=1):
        if w is None:
            self.weight = np.random.randn(1,n_inputs)*0.01
            self.bias = b 
       
    def ReLu(self,z):
        return np.maximum(0,z)
        
    def ReLu_derivative(self,z):
        return (z>0).astype(int)
        
    def sigmoid(self,z):
        return 1/(1+np.exp(-z))
    def softmax(self,Z):
        Z = Z - np.max(Z, axis=1, keepdims=True)   
        exp_Z = np.exp(Z)
        return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)
    
    def Calculations(self,x,act):
        self.x =x
        
        self.z = x@self.weight.T + self.bias
        if(act=="relu"):
            self.activation = self.ReLu(self.z)
        elif(act == "sigmoid"):
            self.activation = self.sigmoid(self.z)
        elif(act == "softmax"):
            self.activation = self.softmax(self.z)
        
    def error_cal(self,error):
        self.wt_gr = (error.T@self.x)/self.x.shape[0]
        self.bias_gr = np.sum(error,axis=0, keepdims=True).T / self.x.shape[0]
            
    def update_weight(self,lr=0.1):
        self.weight -= lr*self.wt_gr
        self.bias -= lr*self.bias_gr
    
        
        
        
            

In [35]:
class Layer:
    def __init__(self,no_neurons,inputs,is_output=False,act="relu",loss_function="MSE"):
        self.loss_function = loss_function
        self.act = act
        self.neurons = np.array([Neuron(n_inputs = inputs) for _ in range(no_neurons)])
        self.is_last = is_output 
    def binary_cross_entropy_error(self,y_true,y_predict):
        los = -(y_true*np.log(y_predict+1e-7) + (1-y_true)*np.log(1-y_predict+1e-7))
        return np.mean(los)
    def cross_entropy_error(self, y_true, y_pred):
        y_pred = np.clip(y_pred, 1e-7, 1 - 1e-7)
        return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))
    def mean_squared_error(self,y_true,y_pred):
        return y_pred - y_true
    def forward(self,x):
        self.x =x
        for n in self.neurons:
            n.Calculations(x,self.act)
    def ReLu_derivative(self,z):
        return (z>0).astype(int)
    def sigmoid_derivative(self,a):
        return a*(1-a)
    
    def back_prop(self,above_l):
        self.layer_weight = np.vstack([n.weight for n in self.neurons])
        
        self.z = np.hstack([n.z for n in self.neurons])
        self.a = np.hstack([n.activation for n in self.neurons])
        if(self.is_last == True):
            self.error = self.a - above_l 
           
        else:
            
            if self.loss_function == "MSE":
                grad = 1
                if self.act == 'ReLU':
                    grad = self.ReLu_derivative(self.z)
                elif self.act == 'sigmoid':
                    grad = self.sigmoid_derivative(self.a)
                
            
                self.error = (above_l.error @ above_l.layer_weight) * grad
            
        for i in range(len(self.neurons)):
            self.neurons[i].error_cal(self.error[:,i:i+1])
            self.neurons[i].update_weight()
            
        
        
        
            
        
            
        
    
    

In [37]:
def neural_nets(X,y,neurons,input_dim,output,itr=50):
    l1 = Layer(neurons,input_dim)
    l2 = Layer(neurons,neurons,)
    l3 = Layer(output,neurons,is_output=True)
    for i in range(itr):
        l1.forward(X)
        x2 = np.hstack([n.activation for n in l1.neurons])
        l2.forward(x2)
        x3 = np.hstack([n.activation for n in l2.neurons])
        l3.forward(x3)
        l3.back_prop(y)
        l2.back_prop(l3)
        l1.back_prop(l2)
    preds = np.hstack([n.activation for n in l3.neurons])
    return preds,l1,l2,l3
    

In [38]:
X = np.array([
    [0.0, 0.0, 1.0],
    [1.0, 0.0, 0.0],
    [1.0, 1.0, 1.0]
])

In [39]:
y = np.array([
    [0.0],
    [1.0],
    [1.0]
])

In [40]:
y.shape

(3, 1)

In [41]:
pred = neural_nets(X,y,4,3,1,1000)

In [42]:
pred

(array([[0.66666673],
        [0.66666633],
        [0.66666689]]),
 <__main__.Layer at 0x27dd823e180>,
 <__main__.Layer at 0x27dd7e86d50>,
 <__main__.Layer at 0x27dd823f0b0>)

In [154]:
(pred>0.5).astype(int)

array([[1],
       [1],
       [1]])

In [43]:
class Neural_Network:
    def __init__(self,no_layer,no_neurons,input_dim,output_dim,iters=1000):
        self.layers = []
        self.layers.append(Layer(no_neurons,input_dim))
        self.layers.extend([Layer(no_neurons,no_neurons) for _ in range(no_layer-2)])
        self.layers.append(Layer(output_dim,no_neurons,is_output=True))
        self.iter = iters 
    def fit_network(self,X,y):
        for _ in range(self.iter):
            x = X
            for l in self.layers:
                l.forward(x)
                x = np.hstack([n.activation for n in l.neurons])
                
            n = len(self.layers)
            self.layers[n-1].back_prop(y)
            for j in range(len(self.layers)-2,-1,-1):
                self.layers[j].back_prop(self.layers[j+1])
    def predict(self,X):
        x = X
        m = len(self.layers)
        for l in self.layers:
            l.forward(x)
            x = np.hstack([n.activation for n in l.neurons])
        preds = [n.activation for n in self.layers[m-1].neurons]
        return preds 
                
                
                
        

In [44]:
Nn = Neural_Network(2,4,3,1,1000)

In [45]:
len(Nn.layers)

2

In [46]:
Nn.layers[0].neurons[0].weight.shape

(1, 3)

In [47]:
Nn.layers[1].neurons[0].weight.shape

(1, 4)

In [48]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [49]:
iris = load_iris()
X, y = iris.data, iris.target

In [50]:
X.shape

(150, 4)

In [51]:
idx = np.arange(X.shape[0])
np.random.seed(42)
np.random.shuffle(idx)

In [52]:
Xs = X[idx]
ys = y[idx]

In [53]:
X_train,X_test = Xs[:120],Xs[120:]

In [54]:
Y_train,Y_test = ys[:120],ys[120:]

In [56]:
X_train.shape,Y_train.shape

((120, 4), (120,))

In [57]:
Y_trainu = Y_train.reshape(1,-1)

In [58]:
Y_trainu.shape

(1, 120)

In [59]:
Nn2 = Neural_Network(4,6,4,1,1000)

In [60]:
Nn2.fit_network(X_train,Y_trainu.T)

In [61]:
Nn2.layers[3].error.shape

(120, 1)