In [None]:
import numpy as np
from random import randint

In [2]:
def relu(z, deriv=False):
    activations = []
    shape = z.shape
    z = z.flatten()
    if deriv:  # Return Derivative of Function
        
        for i in range(len(z)):  # Element Wise
            if z[i] >= 0:
                activations.append(1)
            else:
                activations.append(0.2)
                
        return np.array(activations).reshape(shape)
    
    for i in range(len(z)):
        if z[i] > 0:
            activations.append(z[i])
        else:
            activations.append(0.2 * z[i])
            
    return np.array(activations).reshape(shape)

input = [1.,0.,-5.]
print('LeakyReLU: ', input, ' -> ', list(relu(np.array(input))))

print('LeakyReLU (deriv): ', input, ' -> ', list(relu(np.array(input), True)))

In [3]:
def add_layer(weight, bias, size, input_dim=None):
    if not input_dim:
        input_dim = weight[-1].shape[0]
    weight.append(1 * np.random.uniform(-1, 1, (size, input_dim)))
    bias.append(np.zeros(size))
    print(weight[-1].shape)

w, b = [], []
add_layer(w, b, 3, 2)
add_layer(w, b, 3)
add_layer(w, b, 1)

(3, 2)
(3, 3)
(1, 3)


In [4]:
a, z = [], []  # Global Variables


def forward_prop(X, do_print=False):
    h = X
    if do_print: print('h: ', h, h.shape)
    global a, z
    a,z  = [], []
    for i in range(len(w)):
        if do_print: print(f'Layer {i} | {w[i].T.shape} dot {h.shape}')
        h = h @ w[i].T  # weigt * input
        h = h + b[i] # bias add
        z.append(h)
        h = relu(h) # Activation Function
        a.append(h)
    return h

forward_prop(np.array([
    [0,1], 
    [0, 4], 
    [1, 3], 
    [5, 4]]), True)

h:  [[0 1]
 [0 4]
 [1 3]
 [5 4]] (4, 2)
Layer 0 | (2, 3) dot (4, 2)
Layer 1 | (3, 3) dot (4, 3)
Layer 2 | (3, 1) dot (4, 3)


array([[0.30424509],
       [1.21698037],
       [0.5180534 ],
       [0.10904002]])

In [12]:
np.set_printoptions(precision=3)

X = np.array([[1,0], 
              [0,1], 
              [1,1], 
              [0,0]])
print(X.shape)
y = np.array([[1], [1], [0], [0]])

learning_rate = 0.3

def loss(pred, y, deriv=False):
    return 0.5* 1/len(y) * np.sum((pred - y)**2)
    
def backprop_entry(X, label, print_loss=False):
    global a, z, w, b, n_weights, n_bias, learning_rate
    n_weights, n_bias = [], []
    y = forward_prop(X)
#     print(y.T, label.T)
    backprop_rec(0, X, y, label)
    
    # Update Weights
    w = list(reversed(n_weights))
    b = list(reversed(n_bias))
    if print_loss:
        loss(y, label)
    return loss(y, label)  # Return Loss


def backprop_rec(i, X, y, label):
    global a, z, w, b, n_weights, n_bias
    # Base Case
    if i+1 > len(w): 
        return (y - label)
    g = backprop_rec(i+1, X, y, label) * relu(z[i], True)  # Get Next Layer Derivative
#     print("Jacobi", g.shape)
    # Derivative with respect to weight [1xn]  
    if i-1 < 0: w_der = X  # Input Matrix
    else: w_der = a[i-1]  # Previous Layer Activation

    # Save change in weights
#     print("Weight Deriv:", w_der.shape, (w_der.T @ g).shape)
    n_weights.append(w[i] - learning_rate * 1/len(X) * (w_der.T @ g).T)
    n_bias.append(b[i] - learning_rate * 1/len(X) * np.mean(g, axis=0))
    
#     print('new weight',np.mean(g,axis=0).shape, b[i].shape)
    
    return g @ w[i]

def train_rec(epochs, size=100, threshold=0.0001, print_interval=1):
    l = []
    for i in range(epochs):
        sum_loss = 0.0
        for x in range(size):
            sum_loss += backprop_entry(X, y)
        l.append(sum_loss/size)
        if not i % print_interval: print(f'[Epoch {i}] Loss: {l[-1]}')
        if l[-1] < threshold or l[-1] != l[-1]: break
    return np.array(l).flatten()

# Reinitialize Wei
w, b = [], []
add_layer(w, b, 3, 2)
add_layer(w, b, 3)
add_layer(w, b, 1)

loss_over_time = train_rec(100,10)

print(forward_prop(np.array([[1,0], 
                             [0,1], 
                             [1,1], 
                             [0,0]])))

(4, 2)
(3, 2)
(3, 3)
(1, 3)
[Epoch 0] Loss: 0.20002296198569222
[Epoch 1] Loss: 0.1285353643227414
[Epoch 2] Loss: 0.11614952876341414
[Epoch 3] Loss: 0.11140891813685874
[Epoch 4] Loss: 0.10766448533234615
[Epoch 5] Loss: 0.10466218445739747
[Epoch 6] Loss: 0.10264927858294384
[Epoch 7] Loss: 0.10092676447068935
[Epoch 8] Loss: 0.09921366138463471
[Epoch 9] Loss: 0.09769475394163422
[Epoch 10] Loss: 0.09614067202375312
[Epoch 11] Loss: 0.09465823171833829
[Epoch 12] Loss: 0.09323496445249799
[Epoch 13] Loss: 0.09185980533248343
[Epoch 14] Loss: 0.09036340935365184
[Epoch 15] Loss: 0.08889370037921625
[Epoch 16] Loss: 0.08727222588977107
[Epoch 17] Loss: 0.08570643169721723
[Epoch 18] Loss: 0.08429399736255701
[Epoch 19] Loss: 0.0828135993212475
[Epoch 20] Loss: 0.08126145748260857
[Epoch 21] Loss: 0.07956812401392374
[Epoch 22] Loss: 0.0776859782743706
[Epoch 23] Loss: 0.06805484453054836
[Epoch 24] Loss: 0.05271964694281026
[Epoch 25] Loss: 0.03675888576949414
[Epoch 26] Loss: 0.0236

In [6]:
import pandas as pd
data = pd.read_csv('./data/iris.data', header=None)

data = pd.concat([data,pd.get_dummies(data.iloc()[:,4])],axis=1)
data = data.drop(data.columns[4], axis=1)

w, b = [], []
add_layer(w, b, 3, 4)
add_layer(w, b, 3)
add_layer(w, b, 3)

def get_batch(size):
    d = data.sample(size, replace=True).to_numpy()
    return d[:,:4], d[:, 4:]
get_batch(2)

(3, 4)
(3, 3)
(3, 3)


(array([[6.4, 3.2, 4.5, 1.5],
        [6.8, 3.2, 5.9, 2.3]]), array([[0., 1., 0.],
        [0., 0., 1.]]))

In [17]:
np.set_printoptions(precision=3)


learning_rate = 0.2

# Reinitialize Wei
w, b = [], []
add_layer(w, b, 3, input_dim=4)
add_layer(w, b, 6)
add_layer(w, b, 6)
add_layer(w, b, 6)
add_layer(w, b, 3)

def train_iris(epochs, size=100, threshold=0.0001, print_interval=1):
    l = []
    for i in range(epochs):
        sum_loss = 0.0
        X, y = get_batch(size)
        
        
        if not i % print_interval: print(f'[Epoch {i}] Loss: {backprop_entry(X, y)}')
#         if l[-1] < threshold or l[-1] != l[-1]: break
    return np.array(l).flatten()

loss_over_time = train_iris(1000, 1500, print_interval=100)
eval_iris()

(3, 4)
(6, 3)
(6, 6)
(6, 6)
(3, 6)
[Epoch 0] Loss: 1.9639126526460076
[Epoch 100] Loss: 0.7451599861500217
[Epoch 200] Loss: 0.683965629531997
[Epoch 300] Loss: 0.7016441886771566
[Epoch 400] Loss: 0.4354250181102146
[Epoch 500] Loss: 0.41492213897076613
[Epoch 600] Loss: 0.4008219685485013
[Epoch 700] Loss: 0.3956987193057733
[Epoch 800] Loss: 0.3808141067733698
[Epoch 900] Loss: 0.373312488440401
Prediction: 0 (1) [0.14432013647672512]
Prediction: 0 (1) [0.1482015576594029]
Prediction: 0 (1) [0.14502052912289254]
Prediction: 0 (1) [0.1428939297624538]
Prediction: 0 (1) [0.14755022956293187]
Prediction: 0 (1) [0.14238492457966684]
Prediction: 0 (0) [0.09527308444197449]
Prediction: 0 (1) [0.14755022956293187]
Prediction: 0 (0) [0.09107808188405205]
Prediction: 0 (0) [0.09879624048112991]
Prediction: 0 (0) [0.10310064124509373]
Prediction: 0 (2) [0.12049214567281144]
Prediction: 0 (2) [0.1195969859252416]
Prediction: 0 (2) [0.11926582008620626]
Prediction: 0 (2) [0.12162173077665002]
P

In [8]:
def eval_iris():
    X, y = get_batch(20)
    pred = forward_prop(X)
    for p, label in zip(pred, y):
        print(f'Prediction: {np.argmax(p)} ({np.argmax(label)}) [{loss(p, label)}]')
eval_iris()

Prediction: 0 (0) [0.001405609412778772]
Prediction: 2 (2) [0.07033369361992152]
Prediction: 0 (0) [0.0008782069765449825]
Prediction: 0 (0) [0.0006072314585636652]
Prediction: 0 (0) [0.007384173868787134]
Prediction: 0 (0) [0.002203145123753915]
Prediction: 2 (2) [0.07082063326090632]
Prediction: 2 (1) [0.0932930517372528]
Prediction: 2 (2) [0.07061437789629602]
Prediction: 2 (1) [0.0940222465868569]
Prediction: 0 (0) [0.0008950220999844055]
Prediction: 2 (2) [0.06930771429030433]
Prediction: 2 (1) [0.09300236441891647]
Prediction: 0 (0) [0.00033716663512219905]
Prediction: 2 (2) [0.07221230360530786]
Prediction: 0 (0) [0.0017255614460869734]
Prediction: 2 (2) [0.07033369361992152]
Prediction: 2 (2) [0.07265265188904353]
Prediction: 0 (0) [0.003149062867059336]
Prediction: 2 (2) [0.07217626860218396]


In [None]:
loss(np.array([[0,0,1]]), np.array([1,0,1]))

In [None]:
for i in b:
    print(i.shape)