In [1]:
import numpy as np
from random import randint
import matplotlib.pyplot as plt
import matplotlib.animation as animation

In [2]:
def relu(z, deriv=False):
    activations = []
    shape = z.shape
    z = z.flatten()
    if deriv:  # Return Derivative of Function
        
        for i in range(len(z)):  # Element Wise
            if z[i] >= 0:
                activations.append(1)
            else:
                activations.append(0.2)
                
        return np.array(activations).reshape(shape)
    
    for i in range(len(z)):
        if z[i] > 0:
            activations.append(z[i])
        else:
            activations.append(0.2 * z[i])
            
    return np.array(activations).reshape(shape)

input = [1.,0.,-5.]

print('LeakyReLU: ', input, ' -> ', list(relu(np.array(input))))
print('LeakyReLU (deriv): ', input, ' -> ', list(relu(np.array(input), True)))

LeakyReLU:  [1.0, 0.0, -5.0]  ->  [1.0, 0.0, -1.0]
LeakyReLU (deriv):  [1.0, 0.0, -5.0]  ->  [1.0, 1.0, 0.2]


In [3]:
def add_layer(weight, bias, size, input_dim=None):
    if not input_dim:
        input_dim = weight[-1].shape[0]
    weight.append(1 * np.random.uniform(-1, 1, (size, input_dim)))
    bias.append(np.zeros(size))
    print(weight[-1].shape)

w, b = [], []
add_layer(w, b, 3, 2)
add_layer(w, b, 3)
add_layer(w, b, 1)

(3, 2)
(3, 3)
(1, 3)


In [4]:
a, z = [], []  # Global Variables


def forward_prop(X, do_print=False):
    h = X
    if do_print: print('h: ', h, h.shape)
    global a, z
    a,z  = [], []
    for i in range(len(w)):
        if do_print: print(f'Layer {i} | {w[i].T.shape} dot {h.shape}')
        h = h @ w[i].T  # weigt * input
        h = h + b[i] # bias add
        z.append(h)
        h = relu(h) # Activation Function
        a.append(h)
    return h

forward_prop(np.array([
    [0,1], 
    [0, 4], 
    [1, 3], 
    [5, 4]]), True)

h:  [[0 1]
 [0 4]
 [1 3]
 [5 4]] (4, 2)
Layer 0 | (2, 3) dot (4, 2)
Layer 1 | (3, 3) dot (4, 3)
Layer 2 | (3, 1) dot (4, 3)


array([[ 0.12454926],
       [ 0.49819706],
       [ 0.29871294],
       [-0.10089022]])

In [5]:
np.set_printoptions(precision=3)

X = np.array([[1,0], 
              [0,1], 
              [1,1], 
              [0,0]])
print(X.shape)
y = np.array([[1], [1], [0], [0]])

learning_rate = 0.3

def loss(pred, y, deriv=False):
    return 0.5* 1/len(y) * np.sum((pred - y)**2)
    
def backprop_entry(X, label, print_loss=False):
    global a, z, w, b, n_weights, n_bias, learning_rate
    n_weights, n_bias = [], []
    y = forward_prop(X)
#     print(y.T, label.T)
    backprop_rec(0, X, y, label)
    
    # Update Weights
    w = list(reversed(n_weights))
    b = list(reversed(n_bias))
    if print_loss:
        loss(y, label)
    return loss(y, label)  # Return Loss


def backprop_rec(i, X, y, label):
    global a, z, w, b, n_weights, n_bias
    # Base Case
    if i+1 > len(w): 
        return (y - label)
    g = backprop_rec(i+1, X, y, label) * relu(z[i], True)  # Get Next Layer Derivative
#     print("Jacobi", g.shape)
    # Derivative with respect to weight [1xn]  
    if i-1 < 0: w_der = X  # Input Matrix
    else: w_der = a[i-1]  # Previous Layer Activation

    # Save change in weights
#     print("Weight Deriv:", w_der.shape, (w_der.T @ g).shape)
    n_weights.append(w[i] - learning_rate * 1/len(X) * (w_der.T @ g).T)
    n_bias.append(b[i] - learning_rate * 1/len(X) * np.mean(g, axis=0))
    
#     print('new weight',np.mean(g,axis=0).shape, b[i].shape)
    
    return g @ w[i]

def train_rec(epochs, size=100, threshold=0.0001, print_interval=1):
    l = []
    for i in range(epochs):
        sum_loss = 0.0
        for x in range(size):
            sum_loss += backprop_entry(X, y)
        l.append(sum_loss/size)
        if not i % print_interval: print(f'[Epoch {i}] Loss: {l[-1]}')
        if l[-1] < threshold or l[-1] != l[-1]: break
    return np.array(l).flatten()

# Reinitialize Wei
w, b = [], []
add_layer(w, b, 3, 2)
add_layer(w, b, 3)
add_layer(w, b, 1)

loss_over_time = train_rec(100,10)

print(forward_prop(np.array([[1,0], 
                             [0,1], 
                             [1,1], 
                             [0,0]])))

(4, 2)
(3, 2)
(3, 3)
(1, 3)
[Epoch 0] Loss: 0.26653747571750086
[Epoch 1] Loss: 0.2566856514467325
[Epoch 2] Loss: 0.1880964544620037
[Epoch 3] Loss: 0.13629583696721856
[Epoch 4] Loss: 0.12594758896617184
[Epoch 5] Loss: 0.12027495473788216
[Epoch 6] Loss: 0.11748132388200819
[Epoch 7] Loss: 0.11437981091119351
[Epoch 8] Loss: 0.1105386333805781
[Epoch 9] Loss: 0.10609238279785696
[Epoch 10] Loss: 0.10149943962946484
[Epoch 11] Loss: 0.09751976201422782
[Epoch 12] Loss: 0.09461208548678167
[Epoch 13] Loss: 0.09260258769921699
[Epoch 14] Loss: 0.09108657240967481
[Epoch 15] Loss: 0.0898459764126605
[Epoch 16] Loss: 0.08868521437596437
[Epoch 17] Loss: 0.08753815360599879
[Epoch 18] Loss: 0.0862616846293354
[Epoch 19] Loss: 0.0820919872940937
[Epoch 20] Loss: 0.07998234257918932
[Epoch 21] Loss: 0.07774228119446737
[Epoch 22] Loss: 0.07324723343927313
[Epoch 23] Loss: 0.06535071801534258
[Epoch 24] Loss: 0.05286032012136518
[Epoch 25] Loss: 0.03630211999089078
[Epoch 26] Loss: 0.0203852

In [6]:
import pandas as pd
data = pd.read_csv('./data/iris.data', header=None)

data = pd.concat([data,pd.get_dummies(data.iloc()[:,4])],axis=1)
data = data.drop(data.columns[4], axis=1)

w, b = [], []
add_layer(w, b, 3, 4)
add_layer(w, b, 3)
add_layer(w, b, 3)

def get_batch(size):
    d = data.sample(size, replace=True).to_numpy()
    return d[:,:4], d[:, 4:]
get_batch(2)

(3, 4)
(3, 3)
(3, 3)


(array([[6. , 3. , 4.8, 1.8],
        [5.9, 3. , 5.1, 1.8]]), array([[0., 0., 1.],
        [0., 0., 1.]]))

In [7]:
def save_animation(fig):
    global images
    ani = animation.ArtistAnimation(fig, ims, interval=50, blit=True,repeat_delay=1000)

def plot(size, save=None, title=''):
    fig = plt.figure(figsize=(12,12))
    ax = fig.add_subplot(111, projection='3d')
    
    setosa = data.loc[data['Iris-setosa'] == 1]
    xs, ys, zs, c = setosa.iloc()[:,0], setosa.iloc()[:,1], setosa.iloc()[:,2], setosa.iloc()[:,3]
    ax.scatter(xs, ys, zs, marker='+', c=c, cmap=plt.hot(), s=size[data['Iris-setosa'] == 1])

    setosa = data.loc[data['Iris-versicolor'] == 1]
    xs, ys, zs, c = setosa.iloc()[:,0], setosa.iloc()[:,1], setosa.iloc()[:,2], setosa.iloc()[:,3]
    ax.scatter(xs, ys, zs, marker='o', c=c, cmap=plt.hot(), s=size[data['Iris-setosa'] == 1])

    setosa = data.loc[data['Iris-virginica'] == 1]
    xs, ys, zs, c = setosa.iloc()[:,0], setosa.iloc()[:,1], setosa.iloc()[:,2], setosa.iloc()[:,3]
    ax.scatter(xs, ys, zs, marker='2', c=c, cmap=plt.hot(), s=size[data['Iris-setosa'] == 1])
    ax.set_title(title, loc='left')
    if save:
        plt.savefig(save)
    else:
        plt.show()
    
    plt.clf()


In [8]:
def eval_iris():
    X, y = get_batch(20)
    pred = forward_prop(X)
    for p, label in zip(pred, y):
        print(f'Prediction: {np.argmax(p)} ({np.argmax(label)}) [{loss(p, label)}]')
eval_iris()

Prediction: 2 (1) [0.5511470323484505]
Prediction: 2 (0) [0.31169476814677777]
Prediction: 2 (2) [0.06260242414138231]
Prediction: 2 (2) [0.10881020056797604]
Prediction: 2 (1) [0.5592752049396987]
Prediction: 2 (1) [0.4813903133766376]
Prediction: 2 (0) [0.28714608607973957]
Prediction: 2 (0) [0.30960119704206657]
Prediction: 2 (0) [0.3306110469393563]
Prediction: 2 (0) [0.288530479182536]
Prediction: 2 (2) [0.1469414285418094]
Prediction: 2 (1) [0.4728110587351098]
Prediction: 2 (2) [0.08263965370094531]
Prediction: 2 (0) [0.28959165425686095]
Prediction: 2 (1) [0.5993567406041197]
Prediction: 2 (2) [0.1725680993409199]
Prediction: 2 (0) [0.29208516242081783]
Prediction: 2 (2) [0.07885761734943578]
Prediction: 2 (2) [0.06271805409918188]
Prediction: 2 (0) [0.33507901024347975]


In [30]:
np.set_printoptions(precision=3)

learning_rate = 0.02

# Reinitialize Wei
w, b = [], []
add_layer(w, b, 3, input_dim=4)
add_layer(w, b, 9)
add_layer(w, b, 9)
add_layer(w, b, 3)


def plot_progress(epoch, title):
    pred = forward_prop(data.iloc()[:,:4].to_numpy())
    x = np.array([loss(pred[i], data.iloc()[i, 4:].to_numpy()) for i in range(len(pred))])
    plot(x*10500, f'./plot/plot-{str(epoch).zfill(10)}.png', title)

def train_iris(epochs, size=100, threshold=0.0001, print_interval=1):
    l = []
    for i in range(epochs):
        sum_loss = 0.0
        X, y = get_batch(size)
        
        loss = backprop_entry(X, y)
        if np.isnan(loss) or np.isinf(loss) or loss > 30: return
        if not i % print_interval: plot_progress(i, title=f'Epoch {i}\nLoss: {loss}')
        if not i % print_interval: print(f'[Epoch: {i}] Loss: {loss}')
    return np.array(l).flatten()

loss_over_time = train_iris(1000, 1000, print_interval=10)
eval_iris()

(3, 4)
(9, 3)
(9, 9)
(3, 9)
[Epoch: 0] Loss: 1.2137183385046155
[Epoch: 10] Loss: 0.7117066899717497
[Epoch: 20] Loss: 0.5694374662835284
[Epoch: 30] Loss: 0.5082335075488582
[Epoch: 40] Loss: 0.4784308773590633
[Epoch: 50] Loss: 0.4632331490907975
[Epoch: 60] Loss: 0.4383525946338631
[Epoch: 70] Loss: 0.4331607062049674
[Epoch: 80] Loss: 0.42741212154196784
[Epoch: 90] Loss: 0.3943878509222643
[Epoch: 100] Loss: 0.3854277006937037
[Epoch: 110] Loss: 0.3950156996963507
[Epoch: 120] Loss: 0.3764879030165527
[Epoch: 130] Loss: 0.34835935527595774
[Epoch: 140] Loss: 0.33526177759501213
[Epoch: 150] Loss: 0.30671324925326243
[Epoch: 160] Loss: 0.2996637776801248
[Epoch: 170] Loss: 0.3015765537250387
[Epoch: 180] Loss: 0.30652127988504185
[Epoch: 190] Loss: 0.296626504442253


  


[Epoch: 200] Loss: 0.2801816568796253
[Epoch: 210] Loss: 0.2778022397480799
[Epoch: 220] Loss: 0.26209437254375506
[Epoch: 230] Loss: 0.24120957277771235
[Epoch: 240] Loss: 0.2392531418226996
[Epoch: 250] Loss: 0.22570342469389373
[Epoch: 260] Loss: 0.22407532060889868
[Epoch: 270] Loss: 0.21915428405781526
[Epoch: 280] Loss: 0.20907415614180666
[Epoch: 290] Loss: 0.20044287882354883
[Epoch: 300] Loss: 0.20417230365843222
[Epoch: 310] Loss: 0.1997549569360276
[Epoch: 320] Loss: 0.18789309492369874
[Epoch: 330] Loss: 0.1763895989103186
[Epoch: 340] Loss: 0.1796972231474691
[Epoch: 350] Loss: 0.17896724606598155
[Epoch: 360] Loss: 0.17714663723845364
[Epoch: 370] Loss: 0.16741728219957183
[Epoch: 380] Loss: 0.16606038806132445
[Epoch: 390] Loss: 0.16696449146585884
[Epoch: 400] Loss: 0.16402764997451777
[Epoch: 410] Loss: 0.1668788132304621
[Epoch: 420] Loss: 0.16365332447482434
[Epoch: 430] Loss: 0.15795094810500063
[Epoch: 440] Loss: 0.156100664916848
[Epoch: 450] Loss: 0.1542076256583

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

In [27]:
eval_iris()

Prediction: 2 (2) [0.0012188285344752327]
Prediction: 2 (2) [0.009270333121173594]
Prediction: 0 (0) [0.0006989888195230844]
Prediction: 1 (1) [0.0020973451777913654]
Prediction: 0 (0) [0.018831007533301393]
Prediction: 1 (1) [0.006143811973531553]
Prediction: 2 (2) [0.0012188285344752327]
Prediction: 0 (0) [0.004317992632145459]
Prediction: 0 (0) [0.0034972866402013373]
Prediction: 0 (0) [0.00027522496254157825]
Prediction: 0 (0) [0.0034972866402013373]
Prediction: 2 (2) [0.00044210652706016347]
Prediction: 1 (1) [0.0021965410204396363]
Prediction: 1 (1) [0.00157126691350693]
Prediction: 1 (1) [0.01509798641489862]
Prediction: 2 (2) [0.00027028735562496244]
Prediction: 0 (0) [0.0016726728603097519]
Prediction: 1 (1) [0.0017081150702217378]
Prediction: 2 (2) [0.026426741010337932]
Prediction: 1 (1) [0.0023507980571148652]


In [10]:
for i in b:
    print(i.shape)

(3,)
(6,)
(6,)
(6,)
(3,)
