In [1]:
import numpy as np

In [2]:
np.random.seed(42)

In [3]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def segmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

# 2 Input "And" Gate

In [4]:
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 0, 0, 1])
X, y

(array([[0, 0],
        [0, 1],
        [1, 0],
        [1, 1]]),
 array([0, 0, 0, 1]))

In [5]:
w1 = np.random.rand()
w2 = np.random.rand()
b = np.random.rand()
f"{w1=}, {w2=}, {b=}"

'w1=0.3745401188473625, w2=0.9507143064099162, b=0.7319939418114051'

In [6]:
def forward(x, w1, w2, b):
    s1 = x[0] * w1
    s2 = x[1] * w2
    a = s1 + s2 + b
    z = sigmoid(a)
    return z, a

In [7]:
forward(X[0], w1, w2, b)

(0.6752426767850529, 0.7319939418114051)

In [8]:
def loss(X, y, w1, w2, b):
    loss = 0
    for x, y_true in zip(X, y):
        y_pred = forward(x, w1, w2, b)[0]
        loss += 0.5 * (y_true - y_pred) ** 2
    return loss

In [9]:
loss(X, y, w1, w2, b)

0.8723062541831995

In [10]:
def backward(X, y, w1, w2, b):
    del_w1 = 0
    del_w2 = 0
    del_b = 0
    for x, y_true in zip(X, y):
        y_pred, a= forward(x, w1, w2, b)
        del_z = y_pred - y_true
        del_a = segmoid_prime(a)
        del_s1 = x[0]
        del_s2 = x[1]
        del_w1 += del_z * del_a * del_s1
        del_w2 += del_z * del_a * del_s2
        del_b += del_z * del_a
    return del_w1, del_w2, del_b

In [11]:
w1, w2, b

(0.3745401188473625, 0.9507143064099162, 0.7319939418114051)

In [12]:
backward(X, y, w1, w2, b)

(0.12895768951083897, 0.10006797336465945, 0.3884862448293551)

In [18]:
lrt = 1
for _ in range(10):
    dw1, dw2, db = backward(X, y, w1, w2, b)
    w1 -= lrt * dw1
    w2 -= lrt * dw2
    b -= lrt * db
    print(loss(X, y, w1, w2, b))

3.87411167721693e-05
3.874076724383232e-05
3.874041772178629e-05
3.874006820603046e-05
3.8739718696564754e-05
3.873936919338842e-05
3.8739019696503756e-05
3.873867020590821e-05
3.873832072160225e-05
3.8737971243585766e-05


In [19]:
forward(X[0], w1, w2, b)[0], forward(X[1], w1, w2, b)[0], forward(X[2], w1, w2, b)[0], forward(X[3], w1, w2, b)[0]

(1.3066666841691875e-07,
 0.004772673077705222,
 0.004772673077705222,
 0.9943502986046346)

In [20]:
def And2(x1, x2):
    return int(round(forward([x1, x2], w1, w2, b)[0], 0))

In [21]:
And2(0, 0), And2(0, 1), And2(1, 0), And2(1, 1)

(0, 0, 0, 1)

# 2 Input OR Gate

In [22]:
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 1])
X, y

(array([[0, 0],
        [0, 1],
        [1, 0],
        [1, 1]]),
 array([0, 1, 1, 1]))

In [23]:
w = np.random.rand(2)
b = np.random.rand(1)
f"{w=}, {b=}"

'w=array([0.59865848, 0.15601864]), b=array([0.15599452])'

In [24]:
def forward(x, w, b):
    s = np.dot(x, w) + b
    z = sigmoid(s)
    return z, s

forward(X, w, b)

(array([0.53891974, 0.57737657, 0.68019172, 0.71313758]),
 array([0.15599452, 0.31201316, 0.754653  , 0.91067164]))

In [27]:
def loss(X, y, w, b):
    loss = 0
    for x, y_true in zip(X, y):
        y_pred = forward(x, w, b)[0]
        loss += 0.5 * (y_true - y_pred) ** 2
    return loss[0]

loss(X, y, w, b)

0.326806213602541

In [28]:
def backward(X, y, w, b):
    del_w = np.zeros_like(w)
    del_b = np.zeros_like(b)

    y_pred, s = forward(X, w, b)
    del_z = y_pred - y
    del_s = segmoid_prime(s)
    del_w += np.dot(del_z * del_s, X)
    del_b += np.sum(del_z * del_s, axis=0, keepdims=True)
    return del_w, del_b


backward(X, y, w, b)

(array([-0.12825232, -0.16180968]), array([-0.09746427]))

In [38]:
lrt = 1
for _ in range(10000):
    dw, db = backward(X, y, w, b)
    w -= lrt * dw
    b -= lrt * db
    print(loss(X, y, w, b))


0.00013720867563390447
0.00013720048078455057
0.00013719228690891878
0.0001371840940068361
0.0001371759020781278
0.00013716771112262416
0.00013715952114014878
0.00013715133213052953
0.0001371431440935953
0.00013713495702917218
0.00013712677093708742
0.00013711858581716803
0.00013711040166924124
0.00013710221849313692
0.00013709403628867718
0.00013708585505569344
0.00013707767479401316
0.00013706949550346101
0.00013706131718386777
0.00013705313983505843
0.00013704496345686289
0.0001370367880491051
0.000137028613611617
0.00013702044014422528
0.0001370122676467565
0.00013700409611903835
0.00013699592556090014
0.00013698775597216858
0.00013697958735267316
0.00013697141970223981
0.00013696325302069718
0.00013695508730787314
0.0001369469225635962
0.00013693875878769606
0.00013693059597999873
0.00013692243414033386
0.00013691427326852505
0.00013690611336440815
0.00013689795442780757
0.00013688979645855228
0.00013688163945646845
0.00013687348342138862
0.00013686532835313834
0.00013685717425154

In [41]:
def Or2(x1, x2):
    return forward([x1, x2], w, b)[0].round().astype(int)[0]

In [42]:
Or2(0, 0), Or2(0, 1), Or2(1, 0), Or2(1, 1)

(0, 1, 1, 1)

# 2 Input XOR Gate

In [43]:
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 0])
X, y

(array([[0, 0],
        [0, 1],
        [1, 0],
        [1, 1]]),
 array([0, 1, 1, 0]))

2 Inputs <br>
1 hidden layer with 2 neurons <br>
1 output <br>


In [53]:
w1 = np.random.rand(2, 2)
w2 = np.random.rand(2, 1)
b1 = np.random.rand(2)
b2 = np.random.rand(1)
print(f"{w1=}\n{w2=}\n{b1=}, {b2=}")

w1=array([[0.18340451, 0.30424224],
       [0.52475643, 0.43194502]])
w2=array([[0.29122914],
       [0.61185289]])
b1=array([0.13949386, 0.29214465]), b2=array([0.36636184])


In [45]:
def forward(x, w1, w2, b1, b2):
    s1 = np.dot(x, w1) + b1
    z1 = sigmoid(s1)
    s2 = np.dot(z1, w2) + b2
    z2 = sigmoid(s2)
    return z2, s2, z1, s1

forward(X[0], w1, w2, b1, b2)

(array([0.67533599]),
 array([0.7324195]),
 array([0.69687117, 0.55288622]),
 array([0.83244264, 0.21233911]))

In [46]:
def loss(X, y, w1, w2, b1, b2):
    y_pred, _, _, _ = forward(X, w1, w2, b1, b2)
    loss = 0
    for y_true, y_pred in zip(y, y_pred):
        loss += 0.5 * (y_true - y_pred) ** 2
    return loss[0]


loss(X, y, w1, w2, b1, b2)

0.5823104395794695

In [47]:
a = np.array([[1, 2], 
              [3, 4]])
b = np.array([5, 6])
a, b, a@b, a*b,

(array([[1, 2],
        [3, 4]]),
 array([5, 6]),
 array([17, 39]),
 array([[ 5, 12],
        [15, 24]]))

In [48]:
np.tensordot(a, b, axes = 0), np.tensordot(a, b, axes = 0).shape

(array([[[ 5,  6],
         [10, 12]],
 
        [[15, 18],
         [20, 24]]]),
 (2, 2, 2))

In [49]:
def backward(X, y, w1, w2, b1, b2):
    del_w1 = np.zeros_like(w1)
    del_w2 = np.zeros_like(w2)
    del_b1 = np.zeros_like(b1)
    del_b2 = np.zeros_like(b2)

    y_pred, s2, z1, s1 = forward(X, w1, w2, b1, b2)

    del_z2 = y_pred.reshape(y.shape) - y

    del_s2 = segmoid_prime(s2).reshape(y.shape)

    del_w2 += np.dot(z1.T, del_z2 * del_s2).reshape(w2.shape)

    del_z1 = np.tensordot(del_z2 * del_s2, w2.T, axes=0).reshape(z1.shape)
    del_s1 = segmoid_prime(s1)
    del_w1 += np.dot(X.T, del_z1 * del_s1)

    del_b2 += np.sum(del_z2 * del_s2, axis=0)
    del_b1 += np.sum(del_z1 * del_s1, axis=0)
    return del_w1, del_w2, del_b1, del_b2


backward(X, y, w1, w2, b1, b2)

(array([[0.00019447, 0.00636977],
        [0.00024919, 0.0051897 ]]),
 array([[0.13027526],
        [0.11807031]]),
 array([0.00064652, 0.0300295 ]),
 array([0.17297179]))

In [55]:
lrt = 1
for _ in range(10000):
    dw1, dw2, db1, db2 = backward(X, y, w1, w2, b1, b2)
    w1 -= lrt * dw1
    w2 -= lrt * dw2
    b1 -= lrt * db1
    b2 -= lrt * db2
    print(loss(X, y, w1, w2, b1, b2))

0.0002764860386006949
0.00027645525561112764
0.0002764244793706056
0.00027639370987692454
0.0002763629471278825
0.0002763321911212824
0.0002763014418549214
0.0002762706993265958
0.00027623996353410874
0.00027620923447527017
0.00027617851214786944
0.0002761477965497223
0.00027611708767862937
0.00027608638553239614
0.00027605569010883475
0.0002760250014057475
0.00027599431942094445
0.00027596364415223844
0.00027593297559743774
0.0002759023137543523
0.00027587165862080033
0.0002758410101945949
0.00027581036847354343
0.0002757797334554724
0.00027574910513818813
0.0002757184835195156
0.0002756878685972698
0.0002756572603692697
0.0002756266588333395
0.0002755960639872932
0.0002755654758289598
0.000275534894356161
0.000275504319566718
0.0002754737514584592
0.00027544319002920705
0.0002754126352767977
0.00027538208719904477
0.00027535154579378925
0.0002753210110588557
0.0002752904829920736
0.000275259961591273
0.0002752294468542965
0.0002751989387789653
0.0002751684373631198
0.0002751379426045

In [56]:
def XOR2(x1, x2):
    return forward([x1, x2], w1, w2, b1, b2)[0].round().astype(int)[0]

In [57]:
XOR2(0, 0), XOR2(0, 1), XOR2(1, 0), XOR2(1, 1)

(0, 1, 1, 0)

# Half Adder

In [58]:
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0, 0], [0, 1], [0, 1], [1, 0]])
X, y

(array([[0, 0],
        [0, 1],
        [1, 0],
        [1, 1]]),
 array([[0, 0],
        [0, 1],
        [0, 1],
        [1, 0]]))

y: [carry, sum]

In [59]:
w1 = np.random.rand(2, 2)
w2 = np.random.rand(2, 2)
b1 = np.random.rand(2)
b2 = np.random.rand(2)
print(f"{w1=}\n{w2=}\n{b1=}, {b2=}")

w1=array([[0.45606998, 0.78517596],
       [0.19967378, 0.51423444]])
w2=array([[0.59241457, 0.04645041],
       [0.60754485, 0.17052412]])
b1=array([0.06505159, 0.94888554]), b2=array([0.96563203, 0.80839735])


In [60]:
def forward(x, w1, w2, b1, b2):
    s1 = np.dot(x, w1) + b1
    z1 = sigmoid(s1)
    s2 = np.dot(z1, w2) + b2
    z2 = sigmoid(s2)
    return z2, s2, z1, s1

forward(X[0], w1, w2, b1, b2)

(array([0.84676414, 0.72218121]),
 array([1.70944391, 0.95530701]),
 array([0.51625717, 0.720891  ]),
 array([0.06505159, 0.94888554]))

In [61]:
def loss(X, y, w1, w2, b1, b2):
    y_pred, _, _, _ = forward(X, w1, w2, b1, b2)
    loss = 0
    for y_true, y_pred in zip(y, y_pred):
        loss += 0.5 * (y_true - y_pred) ** 2
    return loss[0]

loss(X, y, w1, w2, b1, b2)

1.1080985562421628

In [62]:
def backward(X, y, w1, w2, b1, b2):
    del_w1 = np.zeros_like(w1)
    del_w2 = np.zeros_like(w2)
    del_b1 = np.zeros_like(b1)
    del_b2 = np.zeros_like(b2)

    y_pred, s2, z1, s1 = forward(X, w1, w2, b1, b2)

    del_z2 = y_pred - y

    del_s2 = segmoid_prime(s2)

    del_w2 += np.dot(z1.T, del_z2 * del_s2)

    del_z1 = np.dot(del_z2 * del_s2, w2.T)

    del_s1 = segmoid_prime(s1)

    del_w1 += np.dot(X.T, del_z1 * del_s1)

    del_b2 += np.sum(del_z2 * del_s2, axis=0)

    del_b1 += np.sum(del_z1 * del_s1, axis=0)

    return del_w1, del_w2, del_b1, del_b2

backward(X, y, w1, w2, b1, b2)

(array([[0.01302675, 0.00803441],
        [0.01422448, 0.00966333]]),
 array([[0.16985128, 0.10685975],
        [0.23733058, 0.1444018 ]]),
 array([0.04559418, 0.03473642]),
 array([0.30151753, 0.18022159]))

In [63]:
lrt = 1
for _ in range(10000):
    dw1, dw2, db1, db2 = backward(X, y, w1, w2, b1, b2)
    w1 -= lrt * dw1
    w2 -= lrt * dw2
    b1 -= lrt * db1
    b2 -= lrt * db2
    print(loss(X, y, w1, w2, b1, b2))


0.9059881002277557
0.6400725478167372
0.4611946204834877
0.4083954412264893
0.39421299189972214
0.38923099170606185
0.3870644695489326
0.38594169897937025
0.38525721178758165
0.38477363262070574
0.38438893354184445
0.3840563677358342
0.38375349286562055
0.38346912629436025
0.3831975015750472
0.3829355298052534
0.38268147302776406
0.3824342859586239
0.3821932835506673
0.3819579709137474
0.38172795542655813
0.38150290093456846
0.38128250367365507
0.38106647947314265
0.38085455683817093
0.38064647310519617
0.38044197220713516
0.3802408032830748
0.380042719731979
0.37984747850047096
0.3796548394946342
0.3794645650581245
0.37927641948635615
0.3790901685609399
0.3789055790961253
0.3787224184929907
0.3785404542992264
0.37835945377348285
0.3781791834538589
0.3779994087304471
0.3778198934220501
0.37764039935731564
0.3774606859606281
0.3772805098431759
0.37709962439968314
0.3769177794113636
0.3767347206557235
0.3765501895239095
0.37636392264636737
0.37617565152764443
0.37598510219124065
0.375791

0.0036574494599236054
0.003645022879271977
0.003632702583088338
0.0036204870135713146
0.0036083746444176817
0.003596363980049359
0.003584453554861735
0.0035726419324925056
0.0035609277051105274
0.00354930949272409
0.003537785942508124
0.0035263557281497325
0.003515017549211417
0.0035037701305117817
0.003492612221522866
0.003481542595783896
0.0034705600503307918
0.0034596634051411133
0.0034488515025939493
0.0034381232069442347
0.0034274774038112387
0.0034169129996807395
0.0034064289214203666
0.003396024115808002
0.003385697549072637
0.0033754482064473133
0.003365275091734123
0.00335517722688038
0.00334515365156623
0.0033352034228028935
0.0033253256145415124
0.003315519317292204
0.003305783637753021
0.003296117698448618
0.0032865206373781635
0.00327699160767248
0.0032675297772599638
0.0032581343285410765
0.0032488044580712347
0.003239539376251782
0.0032303383070288122
0.0032212004875997228
0.0032121251681271217
0.003203111611460068
0.003194159092862224
0.0031852668997469453
0.00317643433

In [64]:
def half_adder(x1, x2):
    s = np.array([x1, x2])
    z, _, _, _ = forward(s, w1, w2, b1, b2)
    return z.round().astype(int)

In [65]:
half_adder(0, 0), half_adder(0, 1), half_adder(1, 0), half_adder(1, 1)

(array([0, 0]), array([0, 1]), array([0, 1]), array([1, 0]))