## Main part

In [1]:
import numpy as np

np.random.seed(0)

x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

```
     Input Layer (2 neurons)
            ||
            \/
     Hidden Layer (3 neurons)
            ||
            \/
     Output Layer (1 neuron)
```

In [2]:
W = np.random.randn(1, 2)
b = np.random.randn(1, 1)

print(np.dot(W,x.T)+b)

[[0.97873798 1.37889519 2.74279033 3.14294754]]


In [3]:
W1 = np.random.randn(3, 2)
b1 = np.zeros((3,1))
W2 = np.random.randn(1, 3)
b2 = np.zeros((1,1))

print(np.dot(W1,x.T)+b1)
print()
print(np.dot(W2,np.dot(W1,x.T)+b1)+b2)

[[ 0.          1.86755799  2.2408932   4.10845119]
 [ 0.          0.95008842 -0.97727788 -0.02718946]
 [ 0.         -0.10321885 -0.15135721 -0.25457606]]

[[0.         0.7535622  0.55922202 1.31278422]]


In [4]:
def relu(x): return np.maximum(0, x)

In [5]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [6]:
Z1 = np.dot(W1,x.T)
A1 = relu(Z1)
Z2 = np.dot(W2,A1)+b2
A2 = sigmoid(Z2)

print(A2)

[[0.5        0.71170324 0.71506399 0.84381919]]


In [7]:
def bceloss(y_hat, y): 
    y_hat = np.clip(y_hat, 1e-8, 1 - 1e-8)
    y = y.T
    return -np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) / y.shape[0]

In [8]:
loss = bceloss(A2, y)
loss

3.2253656216592548

<img src='Дизайн без названия.png' width='55%'/>

In [9]:
m=y.shape[0]

dA2 = -(y.T/A2) + ((1-y.T)/(1-A2))
dZ2 = dA2 * (A2 * (1-A2))
dW2 = (1/m) * np.dot(dZ2, A1.T)
db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)
dA1 = np.dot(W2.T, dZ2)
dZ1 = dA1 * np.where(A1 > 0, 1, 0)
# dZ1[dZ1 <= 0] = 1e-8
dW1 = (1/m) * np.dot(dZ1, x)
db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)

print(dW1)
print()
print(db1)

[[ 0.05736915  0.05702417]
 [ 0.         -0.01038182]
 [ 0.          0.        ]]

[[ 0.02777559]
 [-0.01038182]
 [ 0.        ]]


In [10]:
dW2, W2

(array([[ 0.57246697, -0.06847685,  0.        ]]),
 array([[0.4105985 , 0.14404357, 1.45427351]]))

In [11]:
learning_rate = 0.5

print(np.dot(W2,np.dot(W1,x.T)+b1)+b2)

W1 = W1 - learning_rate * dW1
b1 = b1 - learning_rate * db1
W2 = W2 - learning_rate * dW2
b2 = b2 - learning_rate * db2

print(y.T)
print()
print(np.dot(W2,np.dot(W1,x.T)+b1)+b2)

[[0.         0.7535622  0.55922202 1.31278422]]
[[0 1 1 0]]

[[-0.09712501  0.15178863 -0.21634948  0.03256416]]


In [12]:
for i in range(10):
    m=y.shape[0]

    dA2 = -(y.T/A2) + ((1-y.T)/(1-A2))
    dZ2 = dA2 * (A2 * (1-A2))
    dW2 = (1/m) * np.dot(dZ2, A1.T)
    db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * np.where(Z1 > 0, 1, 0)
    dZ1[dZ1 <= 0] = 1e-8
    dW1 = (1/m) * np.dot(dZ1, x)
    db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)

    learning_rate = 1 - i/2000

    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2

    if i % 1 == 0:
        print(np.dot(W2,np.dot(W1,x.T)+b1)+b2)

        print(learning_rate)

[[-0.26970974 -0.99641848 -1.71051536 -2.4372241 ]]
1.0
[[-0.37346062 -2.03910763 -3.09937423 -4.76502124]]
0.9995
[[-0.27333926 -2.77494719 -4.1824053  -6.68401322]]
0.999
[[ 0.17075003 -2.99338294 -4.7498757  -7.91400867]]
0.9985
[[ 1.09848148 -2.4844941  -4.59268353 -8.17565911]]
0.998
[[ 2.64910841 -1.03899247 -3.50235702 -7.1904579 ]]
0.9975
[[ 4.96146375  1.55177829 -1.27105375 -4.68073921]]
0.997
[[ 8.17396054  5.49584351  2.30844009 -0.36967694]]
0.9965
[[12.4245925  11.00059827  7.44271057  6.01871635]]
0.996
[[17.85093452 18.27280821 14.33771675 14.75959044]]
0.9955


In [13]:
5.49584351  - 11.00059827  

-5.504754759999999

In [44]:
class NursikJunior:
    def __init__(self, units, learning_rate=1):
        self.learning_rate = learning_rate

        self.W = []
        self.b = []

        for i in range(len(units)-1):
            self.W.append(np.random.randn(units[i+1], units[i]) * 0.01)
            self.b.append(np.zeros((units[i+1], 1)))

        self.A = []
        self.Z = []
        

    def sigmoid(self, x):
        return (1 / (1 + np.exp(-x)))

    def relu(self, x):
        return np.maximum(0, x)

    def forward(self, x):
        self.A = []
        self.Z = []
        
        Z_last = x.T
        self.A.append(Z_last)

        # print('forward: ',self.W)
        for i in range(len(self.W)):
            Z = np.dot(self.W[i], Z_last) + self.b[i]
            self.Z.append(Z)
            A = self.relu(Z) if i < len(self.W) - 1 else self.sigmoid(Z)
            self.A.append(A)
            Z_last = A
            
        return self.A[-1]
    
    def backward(self, y):

        # print('backfard: ',self.W)
        for i in range(len(self.W)-1, -1, -1):
            if i == len(self.W) - 1:
                dA = -(1/y.shape[0]) * (y.T/self.A[i+1] - (1-y).T/(1-self.A[i+1]))
                dZ = dA * self.A[i+1] * (1 - self.A[i+1])
            else:
                dA = np.dot(self.W[i+1].T, dZ)
                dZ = dA*np.where(self.Z[i] > 0, 1, 0)
            dW = (1/y.shape[0]) * np.dot(dZ, self.A[i].T)
            db = (1/y.shape[0]) * np.sum(dZ, axis=1, keepdims=True)

            self.W[i] = self.W[i] - self.learning_rate * dW
            self.b[i] = self.b[i] - self.learning_rate * db

        # print(self.W)
    def predict(self, x):
        return self.forward(x) > 0.5

    def loss(self, y_hat, y):
        y_hat = np.clip(y_hat, 1e-8, 1 - 1e-8)
        y =y.T
        return -(1/y.shape[0]) * np.sum(y*np.log(y_hat) + (1-y)*np.log(1-y_hat))

In [45]:
units = [2,3,5,1]

In [47]:
NJ = NursikJunior(units, 1)
for i in range(10000):

    NJ.forward(x)
    # print(NJ.A[-1])

    NJ.backward(y)
    if i % 1000 == 0:

        print(NJ.loss(NJ.A[-1], y))
        # if np.round(NJ.A[-1][0][0],3) == 0.5 and np.round(NJ.A[-1][0][1],3) == 0.5:
        #     break


print(NJ.A[-1])


2.772588677750069
2.7725870124633674
2.7725777857251064
2.77220572514621
0.03694302799189906
0.011903753765027272
0.007054564915184421
0.005000482156754122
0.0038695953002968737
0.0031538819744965536
[[0.00128791 0.99995787 0.99995806 0.00128791]]


## BatchNorm part

<img src='justtemp (2).png' width='55%'>

In [48]:
def BatchNorm(x, gamma=None, beta=None):
    mean = np.mean(x, axis=0)
    varience = np.std(x, axis=0)

    normalized = (x - mean) / np.sqrt(varience + 1e-8)
    result = gamma*normalized + beta

    return result, normalized, gamma, beta, varience, mean


def batchnorm_backward(a, a_hat, da_wave, gamma, beta, varience, mean):
    dbeta = np.sum(da_wave, axis=0)
    dgamma = np.sum(da_wave * a_hat, axis=0)

    da_hat = gamma * da_wave
    dvarience =  da_hat * (-1/2) * a_hat/(varience**2 + 1e-8)
    dmean = dvarience * np.sum(2/a_hat.shape[0] * (a - mean), axis=0) + np.sum(da_hat * (-1/np.sqrt(varience**2 + 1e-8)), axis=0)
  
    da = dmean / a_hat.shape[0] + dvarience * 2/a_hat.shape[0] * (a - mean) + da_hat / (1/np.sqrt(varience**2 + 1e-8))

    return dbeta, dgamma, da


In [50]:
W1 = np.random.randn(3, 2)* 0.01
b1 = np.zeros((3,1))
W2 = np.random.randn(5, 3)* 0.01
b2 = np.zeros((5,1))
W3 = np.random.randn(1, 5)* 0.01
b3 = np.zeros((1,1))

learning_rate = 0.5

gamma1 = 1
beta1 = 0
gamma2 = 1
beta2 = 0

for i in range(10000):
    

    Z1 = np.dot(W1,x.T)+b1
    N1, normalized1, gamma1, beta1, varience1, mean1 = BatchNorm(Z1, gamma1, beta1)
    A1 = relu(N1)
    Z2 = np.dot(W2,A1)+b2
    N2, normalized2, gamma2, beta2, varience2, mean2 = BatchNorm(Z2, gamma2, beta2)
    A2 = relu(N2)
    Z3 = np.dot(W3,A2)+b3

    
    try: A3 = sigmoid(Z3)
    except: print(i);break

    loss = bceloss(A3, y)
    if i % 1000 == 0:
        print(loss)
        print(A3)
        print()


    dA3 = - ( y.T/A3) + (1-y.T)/(1-A3)
    dZ3 = dA3 * A3 * (1-A3)
    dW3 = (1/y.shape[0]) * np.dot(dZ3, A2.T)
    db3 = (1/y.shape[0]) * np.sum(dZ3, axis=1, keepdims=True)
    dA2 = np.dot(W3.T, dZ3)
    dN2 = dA2 * np.where(Z2 > 0, 1, 0)
    dbeta2, dgamma2, dZ2 = batchnorm_backward(Z2, normalized2, dN2, gamma2, beta2, varience2, mean2)
    # dZ2[dZ2 <= 0] = 1e-8
    dW2 = (1/y.shape[0]) * np.dot(dZ2, A1.T)
    db2 = (1/y.shape[0]) * np.sum(dZ2, axis=1, keepdims=True)
    dA1 = np.dot(W2.T, dZ2)
    dN1 = dA1 * np.where(Z1 > 0, 1, 0)
    dbeta1, dgamma1, dZ1 = batchnorm_backward(Z1, normalized1, dN1, gamma1, beta1, varience1, mean1) 
    # dZ1[dZ1 <= 0] = 1e-8
    dW1 = (1/y.shape[0]) * np.dot(dZ1, x)
    db1 = (1/y.shape[0]) * np.sum(dZ1, axis=1, keepdims=True)

    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    W3 = W3 - learning_rate * dW3
    b3 = b3 - learning_rate * db3

    gamma1 = gamma1 - learning_rate * dgamma1
    beta1 = beta1 - learning_rate * dbeta1
    gamma2 = gamma2 - learning_rate * dgamma2
    beta2 = beta2 - learning_rate * dbeta2

    if A3[0][0] == 0.5 and A3[0][1] == 0.5:
        break

2.7727333312068674
[[0.5        0.49992006 0.49990998 0.49990232]]

1.054178127960229
[[0.25593757 0.76691356 0.76691356 0.20370489]]

0.5994418280033131
[[0.16448579 0.86006326 0.86006326 0.11151297]]

0.4107733811577357
[[0.1203503  0.90188347 0.90188347 0.07318559]]

0.31030445712005944
[[0.09480426 0.92496023 0.92496023 0.05322265]]

0.2485613459935225
[[0.07823172 0.93942923 0.93942923 0.04125904]]

0.2069832202375455
[[0.06663116 0.94929847 0.94929847 0.03339405]]

0.1771614261234073
[[0.05806174 0.95644043 0.95644043 0.02787641]]

0.15476503848715473
[[0.05147333 0.96183924 0.96183924 0.0238156 ]]

0.13734663714155465
[[0.04624964 0.96605912 0.96605912 0.02071526]]



## Titanik part

In [None]:
import pandas as pd

titanik = pd.read_csv('Titanic-Dataset.csv')

titanik.fillna(0, inplace=True)

titanik.drop(['Ticket', 'Name', 'Cabin', 'PassengerId'], axis=1, inplace=True)

In [None]:
titanik = pd.get_dummies(titanik, drop_first=True)
x = titanik.drop(['Survived'], axis=1).to_numpy()
y = titanik['Survived'].to_numpy()
y = y.reshape(y.shape[0],1)

In [None]:
x[:3]

array([[3, 22.0, 1, 0, 7.25, True, False, False, True],
       [1, 38.0, 1, 0, 71.2833, False, True, False, False],
       [3, 26.0, 0, 0, 7.925, False, False, False, True]], dtype=object)

In [None]:
y[:3]

array([[0],
       [1],
       [1]], dtype=int64)

In [None]:
x.shape

(891, 9)

In [None]:
np.linalg.norm(np.float64(x), axis=0)

array([  73.28710664,  883.30712354,   36.41428291,   26.60826939,
       1766.87993463,   24.0208243 ,   12.9614814 ,    8.77496439,
         25.37715508])

In [None]:
# x = x/np.linalg.norm(np.float64(x), axis=0)
x = np.int64(x)

In [None]:
units = [9,25,5,1]

NJ = NursikJunior(units, 1)

for i in range(100000):

    NJ.forward(x)
    # print(NJ.A[-1])

    NJ.backward(y)
    if i % 1000 == 0:

        print(NJ.loss(NJ.A[-1], y))
        # if np.round(NJ.A[-1][0][0],3) == 0.5 and np.round(NJ.A[-1][0][1],3) == 0.5:
        #     break

617.5924744066583
607.2601674397192
601.3520190179339
597.9575658428776
595.9919879888412
594.8361394757412
594.1246114079502
593.6141522941086
593.0440733052071
591.6710984931344
584.9999114783645
570.2887587219384
565.3855799187542
563.2383572379917
561.715789735524
560.5573418180124
559.6626820234635
558.9469289343199
558.3494100139721
557.8201220899734
557.3245421519098
556.8350120261199
556.3212308501851
555.77873396264
555.1877780537114
554.5350768147205
553.810621945234
553.0068903134024
552.129110262936
551.1913862481314
550.1983150737071
549.2392878250499
548.3550064772483
547.575808550277
546.8955613671613
546.2761126069314
545.6983171823243
545.121005204257
544.5098971186196
543.8447535642459
543.1128737708814
542.2991853587032
541.371068153333
540.3044942709071
539.0875106446689
537.6972574811008
536.0694226986004
534.1650253722361
532.0073029495691
529.4942993745778
526.5406927945882
523.0546197235885
519.0483177379191
514.3909071920165
509.0098834600464
502.79284400824014

In [None]:
prediction = NJ.predict(x)
np.sum(prediction.reshape(891,1)==y)/y.shape[0]

0.8103254769921436