In [10]:
import numpy as np

np.random.seed(0)

x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

```
     Input Layer (2 neurons)
            ||
            \/
     Hidden Layer (3 neurons)
            ||
            \/
     Output Layer (1 neuron)
```

In [11]:
W = np.random.randn(1, 2)
b = np.random.randn(1, 1)

print(np.dot(W,x.T)+b)

[[0.97873798 1.37889519 2.74279033 3.14294754]]


In [12]:
W1 = np.random.randn(3, 2)
b1 = np.zeros((3,1))
W2 = np.random.randn(1, 3)
b2 = np.zeros((1,1))

print(np.dot(W1,x.T)+b1)
print()
print(np.dot(W2,np.dot(W1,x.T)+b1)+b2)

[[ 0.          1.86755799  2.2408932   4.10845119]
 [ 0.          0.95008842 -0.97727788 -0.02718946]
 [ 0.         -0.10321885 -0.15135721 -0.25457606]]

[[0.         0.7535622  0.55922202 1.31278422]]


In [13]:
def relu(x): return np.maximum(0, x)

In [14]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [15]:
Z1 = np.dot(W1,x.T)
A1 = relu(Z1)
Z2 = np.dot(W2,A1)+b2
A2 = sigmoid(Z2)

print(A2)

[[0.5        0.71170324 0.71506399 0.84381919]]


In [16]:
def bceloss(y_hat, y): 
    y_hat = np.clip(y_hat, 1e-8, 1 - 1e-8)
    y = y.T
    return -np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) / y.shape[0]

In [17]:
loss = bceloss(A2, y)
loss

3.2253656216592548

<img src='Дизайн без названия.png' />

In [18]:
m=y.shape[0]

dA2 = -(y.T/A2) + ((1-y.T)/(1-A2))
dZ2 = dA2 * (A2 * (1-A2))
dW2 = (1/m) * np.dot(dZ2, A1.T)
db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)
dA1 = np.dot(W2.T, dZ2)
dZ1 = dA1 * np.where(A1 > 0, 1, 0)
# dZ1[dZ1 <= 0] = 1e-8
dW1 = (1/m) * np.dot(dZ1, x)
db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)

print(dW1)
print()
print(db1)

[[ 0.05736915  0.05702417]
 [ 0.         -0.01038182]
 [ 0.          0.        ]]

[[ 0.02777559]
 [-0.01038182]
 [ 0.        ]]


In [19]:
dW2, W2

(array([[ 0.57246697, -0.06847685,  0.        ]]),
 array([[0.4105985 , 0.14404357, 1.45427351]]))

In [20]:
learning_rate = 0.5

print(np.dot(W2,np.dot(W1,x.T)+b1)+b2)

W1 = W1 - learning_rate * dW1
b1 = b1 - learning_rate * db1
W2 = W2 - learning_rate * dW2
b2 = b2 - learning_rate * db2

print(y.T)
print()
print(np.dot(W2,np.dot(W1,x.T)+b1)+b2)

[[0.         0.7535622  0.55922202 1.31278422]]
[[0 1 1 0]]

[[-0.09712501  0.15178863 -0.21634948  0.03256416]]


In [21]:
for i in range(10):
    m=y.shape[0]

    dA2 = -(y.T/A2) + ((1-y.T)/(1-A2))
    dZ2 = dA2 * (A2 * (1-A2))
    dW2 = (1/m) * np.dot(dZ2, A1.T)
    db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * np.where(Z1 > 0, 1, 0)
    dZ1[dZ1 <= 0] = 1e-8
    dW1 = (1/m) * np.dot(dZ1, x)
    db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)

    learning_rate = 1 - i/2000

    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2

    if i % 1 == 0:
        print(np.dot(W2,np.dot(W1,x.T)+b1)+b2)

        print(learning_rate)

[[-0.26970974 -0.99641848 -1.71051536 -2.4372241 ]]
1.0
[[-0.37346062 -2.03910763 -3.09937423 -4.76502124]]
0.9995
[[-0.27333926 -2.77494719 -4.1824053  -6.68401322]]
0.999
[[ 0.17075003 -2.99338294 -4.7498757  -7.91400867]]
0.9985
[[ 1.09848148 -2.4844941  -4.59268353 -8.17565911]]
0.998
[[ 2.64910841 -1.03899247 -3.50235702 -7.1904579 ]]
0.9975
[[ 4.96146375  1.55177829 -1.27105375 -4.68073921]]
0.997
[[ 8.17396054  5.49584351  2.30844009 -0.36967694]]
0.9965
[[12.4245925  11.00059827  7.44271057  6.01871635]]
0.996
[[17.85093452 18.27280821 14.33771675 14.75959044]]
0.9955


In [22]:
5.49584351  - 11.00059827  

-5.504754759999999

In [23]:
class NursikJunior:
    def __init__(self, units, learning_rate=1):
        self.learning_rate = learning_rate

        self.W = []
        self.b = []

        for i in range(len(units)-1):
            self.W.append(np.random.randn(units[i+1], units[i]) * 0.01)
            self.b.append(np.zeros((units[i+1], 1)))

        self.A = []
        self.Z = []
        

    def sigmoid(self, x):
        return (1 / (1 + np.exp(-x)))

    def relu(self, x):
        return np.maximum(0, x)

    def forward(self, x):
        self.A = []
        self.Z = []
        
        Z_last = x.T
        self.A.append(Z_last)

        # print('forward: ',self.W)
        for i in range(len(self.W)):
            Z = np.dot(self.W[i], Z_last) + self.b[i]
            self.Z.append(Z)
            A = self.relu(Z) if i < len(self.W) - 1 else self.sigmoid(Z)
            self.A.append(A)
            Z_last = A
            
        return self.A[-1]
    
    def backward(self, y):

        # print('backfard: ',self.W)
        for i in range(len(self.W)-1, -1, -1):
            if i == len(self.W) - 1:
                dA = -(1/y.shape[0]) * (y.T/self.A[i+1] - (1-y).T/(1-self.A[i+1]))
                dZ = dA * self.A[i+1] * (1 - self.A[i+1])
            else:
                dA = np.dot(self.W[i+1].T, dZ)
                dZ = dA*np.where(self.Z[i] > 0, 1, 0)
            dW = (1/y.shape[0]) * np.dot(dZ, self.A[i].T)
            db = (1/y.shape[0]) * np.sum(dZ, axis=1, keepdims=True)

            self.W[i] = self.W[i] - self.learning_rate * dW
            self.b[i] = self.b[i] - self.learning_rate * db

        # print(self.W)
    def predict(self, x):
        return self.forward(x) > 0.5

    def loss(self, y_hat, y):
        y_hat = np.clip(y_hat, 1e-8, 1 - 1e-8)
        y =y.T
        return -(1/y.shape[0]) * np.sum(y*np.log(y_hat) + (1-y)*np.log(1-y_hat))

In [24]:
units = [2,5,1]

In [25]:
NJ = NursikJunior(units, 1)
for i in range(10000):

    NJ.forward(x)
    # print(NJ.A[-1])

    NJ.backward(y)
    if i % 1000 == 0:

        print(NJ.loss(NJ.A[-1], y))
        # if np.round(NJ.A[-1][0][0],3) == 0.5 and np.round(NJ.A[-1][0][1],3) == 0.5:
        #     break


print(NJ.A[-1])


2.7725867350465347
0.03656073104231354
0.014376700504454436
0.008818334793970045
0.006325458056069965
0.004917483708122243
0.0040154850520059526
0.0033898194550330127
0.0029301595246361955
0.0025790199377720814
[[1.76429539e-03 9.99839558e-01 9.99841087e-01 2.16960474e-04]]


In [26]:
W1 = np.random.randn(3, 2)* 0.01
b1 = np.zeros((3,1))
W2 = np.random.randn(5, 3)* 0.01
b2 = np.zeros((5,1))
W3 = np.random.randn(1, 5)* 0.01
b3 = np.zeros((1,1))

learning_rate = 1


for i in range(10000):
    

    Z1 = np.dot(W1,x.T)+b1
    A1 = relu(Z1)
    Z2 = np.dot(W2,A1)+b2
    A2 = relu(Z2)
    Z3 = np.dot(W3,A2)+b3

    
    try: A3 = sigmoid(Z3)
    except: print(i);break

    loss = bceloss(A3, y)
    if i % 1000 == 0:
        print(loss)
        print(A3)
        print()

    dA3 = - ( y.T/A3) + (1-y.T)/(1-A3)
    dZ3 = dA3 * A3 * (1-A3)
    dW3 = (1/y.shape[0]) * np.dot(dZ3, A2.T)
    db3 = (1/y.shape[0]) * np.sum(dZ3, axis=1, keepdims=True)
    dA2 = np.dot(W3.T, dZ3)
    dZ2 = dA2 * np.where(Z2 > 0, 1, 0)
    # dZ2[dZ2 <= 0] = 1e-8
    dW2 = (1/y.shape[0]) * np.dot(dZ2, A1.T)
    db2 = (1/y.shape[0]) * np.sum(dZ2, axis=1, keepdims=True)
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * np.where(Z1 > 0, 1, 0)
    # dZ1[dZ1 <= 0] = 1e-8
    dW1 = (1/y.shape[0]) * np.dot(dZ1, x)
    db1 = (1/y.shape[0]) * np.sum(dZ1, axis=1, keepdims=True)

    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    W3 = W3 - learning_rate * dW3
    b3 = b3 - learning_rate * db3

    if A3[0][0] == 0.5 and A3[0][1] == 0.5:
        break

2.772588382759546
[[0.5        0.49999989 0.50000017 0.49999989]]

2.772588724361783
[[0.50000013 0.49999959 0.50000041 0.49999987]]

2.7725887222405072
[[0.50000013 0.49999959 0.50000041 0.49999987]]

2.7725887222405072
[[0.50000013 0.49999959 0.50000041 0.49999987]]

2.7725887222405072
[[0.50000013 0.49999959 0.50000041 0.49999987]]

2.7725887222405072
[[0.50000013 0.49999959 0.50000041 0.49999987]]

2.7725887222405072
[[0.50000013 0.49999959 0.50000041 0.49999987]]

2.7725887222405072
[[0.50000013 0.49999959 0.50000041 0.49999987]]

2.7725887222405072
[[0.50000013 0.49999959 0.50000041 0.49999987]]

2.7725887222405077
[[0.50000013 0.49999959 0.50000041 0.49999987]]



In [27]:
import pandas as pd

titanik = pd.read_csv('Titanic-Dataset.csv')

titanik.fillna(0, inplace=True)

titanik.drop(['Ticket', 'Name', 'Cabin', 'PassengerId'], axis=1, inplace=True)

In [53]:
titanik = pd.get_dummies(titanik, drop_first=True)
x = titanik.drop(['Survived'], axis=1).to_numpy()
y = titanik['Survived'].to_numpy()
y = y.reshape(y.shape[0],1)

In [54]:
x[:3]

array([[3, 22.0, 1, 0, 7.25, True, False, False, True],
       [1, 38.0, 1, 0, 71.2833, False, True, False, False],
       [3, 26.0, 0, 0, 7.925, False, False, False, True]], dtype=object)

In [55]:
y[:3]

array([[0],
       [1],
       [1]], dtype=int64)

In [56]:
x.shape

(891, 9)

In [57]:
np.linalg.norm(np.float64(x), axis=0)

array([  73.28710664,  883.30712354,   36.41428291,   26.60826939,
       1766.87993463,   24.0208243 ,   12.9614814 ,    8.77496439,
         25.37715508])

In [71]:
# x = x/np.linalg.norm(np.float64(x), axis=0)
x = np.int64(x)

In [97]:
units = [9,25,5,1]

NJ = NursikJunior(units, 1)

for i in range(100000):

    NJ.forward(x)
    # print(NJ.A[-1])

    NJ.backward(y)
    if i % 1000 == 0:

        print(NJ.loss(NJ.A[-1], y))
        # if np.round(NJ.A[-1][0][0],3) == 0.5 and np.round(NJ.A[-1][0][1],3) == 0.5:
        #     break

617.5997736850736
607.2651948800873
601.3587506085407
597.9702538078614
596.0173937277975
594.8861660327154
594.2264204526637
593.8365572155094
593.5984090204005
593.4386954630638
593.3024634831564
593.1205142098487
592.7209809220371
591.2861331577458
583.2905437959403
568.4261173393543
564.5111644441212
562.5358340742661
561.0773061598694
559.9224451436509
558.9886493796042
558.1900915028441
557.4681336956705
556.7777855559046
556.0787280453869
555.3405939781323
554.5324286361424
553.6236417444401
552.5819996275256
551.3840476347768
550.0179998400067
548.5049686303091
546.8805487446705
545.2359279615998
543.7239862616423
542.3521591416953
541.1614050358977
540.1757430587627
539.3722662924089
538.6570185166838
538.024155373764
537.3930180005736
536.7184839998531
535.9851224914646
535.1608624419289
534.2435081269728
533.2011709597202
531.9737669410554
530.5729573882213
528.9889790608729
527.237426426161
525.1094430206583
522.7636269675843
520.0007495879263
516.5920807394923
512.40983633

In [119]:
prediction = NJ.predict(x)
np.sum(prediction.reshape(891,1)==y)/y.shape[0]

0.7800224466891134