<a href="https://colab.research.google.com/github/Temple2001/ML_practice/blob/main/codes/DeepLearning_practice_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

def numerical_derivative(f, x, delta_x=1e-4):
    """Estimate the gradient of ``f`` with respect to ``x`` by central differences.

    Every element of ``x`` is perturbed *in place* by ``+delta_x`` and then
    ``-delta_x``; ``f(x)`` is evaluated after each perturbation and the element
    is restored afterwards. Because the perturbation is written into ``x``
    itself, ``f`` may ignore its argument and read the same array through a
    closure or a global instead.

    Parameters
    ----------
    f : callable
        Called as ``f(x)``. Expected to return a scalar, since each difference
        quotient is stored into a single element of the gradient.
    x : numpy.ndarray
        Writable floating-point array of any shape. It is modified temporarily
        and restored before the function returns, even if ``f`` raises.
    delta_x : float, optional
        Half-width of the central difference. Defaults to ``1e-4``.

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype of ``x`` holding
        ``(f(x + delta_x) - f(x - delta_x)) / (2 * delta_x)`` per element.

    Raises
    ------
    TypeError
        If ``x`` is not a floating-point ``numpy.ndarray``. An integer array
        would truncate the perturbation on assignment and silently produce an
        all-zero gradient.
    """
    if not isinstance(x, np.ndarray) or not np.issubdtype(x.dtype, np.floating):
        raise TypeError(
            "numerical_derivative requires a floating-point numpy.ndarray "
            "because x is perturbed in place"
        )

    grad = np.zeros_like(x)

    # np.ndindex yields every index tuple of x and simply yields nothing for a
    # zero-size array, so an empty x returns an empty gradient.
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)

            x[idx] = float(tmp_val) - delta_x
            fx2 = f(x)
        finally:
            # Always put the original value back so a failing f cannot leave
            # the caller's array perturbed.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The value is computed from ``exp(-|z|)``, which always lies in ``(0, 1]``,
    so large-magnitude inputs do not overflow ``exp``.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        A NumPy scalar for scalar input, otherwise an array with the shape
        of ``z``.
    """
    z = np.asarray(z)
    e = np.exp(-np.abs(z))  # in (0, 1]: never overflows

    # z >= 0: 1 / (1 + exp(-z)); z < 0: exp(z) / (1 + exp(z)) -- same function.
    out = np.where(z >= 0, 1 / (1 + e), e / (1 + e))

    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional results as arrays.
    return out[()]

# Example 1

In [14]:
# Example 1: one training sample with two features and a single target value.
input_data = np.array([1, 2])
target_data = np.array([1])

# Layer widths of the network: input, hidden, output.
input_nodes, hidden_nodes, output_nodes = 2, 3, 1

In [15]:
# Hidden-layer weights (input_nodes x hidden_nodes) and biases, drawn by
# np.random.rand. No seed is set in this cell, so values differ between runs.
W2 = np.random.rand(input_nodes, hidden_nodes)
b2 = np.random.rand(hidden_nodes)

# Output-layer weights (hidden_nodes x output_nodes) and biases.
W3 = np.random.rand(hidden_nodes, output_nodes)
b3 = np.random.rand(output_nodes)

# Show the initial parameters together with their shapes.
print('W2 = ', W2, ', W2.shape = ', W2.shape)
print('b2 = ', b2, ', b2.shape = ', b2.shape)
print('W3 = ', W3, ', W3.shape = ', W3.shape)
print('b3 = ', b3, ', b3.shape = ', b3.shape)

W2 =  [[0.53248598 0.23959306 0.13702246]
 [0.19595643 0.00175622 0.96457319]] , W2.shape =  (2, 3)
b2 =  [0.64311704 0.26009541 0.39879043] , b2.shape =  (3,)
W3 =  [[0.04130656]
 [0.04062781]
 [0.46892452]] , W3.shape =  (3, 1)
b3 =  [0.22355864] , b3.shape =  (1,)


In [18]:
def feed_forward(xdata):
    """Run one verbose forward pass and return the cross-entropy loss.

    Reads the parameters ``W2``, ``b2``, ``W3``, ``b3``, the target
    ``target_data`` and the ``sigmoid`` helper from the notebook namespace.
    Every intermediate value is printed between two
    ``[feed_forward function]`` marker lines.

    Parameters
    ----------
    xdata : numpy.ndarray
        Network input passed to ``np.dot(xdata, W2)``.

    Returns
    -------
    The cross-entropy loss between the network output and ``target_data``.
    """
    eps = 1e-7  # added inside log() so that log(0) is never evaluated

    hidden_in = np.dot(xdata, W2) + b2
    print('[feed_forward function]')
    print('Z2 = ', hidden_in)

    hidden_out = sigmoid(hidden_in)
    print('A2 = ', hidden_out)

    output_in = np.dot(hidden_out, W3) + b3
    print('Z3 = ', output_in)

    y = sigmoid(output_in)
    # The output activation A3 and the prediction y are the same array.
    print('y = ', y, ', A3 = ', y)

    positive_term = target_data * np.log(y + eps)
    negative_term = (1 - target_data) * np.log((1 - y) + eps)
    loss = -np.sum(positive_term + negative_term)

    print('current loss val = ', loss)
    print('[feed_forward function]')

    return loss

In [19]:
# One forward pass on the Example 1 sample; feed_forward prints its
# intermediates and returns the loss. Note: the name `loss_val` is later
# rebound to a function in Example 2.
loss_val = feed_forward(input_data)

[feed_forward function]
Z2 =  [1.56751588 0.50320091 2.46495928]
A2 =  [0.82742919 0.62321126 0.92164853]
Z3 =  [0.7152402]
y =  [0.67155802] , A3 =  [0.67155802]
current loss val =  0.39815472038315014
[feed_forward function]


**아래의 연속된 출력은 `numerical_derivative`가 가중치와 바이어스의 각 원소를 ±`delta_x`만큼 바꿔 가며 `feed_forward` 함수를 반복 호출할 때마다 출력되는 결과입니다.**

In [20]:
# One gradient-descent step on the Example 1 network.
learning_rate = 0.1

# f ignores its argument: numerical_derivative perturbs the parameter array in
# place and feed_forward reads W2/b2/W3/b3 from the notebook namespace.
f = lambda x: feed_forward(input_data)

print('\n=================================================')
print('initial W2 = ', W2)
print('initial b2 = ', b2)
print('initial W3 = ', W3)
print('initial b3 = ', b3)
print('=================================================\n')

# Parameters are updated one after another, in this order, so each gradient is
# evaluated with the previously updated parameters already in place.
for param in (W2, b2, W3, b3):
    param -= learning_rate * numerical_derivative(f, param)

print('\n=================================================')
print('updated W2 = ', W2)
print('updated b2 = ', b2)
print('updated W3 = ', W3)
print('updated b3 = ', b3)
print('=================================================\n')


initial W2 =  [[0.53248598 0.23959306 0.13702246]
 [0.19595643 0.00175622 0.96457319]]
initial b2 =  [0.64311704 0.26009541 0.39879043]
initial W3 =  [[0.04130656]
 [0.04062781]
 [0.46892452]]
initial b3 =  [0.22355864]

[feed_forward function]
Z2 =  [1.56761588 0.50320091 2.46495928]
A2 =  [0.82744347 0.62321126 0.92164853]
Z3 =  [0.71524079]
y =  [0.67155815] , A3 =  [0.67155815]
current loss val =  0.39815452666893997
[feed_forward function]
[feed_forward function]
Z2 =  [1.56741588 0.50320091 2.46495928]
A2 =  [0.82741491 0.62321126 0.92164853]
Z3 =  [0.71523961]
y =  [0.67155789] , A3 =  [0.67155789]
current loss val =  0.3981549141101232
[feed_forward function]
[feed_forward function]
Z2 =  [1.56751588 0.50330091 2.46495928]
A2 =  [0.82742919 0.62323474 0.92164853]
Z3 =  [0.71524115]
y =  [0.67155823] , A3 =  [0.67155823]
current loss val =  0.3981544070475878
[feed_forward function]
[feed_forward function]
Z2 =  [1.56751588 0.50310091 2.46495928]
A2 =  [0.82742919 0.62318778 0.

# Example 2

In [2]:
def feed_forward(xdata, tdata):
    """Return the cross-entropy loss of the network on ``xdata`` vs ``tdata``.

    Reads ``W2``, ``b2``, ``W3``, ``b3`` and ``sigmoid`` from the notebook
    namespace. ``xdata`` is multiplied by ``W2``; ``tdata`` must broadcast
    against the network output.
    """
    eps = 1e-7  # added inside log() so that log(0) is never evaluated

    hidden = sigmoid(np.dot(xdata, W2) + b2)
    y = sigmoid(np.dot(hidden, W3) + b3)

    positive_term = tdata * np.log(y + eps)
    negative_term = (1 - tdata) * np.log((1 - y) + eps)
    return -np.sum(positive_term + negative_term)

def loss_val(xdata, tdata):
    """Return the cross-entropy loss of the current network parameters.

    Performs the same computation as the two-argument ``feed_forward`` defined
    in this cell; it is used by the training cells for progress reporting.
    Reads ``W2``, ``b2``, ``W3``, ``b3`` and ``sigmoid`` from the notebook
    namespace.
    """
    eps = 1e-7  # added inside log() so that log(0) is never evaluated

    # Hidden layer.
    hidden_in = np.dot(xdata, W2) + b2
    hidden_out = sigmoid(hidden_in)

    # Output layer.
    output_in = np.dot(hidden_out, W3) + b3
    y = sigmoid(output_in)

    log_likelihood = tdata * np.log(y + eps) + (1 - tdata) * np.log((1 - y) + eps)
    return -np.sum(log_likelihood)

def predict(xdata):
    """Run the network on ``xdata`` and threshold its output at 0.5.

    Reads ``W2``, ``b2``, ``W3``, ``b3`` and ``sigmoid`` from the notebook
    namespace.

    Parameters
    ----------
    xdata : numpy.ndarray
        A single sample or a batch of samples (one per row).

    Returns
    -------
    tuple
        ``(y, result)`` where ``y`` is the raw network output. When ``y`` holds
        exactly one value, ``result`` is the Python int ``1`` if ``y >= 0.5``
        and ``0`` otherwise. For a batch, ``result`` is an integer array of
        0/1 labels with the same shape as ``y``.
    """
    z2 = np.dot(xdata, W2) + b2
    a2 = sigmoid(z2)

    z3 = np.dot(a2, W3) + b3
    y = sigmoid(z3)

    # Element-wise threshold; `if y >= 0.5` would raise on a multi-row batch
    # because the truth value of a multi-element array is ambiguous.
    labels = np.asarray(y >= 0.5).astype(int)

    if labels.size == 1:
        # Single prediction: keep returning a plain Python int.
        return y, int(labels.item())

    return y, labels

In [3]:
# The four possible two-bit inputs, one per row.
xdata = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Target columns of shape (4, 1), row-aligned with xdata.
and_tdata = np.array([[0], [0], [0], [1]])
or_tdata = np.array([[0], [1], [1], [1]])
nand_tdata = np.array([[1], [1], [1], [0]])
xor_tdata = np.array([[0], [1], [1], [0]])

# Evaluation inputs: the same four rows, held in a separate array.
test_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

**AND 학습 (은닉노드 10개)**

In [5]:
from datetime import datetime

# AND gate, network widths: input, hidden, output.
input_nodes, hidden_nodes, output_nodes = 2, 10, 1

# Fresh parameters from np.random.rand (draw order: W2, b2, W3, b3).
W2 = np.random.rand(input_nodes, hidden_nodes)
b2 = np.random.rand(hidden_nodes)
W3 = np.random.rand(hidden_nodes, output_nodes)
b3 = np.random.rand(output_nodes)

learning_rate = 0.1

# f ignores its argument: numerical_derivative perturbs each parameter array
# in place and feed_forward reads the parameters from the notebook namespace.
f = lambda x: feed_forward(xdata, and_tdata)

print('initial loss value = ', loss_val(xdata, and_tdata))

start_time = datetime.now()

# range(10001) so that step 10000 is also reported by the step % 500 check.
for step in range(10001):
    # Sequential updates: each gradient sees the parameters updated before it.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 500 == 0:
        print('step = ', step, ', loss value = ', loss_val(xdata, and_tdata))

end_time = datetime.now()

print('')
print('Elapsed time => ', end_time - start_time)

initial loss value =  11.749105050940189
step =  0 , loss value =  6.8316988565771375
step =  500 , loss value =  0.1406482614384342
step =  1000 , loss value =  0.03927233598847962
step =  1500 , loss value =  0.020927454670583618
step =  2000 , loss value =  0.013847244837319512
step =  2500 , loss value =  0.010196872904211701
step =  3000 , loss value =  0.008000968618667663
step =  3500 , loss value =  0.006546816284219779
step =  4000 , loss value =  0.005518545303262873
step =  4500 , loss value =  0.004755916618089627
step =  5000 , loss value =  0.004169448978865927
step =  5500 , loss value =  0.0037054454352324134
step =  6000 , loss value =  0.0033298261677743657
step =  6500 , loss value =  0.003019968738497286
step =  7000 , loss value =  0.002760293409546803
step =  7500 , loss value =  0.0025397359657285696
step =  8000 , loss value =  0.0023502317059994
step =  8500 , loss value =  0.002185769395881441
step =  9000 , loss value =  0.0020417806247241873
step =  9500 , l

In [6]:
# Report the raw output and the thresholded label for each test input.
for data in test_data:
    real_val, logical_val = predict(data)
    print('real_val = ', real_val, ', logical_val = ', logical_val)

real_val =  [8.2677464e-08] , logical_val =  0
real_val =  [0.00043981] , logical_val =  0
real_val =  [0.00046781] , logical_val =  0
real_val =  [0.99910606] , logical_val =  1


**AND 학습 (은닉노드 1개)**

In [7]:
# AND gate, network widths: input, hidden, output.
input_nodes, hidden_nodes, output_nodes = 2, 1, 1

# Fresh parameters from np.random.rand (draw order: W2, b2, W3, b3).
W2 = np.random.rand(input_nodes, hidden_nodes)
b2 = np.random.rand(hidden_nodes)
W3 = np.random.rand(hidden_nodes, output_nodes)
b3 = np.random.rand(output_nodes)

learning_rate = 0.1

# f ignores its argument: numerical_derivative perturbs each parameter array
# in place and feed_forward reads the parameters from the notebook namespace.
f = lambda x: feed_forward(xdata, and_tdata)

print('initial loss value = ', loss_val(xdata, and_tdata))

# `datetime` comes from the import in the earlier AND (10 hidden nodes) cell.
start_time = datetime.now()

# range(10001) so that step 10000 is also reported by the step % 500 check.
for step in range(10001):
    # Sequential updates: each gradient sees the parameters updated before it.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 500 == 0:
        print('step = ', step, ', loss value = ', loss_val(xdata, and_tdata))

end_time = datetime.now()

print('')
print('Elapsed time => ', end_time - start_time)

initial loss value =  4.214396198100818
step =  0 , loss value =  3.684218228703605
step =  500 , loss value =  1.8513807299933844
step =  1000 , loss value =  0.18278284785678525
step =  1500 , loss value =  0.07378723200698478
step =  2000 , loss value =  0.044993820096362484
step =  2500 , loss value =  0.032100590640595256
step =  3000 , loss value =  0.02485966436537694
step =  3500 , loss value =  0.020243957992851083
step =  4000 , loss value =  0.01705319136575069
step =  4500 , loss value =  0.014719526698922002
step =  5000 , loss value =  0.012940437167249675
step =  5500 , loss value =  0.011540309181573966
step =  6000 , loss value =  0.010410355350721934
step =  6500 , loss value =  0.009479658354954235
step =  7000 , loss value =  0.00870004675360241
step =  7500 , loss value =  0.008037671702228135
step =  8000 , loss value =  0.007468063072304158
step =  8500 , loss value =  0.006973098120856223
step =  9000 , loss value =  0.006539072780604727
step =  9500 , loss valu

In [8]:
# Report the raw output and the thresholded label for each test input.
for data in test_data:
    real_val, logical_val = predict(data)
    print('real_val = ', real_val, ', logical_val = ', logical_val)

real_val =  [0.00054908] , logical_val =  0
real_val =  [0.00143362] , logical_val =  0
real_val =  [0.00143362] , logical_val =  0
real_val =  [0.99760707] , logical_val =  1


**OR 학습 (은닉노드 1개)**

In [9]:
# OR gate, network widths: input, hidden, output.
input_nodes, hidden_nodes, output_nodes = 2, 1, 1

# Fresh parameters from np.random.rand (draw order: W2, b2, W3, b3).
W2 = np.random.rand(input_nodes, hidden_nodes)
b2 = np.random.rand(hidden_nodes)
W3 = np.random.rand(hidden_nodes, output_nodes)
b3 = np.random.rand(output_nodes)

learning_rate = 0.1

# f ignores its argument: numerical_derivative perturbs each parameter array
# in place and feed_forward reads the parameters from the notebook namespace.
f = lambda x: feed_forward(xdata, or_tdata)

print('initial loss value = ', loss_val(xdata, or_tdata))

# `datetime` comes from the import in the earlier AND (10 hidden nodes) cell.
start_time = datetime.now()

# range(10001) so that step 10000 is also reported by the step % 500 check.
for step in range(10001):
    # Sequential updates: each gradient sees the parameters updated before it.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 500 == 0:
        print('step = ', step, ', loss value = ', loss_val(xdata, or_tdata))

end_time = datetime.now()

print('')
print('Elapsed time => ', end_time - start_time)

initial loss value =  2.2242720100311395
step =  0 , loss value =  2.2219698724674863
step =  500 , loss value =  0.2194106763571518
step =  1000 , loss value =  0.07609865107441055
step =  1500 , loss value =  0.04482441856625162
step =  2000 , loss value =  0.031560217404670965
step =  2500 , loss value =  0.024288576047562998
step =  3000 , loss value =  0.01971333457944472
step =  3500 , loss value =  0.01657528233000651
step =  4000 , loss value =  0.014291771499550285
step =  4500 , loss value =  0.012556867480011304
step =  5000 , loss value =  0.011194771219254994
step =  5500 , loss value =  0.010097382350928642
step =  6000 , loss value =  0.009194622134386949
step =  6500 , loss value =  0.008439092387968
step =  7000 , loss value =  0.007797599703797786
step =  7500 , loss value =  0.007246212311153657
step =  8000 , loss value =  0.006767244747569032
step =  8500 , loss value =  0.0063473474061910996
step =  9000 , loss value =  0.00597625617766747
step =  9500 , loss valu

In [10]:
# Report the raw output and the thresholded label for each test input.
for data in test_data:
    real_val, logical_val = predict(data)
    print('real_val = ', real_val, ', logical_val = ', logical_val)

real_val =  [0.00316989] , logical_val =  0
real_val =  [0.99913909] , logical_val =  1
real_val =  [0.99913896] , logical_val =  1
real_val =  [0.99954724] , logical_val =  1


**NAND 학습 (은닉노드 1개)**

In [11]:
# NAND gate, network widths: input, hidden, output.
input_nodes, hidden_nodes, output_nodes = 2, 1, 1

# Fresh parameters from np.random.rand (draw order: W2, b2, W3, b3).
W2 = np.random.rand(input_nodes, hidden_nodes)
b2 = np.random.rand(hidden_nodes)
W3 = np.random.rand(hidden_nodes, output_nodes)
b3 = np.random.rand(output_nodes)

learning_rate = 0.1

# f ignores its argument: numerical_derivative perturbs each parameter array
# in place and feed_forward reads the parameters from the notebook namespace.
f = lambda x: feed_forward(xdata, nand_tdata)

print('initial loss value = ', loss_val(xdata, nand_tdata))

# `datetime` comes from the import in the earlier AND (10 hidden nodes) cell.
start_time = datetime.now()

# range(10001) so that step 10000 is also reported by the step % 500 check.
for step in range(10001):
    # Sequential updates: each gradient sees the parameters updated before it.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 500 == 0:
        print('step = ', step, ', loss value = ', loss_val(xdata, nand_tdata))

end_time = datetime.now()

print('')
print('Elapsed time => ', end_time - start_time)

initial loss value =  2.2931544024598365
step =  0 , loss value =  2.2906229743951587
step =  500 , loss value =  0.36232293567555063
step =  1000 , loss value =  0.09988771332048507
step =  1500 , loss value =  0.0544932561545861
step =  2000 , loss value =  0.03694119132029958
step =  2500 , loss value =  0.02778497221512722
step =  3000 , loss value =  0.022202797956342314
step =  3500 , loss value =  0.018457650340962327
step =  4000 , loss value =  0.015776786172743355
step =  4500 , loss value =  0.013765884627498278
step =  5000 , loss value =  0.012203263630236917
step =  5500 , loss value =  0.010954962289961437
step =  6000 , loss value =  0.009935365783817806
step =  6500 , loss value =  0.009087253920064958
step =  7000 , loss value =  0.00837095916486472
step =  7500 , loss value =  0.007758131350266867
step =  8000 , loss value =  0.00722797951857852
step =  8500 , loss value =  0.006764915564162575
step =  9000 , loss value =  0.0063570261346461154
step =  9500 , loss va

In [12]:
# Report the raw output and the thresholded label for each test input.
for data in test_data:
    real_val, logical_val = predict(data)
    print('real_val = ', real_val, ', logical_val = ', logical_val)

real_val =  [0.99972746] , logical_val =  1
real_val =  [0.99897617] , logical_val =  1
real_val =  [0.99897617] , logical_val =  1
real_val =  [0.00334518] , logical_val =  0


**XOR 학습 (은닉노드 1개)**

In [13]:
# XOR gate with a single hidden node; network widths: input, hidden, output.
input_nodes, hidden_nodes, output_nodes = 2, 1, 1

# Fresh parameters from np.random.rand (draw order: W2, b2, W3, b3).
W2 = np.random.rand(input_nodes, hidden_nodes)
b2 = np.random.rand(hidden_nodes)
W3 = np.random.rand(hidden_nodes, output_nodes)
b3 = np.random.rand(output_nodes)

learning_rate = 0.1

# f ignores its argument: numerical_derivative perturbs each parameter array
# in place and feed_forward reads the parameters from the notebook namespace.
f = lambda x: feed_forward(xdata, xor_tdata)

print('initial loss value = ', loss_val(xdata, xor_tdata))

# `datetime` comes from the import in the earlier AND (10 hidden nodes) cell.
start_time = datetime.now()

# range(10001) so that step 10000 is also reported by the step % 500 check.
for step in range(10001):
    # Sequential updates: each gradient sees the parameters updated before it.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 500 == 0:
        print('step = ', step, ', loss value = ', loss_val(xdata, xor_tdata))

end_time = datetime.now()

print('')
print('Elapsed time => ', end_time - start_time)

initial loss value =  2.941666435943883
step =  0 , loss value =  2.895026870136912
step =  500 , loss value =  2.69441713322278
step =  1000 , loss value =  2.136200433337293
step =  1500 , loss value =  1.9974403171288952
step =  2000 , loss value =  1.9613072309507376
step =  2500 , loss value =  1.9456794531667592
step =  3000 , loss value =  1.937113132340146
step =  3500 , loss value =  1.9317472156457094
step =  4000 , loss value =  1.9280867450297863
step =  4500 , loss value =  1.9254375138069673
step =  5000 , loss value =  1.923435103681121
step =  5500 , loss value =  1.9218705115687147
step =  6000 , loss value =  1.9206155629428443
step =  6500 , loss value =  1.9195874140045999
step =  7000 , loss value =  1.918730206302426
step =  7500 , loss value =  1.9180049424613115
step =  8000 , loss value =  1.9173835912070036
step =  8500 , loss value =  1.9168454982644068
step =  9000 , loss value =  1.9163751170631023
step =  9500 , loss value =  1.915960526699492
step =  1000

In [14]:
# Report the raw output and the thresholded label for each test input.
for data in test_data:
    real_val, logical_val = predict(data)
    print('real_val = ', real_val, ', logical_val = ', logical_val)

real_val =  [0.0031346] , logical_val =  0
real_val =  [0.6656643] , logical_val =  1
real_val =  [0.66566429] , logical_val =  1
real_val =  [0.66663391] , logical_val =  1


**XOR 학습 (은닉노드 3개)**

In [15]:
# XOR gate with three hidden nodes; network widths: input, hidden, output.
input_nodes, hidden_nodes, output_nodes = 2, 3, 1

# Fresh parameters from np.random.rand (draw order: W2, b2, W3, b3).
W2 = np.random.rand(input_nodes, hidden_nodes)
b2 = np.random.rand(hidden_nodes)
W3 = np.random.rand(hidden_nodes, output_nodes)
b3 = np.random.rand(output_nodes)

learning_rate = 0.1

# f ignores its argument: numerical_derivative perturbs each parameter array
# in place and feed_forward reads the parameters from the notebook namespace.
f = lambda x: feed_forward(xdata, xor_tdata)

print('initial loss value = ', loss_val(xdata, xor_tdata))

# `datetime` comes from the import in the earlier AND (10 hidden nodes) cell.
start_time = datetime.now()

# range(10001) so that step 10000 is also reported by the step % 500 check.
for step in range(10001):
    # Sequential updates: each gradient sees the parameters updated before it.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 500 == 0:
        print('step = ', step, ', loss value = ', loss_val(xdata, xor_tdata))

end_time = datetime.now()

print('')
print('Elapsed time => ', end_time - start_time)

initial loss value =  4.584226804725713
step =  0 , loss value =  4.012208245729212
step =  500 , loss value =  2.685358566987008
step =  1000 , loss value =  1.6135869994746885
step =  1500 , loss value =  0.28614720425181506
step =  2000 , loss value =  0.11535584458511876
step =  2500 , loss value =  0.07033233826075788
step =  3000 , loss value =  0.05029861710318012
step =  3500 , loss value =  0.03906387149937424
step =  4000 , loss value =  0.031898181735393855
step =  4500 , loss value =  0.026937820781539952
step =  5000 , loss value =  0.02330384192110204
step =  5500 , loss value =  0.02052860954054484
step =  6000 , loss value =  0.018340765624149387
step =  6500 , loss value =  0.01657217554624258
step =  7000 , loss value =  0.015113172699923518
step =  7500 , loss value =  0.0138892078194469
step =  8000 , loss value =  0.012847850879363527
step =  8500 , loss value =  0.011951163169860685
step =  9000 , loss value =  0.01117101964849181
step =  9500 , loss value =  0.01

In [16]:
# Report the raw output and the thresholded label for each test input.
for data in test_data:
    real_val, logical_val = predict(data)
    print('real_val = ', real_val, ', logical_val = ', logical_val)

real_val =  [0.00039655] , logical_val =  0
real_val =  [0.99754836] , logical_val =  1
real_val =  [0.99732968] , logical_val =  1
real_val =  [0.00434585] , logical_val =  0
