# Preparation

In [1]:
import numpy as np

# Create Dummy Data

In [2]:
# Dataset XOR
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = np.array([[0],[1],[1],[0]])

# Model Definition

In [3]:
# Parameters
input_size = 2
hidden_size = 2
output_size = 1
lr = 0.1

# weight initialization
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

# Model Training

In [4]:
for epoch in range(10000):
    z1 = np.dot(X, W1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = sigmoid(z2)

    error = y - a2

    d_a2 = error * sigmoid_derivative(a2)
    d_W2 = np.dot(a1.T, d_a2)
    d_b2 = np.sum(d_a2, axis=0, keepdims=True)

    d_a1 = np.dot(d_a2, W2.T) * sigmoid_derivative(a1)
    d_W1 = np.dot(X.T, d_a1)
    d_b1 = np.sum(d_a1, axis=0, keepdims=True)

    W1 += lr * d_W1
    b1 += lr * d_b1
    W2 += lr * d_W2
    b2 += lr * d_b2

    if epoch % 1000 == 0:
        loss = np.mean(np.square(error))
        print(f"Epoch {epoch}, Loss: {loss}")

print("Prediksi:")
print(a2)

Epoch 0, Loss: 0.2608909593785421
Epoch 1000, Loss: 0.24683178250881707
Epoch 2000, Loss: 0.2139044926052045
Epoch 3000, Loss: 0.06664500161583195
Epoch 4000, Loss: 0.01795358926514981
Epoch 5000, Loss: 0.00904758716253779
Epoch 6000, Loss: 0.0058296922966290295
Epoch 7000, Loss: 0.004235207953931878
Epoch 8000, Loss: 0.003299451562356559
Epoch 9000, Loss: 0.002689769228950644
Prediksi:
[[0.05264851]
 [0.95527302]
 [0.95404956]
 [0.0465971 ]]


---

# Questions

1. Ubah jumlah neuron hidden layer menjadi 3.

In [5]:
# Parameters
input_size = 2
hidden_size = 3 # update to 3
output_size = 1
lr = 0.1

# weight initialization
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

In [6]:
for epoch in range(10000):
    z1 = np.dot(X, W1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = sigmoid(z2)

    error = y - a2

    d_a2 = error * sigmoid_derivative(a2)
    d_W2 = np.dot(a1.T, d_a2)
    d_b2 = np.sum(d_a2, axis=0, keepdims=True)

    d_a1 = np.dot(d_a2, W2.T) * sigmoid_derivative(a1)
    d_W1 = np.dot(X.T, d_a1)
    d_b1 = np.sum(d_a1, axis=0, keepdims=True)

    W1 += lr * d_W1
    b1 += lr * d_b1
    W2 += lr * d_W2
    b2 += lr * d_b2

    if epoch % 1000 == 0:
        loss = np.mean(np.square(error))
        print(f"Epoch {epoch}, Loss: {loss}")

print("Prediksi:")
print(a2)

Epoch 0, Loss: 0.26507900768373116
Epoch 1000, Loss: 0.23119023576726516
Epoch 2000, Loss: 0.17480857152754764
Epoch 3000, Loss: 0.10279618056304898
Epoch 4000, Loss: 0.03590326203395383
Epoch 5000, Loss: 0.014934355301542326
Epoch 6000, Loss: 0.008456462493709198
Epoch 7000, Loss: 0.005678249059083687
Epoch 8000, Loss: 0.004201487640187
Epoch 9000, Loss: 0.003303913925825174
Prediksi:
[[0.04259867]
 [0.93582872]
 [0.95856948]
 [0.05641452]]


2. Bandingkan hasil loss dengan konfigurasi awal.

**Jawab:** Dengan menambahkan 1 neuron tambahan pada hidden layer, dapat menghasilkan loss yang sedikit lebih tinggi pada akhir epoch ($0.00330$ berbanding $0.00268$). Namun, dari sisi kurva loss yang diperoleh, penambahan neuron baru dapat menghasilkan kurva yang lebih *smooth*, proses penurunan loss bisa terjadi secara perlahan. Ini biasanya karena dengan semakin bertambahnya neuron, maka bisa memungkinkan proses yang lebih stabil.

3. Tambahkan fungsi aktivasi ReLU dan bandingkan hasil.

In [10]:
# relu functions
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

In [11]:
# Parameters
input_size = 2
hidden_size = 3 # update to 3
output_size = 1
lr = 0.1

# weight initialization
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))

In [12]:
for epoch in range(10000):
    # forward pass
    z1 = np.dot(X, W1) + b1
    a1 = relu(z1) # perubahan dari sigmoid function menjadi relu
    z2 = np.dot(a1, W2) + b2
    a2 = sigmoid(z2)

    error = y - a2

    d_a2 = error * sigmoid_derivative(a2)
    d_W2 = np.dot(a1.T, d_a2)
    d_b2 = np.sum(d_a2, axis=0, keepdims=True)

    d_a1 = np.dot(d_a2, W2.T) * relu_derivative(a1) # perubahan menjadi relu derivative
    d_W1 = np.dot(X.T, d_a1)
    d_b1 = np.sum(d_a1, axis=0, keepdims=True)

    W1 += lr * d_W1
    b1 += lr * d_b1
    W2 += lr * d_W2
    b2 += lr * d_b2

    if epoch % 1000 == 0:
        loss = np.mean(np.square(error))
        print(f"Epoch {epoch}, Loss: {loss}")

print("Prediksi:")
print(a2)

Epoch 0, Loss: 0.26059201168945956
Epoch 1000, Loss: 0.16708116997603079
Epoch 2000, Loss: 0.16682308662785683
Epoch 3000, Loss: 0.16678087738240846
Epoch 4000, Loss: 0.16674646799959453
Epoch 5000, Loss: 0.16671999105005358
Epoch 6000, Loss: 0.1667038903333133
Epoch 7000, Loss: 0.1666994448476091
Epoch 8000, Loss: 0.16670052752500733
Epoch 9000, Loss: 0.16669557391409204
Prediksi:
[[0.66650949]
 [0.66650949]
 [0.66650949]
 [0.01049776]]


Hasil yang diperoleh cenderung lebih buruk dari metode sebelumnya. Loss yang didapat cenderung stuck di sekitar $0.166...$. Hal ini dikarenakan data yang ada (XOR) cenderung tidak terpisah secara linear. Sedangkan ReLU cenderung lebih cocok pada data yang memiliki sifat lebih linear. Jika neural network tidak memiliki cukup neuron, maka dalam hal ini, hasilnya bisa lebih buruk.