<a href="https://colab.research.google.com/github/JerryK97/dl12/blob/main/project/dl12_proj02_XOR_MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# XOR problem
## numpy with back propagation
## MLP
- 1 hidden layer
- 2 hidden layers
- hyperparameter setting

In [1]:
# numpy - back propagation with iterations=20000
import numpy as np
# XOR solution by MLP

# Sigmoid activation function
def actf(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    Works on scalars and, element-wise, on NumPy arrays.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

# Derivative of the sigmoid, expressed through the sigmoid's output
def actf_deriv(x):
    """Return ``x * (1 - x)``.

    This is the sigmoid derivative when ``x`` is already a sigmoid *output*:
    the training loop in this notebook passes the activated layer values
    (``layer1``, ``layer2``), not the pre-activation sums.
    """
    complement = 1 - x
    return x * complement

# Number of input units, hidden units, and output units
inputs, hiddens, outputs = 2, 2, 1
learning_rate = 0.5

# Fixed seed so that "Restart & Run All" reproduces the same initial weights.
# A local Generator is used so the global NumPy random state is left untouched.
# NOTE(review): convergence has not been verified for this particular seed;
# if the printed predictions do not approach 0/1/1/0, try another value.
SEED = 42
rng = np.random.default_rng(SEED)

# Training inputs and targets (the XOR truth table)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # (4,2)
T = np.array([[0], [1], [1], [0]])              # (4,1)

# Initialize the weights with random values in [-1.0, 1.0); biases start at zero.
W1 = rng.uniform(-1.0, 1.0, size=(inputs, hiddens))    # (2,2)
W2 = rng.uniform(-1.0, 1.0, size=(hiddens, outputs))   # (2,1)
B1 = np.zeros(hiddens)                                 # (2,)
B2 = np.zeros(outputs)                                 # (1,)
print(W1.shape,W2.shape,B1.shape,B2.shape)

# Forward propagation
def predict(x):
    """Run one forward pass of the two-layer network on ``x``.

    Uses the module-level weights ``W1``/``W2`` and biases ``B1``/``B2``.

    Returns:
        A tuple ``(layer0, layer1, layer2)``: the input itself, the hidden
        layer activations, and the output layer activations.
    """
    # Hidden layer: affine transform followed by the sigmoid.
    hidden_out = actf(np.dot(x, W1) + B1)
    # Output layer: same pattern applied to the hidden activations.
    network_out = actf(np.dot(hidden_out, W2) + B2)
    return x, hidden_out, network_out
    
# Backward propagation (training loop)
def fit(iterations=20000):
    """Train the module-level weights on ``X``/``T`` with full-batch gradient descent.

    Each iteration runs ``predict`` on the whole training set, back-propagates
    the output error ``layer2 - T`` through both sigmoid layers, and updates
    ``W1``, ``W2``, ``B1`` and ``B2`` in place using ``learning_rate``.

    Args:
        iterations: Number of gradient-descent steps. Defaults to 20000, the
            value that used to be hard-coded, so ``fit()`` behaves as before.

    Returns:
        None. The module-level weights and biases are modified.
    """
    global W1, W2, B1, B2
    # Gradients are averaged over all training rows (4 for the XOR table).
    n_samples = X.shape[0]
    for _ in range(iterations):
        layer0, layer1, layer2 = predict(X)

        # Output layer: error scaled by the sigmoid derivative of its activation.
        layer2_error = layer2 - T
        layer2_delta = layer2_error * actf_deriv(layer2)

        # Hidden layer: propagate the output delta back through W2.
        # This must use W2 *before* it is updated below.
        layer1_error = np.dot(layer2_delta, W2.T)
        layer1_delta = layer1_error * actf_deriv(layer1)

        W2 += -learning_rate * np.dot(layer1.T, layer2_delta) / n_samples
        W1 += -learning_rate * np.dot(layer0.T, layer1_delta) / n_samples
        B2 += -learning_rate * np.sum(layer2_delta, axis=0) / n_samples
        B1 += -learning_rate * np.sum(layer1_delta, axis=0) / n_samples

def test():
    """Print each training input, its target, and the network's output for it."""
    for sample, target in zip(X, T):
        # predict() expects a 2-D batch, even for a single sample.
        row = np.reshape(sample, (1, -1))
        _, _, output = predict(row)
        print(row, target, output)

# Train on the XOR table, then print every input, its target, and the network output.
fit()
test()

(2, 2) (2, 1) (2,) (1,)
[[0 0]] [0] [[0.0275429]]
[[0 1]] [1] [[0.96899939]]
[[1 0]] [1] [[0.97358685]]
[[1 1]] [0] [[0.02464057]]


> ### Does NumPy with backpropagation solve the XOR problem?

In [2]:
# Keras MLP with 1 hidden layer, epochs=2000
import numpy as np
import tensorflow as tf

# One hidden layer with two sigmoid units, followed by a single sigmoid output unit.
model = tf.keras.models.Sequential()

model.add(tf.keras.layers.Dense(units=2, input_shape=(2,), activation='sigmoid'))
model.add(tf.keras.layers.Dense(units=1,  activation='sigmoid'))
# `lr` is a deprecated alias that newer Keras releases reject;
# `learning_rate` is the supported argument name.
model.compile(loss='mean_squared_error',
              optimizer=tf.keras.optimizers.SGD(learning_rate=0.1))

model.summary()

# XOR truth table: inputs and expected outputs.
X = np.array([[0, 0],[0, 1],[1, 0],[1, 1]])
y = np.array([[0], [1], [1], [0]])

# batch_size=1 means one weight update per sample, i.e. four steps per epoch.
model.fit(X, y, batch_size=1, epochs=2000, verbose=2)

print(model.predict(X))



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 2)                 6         
                                                                 
 dense_1 (Dense)             (None, 1)                 3         
                                                                 
Total params: 9
Trainable params: 9
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2000
4/4 - 1s - loss: 0.2510 - 980ms/epoch - 245ms/step
Epoch 2/2000
4/4 - 0s - loss: 0.2510 - 28ms/epoch - 7ms/step
Epoch 3/2000
4/4 - 0s - loss: 0.2510 - 21ms/epoch - 5ms/step
Epoch 4/2000
4/4 - 0s - loss: 0.2510 - 16ms/epoch - 4ms/step
Epoch 5/2000
4/4 - 0s - loss: 0.2510 - 37ms/epoch - 9ms/step
Epoch 6/2000
4/4 - 0s - loss: 0.2510 - 57ms/epoch - 14ms/step
Epoch 7/2000
4/4 - 0s - loss: 0.2510 - 71ms/epoch - 18ms/step
Epoch 8/2000
4/4 - 0s - lo

> ### Not a good result (the loss barely moves from 0.25)

In [3]:
# Keras MLP with 2 hidden layers and epochs=2000
import numpy as np
import tensorflow as tf

# Two ReLU hidden layers (16 and 8 units), followed by a single sigmoid output unit.
model = tf.keras.models.Sequential()

model.add(tf.keras.layers.Dense(units=16, input_shape=(2,), activation='relu'))
model.add(tf.keras.layers.Dense(units=8, activation='relu'))
model.add(tf.keras.layers.Dense(units=1,  activation='sigmoid'))
# `lr` is a deprecated alias that newer Keras releases reject;
# `learning_rate` is the supported argument name.
model.compile(loss='mean_squared_error',
              optimizer=tf.keras.optimizers.SGD(learning_rate=0.1))

model.summary()

# XOR truth table: inputs and expected outputs.
X = np.array([[0, 0],[0, 1],[1, 0],[1, 1]])
y = np.array([[0], [1], [1], [0]])

# batch_size=1 means one weight update per sample, i.e. four steps per epoch.
model.fit(X, y, batch_size=1, epochs=2000, verbose=2)

print(model.predict(X))



Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 16)                48        
                                                                 
 dense_3 (Dense)             (None, 8)                 136       
                                                                 
 dense_4 (Dense)             (None, 1)                 9         
                                                                 
Total params: 193
Trainable params: 193
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2000
4/4 - 0s - loss: 0.2488 - 382ms/epoch - 95ms/step
Epoch 2/2000
4/4 - 0s - loss: 0.2484 - 10ms/epoch - 2ms/step
Epoch 3/2000
4/4 - 0s - loss: 0.2480 - 10ms/epoch - 2ms/step
Epoch 4/2000
4/4 - 0s - loss: 0.2477 - 13ms/epoch - 3ms/step
Epoch 5/2000
4/4 - 0s - loss: 0.2473 - 12ms/epoch - 3ms/step
Epoch 6/2000


---
---

## [DIY] Hyperparameter control
- Find the optimal training condition with epochs=2000

> ## Try Bard, ChatGPT, Bing AI
- ### Write a MLP code to solve XOR using Keras.

Sure, here's an example code using Keras to train a multi-layer perceptron (MLP) to solve the XOR problem:

---

In [5]:
from keras.models import Sequential
from keras.layers import Dense
import numpy as np

# Network: one hidden ReLU layer with 8 units, then a single sigmoid output unit
model = Sequential([
    Dense(8, input_dim=2, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# XOR truth table used as training inputs and labels
X = np.array([[0,0], [0,1], [1,0], [1,1]])
y = np.array([[0], [1], [1], [0]])

# Binary cross-entropy loss, Adam optimizer, accuracy reported as a metric
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 1000 training epochs with progress output silenced
model.fit(X, y, epochs=1000, verbose=0)

# Score the trained model on the same four samples it was trained on
scores = model.evaluate(X, y)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

# Predict on the four XOR inputs again (these are the same rows as X, not unseen data)
test_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
predictions = model.predict(test_data)

print("Predictions:")
for sample, prediction in zip(test_data, predictions):
    print("Input: {}, Predicted Output: {:.2f}".format(sample, prediction[0]))


accuracy: 100.00%
Predictions:
Input: [0 0], Predicted Output: 0.36
Input: [0 1], Predicted Output: 0.89
Input: [1 0], Predicted Output: 0.58
Input: [1 1], Predicted Output: 0.16


In this code, we define an MLP with one hidden layer of 8 neurons using the Dense class from Keras. We use the relu activation function for the hidden layer and the sigmoid activation function for the output layer, which gives us a probability estimate for the output class (either 0 or 1).

We then define the training data and labels as numpy arrays, where the input data has two features (the two inputs to the XOR gate) and the labels are binary (0 or 1) indicating the desired output for each input.

We compile the model with binary cross-entropy loss and the Adam optimizer, which is a commonly used optimizer for neural networks. Finally, we train the model for 1000 epochs on the training data and evaluate its accuracy on the same data.

When you run this code, you should see an accuracy of 100% on the training data, which means that the model has successfully learned to solve the XOR problem.

In [6]:
from keras.models import Sequential
from keras.layers import Dense
import numpy as np

# Network: two hidden ReLU layers (8 and 4 units), then a single sigmoid output unit
model = Sequential([
    Dense(8, input_dim=2, activation='relu'),
    Dense(4, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# XOR truth table used as training inputs and labels
X = np.array([[0,0], [0,1], [1,0], [1,1]])
y = np.array([[0], [1], [1], [0]])

# Binary cross-entropy loss, Adam optimizer, accuracy reported as a metric
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 1000 training epochs with progress output silenced
model.fit(X, y, epochs=1000, verbose=0)

# Score the trained model on the same four samples it was trained on
scores = model.evaluate(X, y)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

# Predict on the four XOR inputs again (these are the same rows as X, not unseen data)
test_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
predictions = model.predict(test_data)

print("Predictions:")
for sample, prediction in zip(test_data, predictions):
    print("Input: {}, Predicted Output: {:.2f}".format(sample, prediction[0]))


accuracy: 100.00%
Predictions:
Input: [0 0], Predicted Output: 0.04
Input: [0 1], Predicted Output: 0.98
Input: [1 0], Predicted Output: 0.98
Input: [1 1], Predicted Output: 0.02
