In [2]:
import numpy as np
from keras.models import Sequential
# from keras.layers.core import Dense
from tensorflow.keras.layers import Dense

In [3]:
# Why XOR? Because it is a non-linearly separable problem:
# no single straight line can split the inputs by their XOR output.
# Training samples: every combination of two binary inputs, one per row.
training_data = np.array(
    [
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ],
    dtype='float32',
)

In [4]:
# Expected XOR output for each training sample, in the same row order
# as the training samples: 0^0=0, 0^1=1, 1^0=1, 1^1=0.
target_data = np.array(
    [
        [0],
        [1],
        [1],
        [0],
    ],
    dtype='float32',
)

In [8]:
# we can define the neural network layers in a sequential manner
model = Sequential()

# The first positional parameter of Dense is the output dimension (number of units).
# Only the first layer needs input_dim: every later layer infers its input size
# from the output of the layer before it.
model.add(Dense(16, input_dim=2, activation='relu'))

# six more identical hidden layers (16 ReLU units each)
for _ in range(6):
    model.add(Dense(16, activation='relu'))

# a single sigmoid unit produces an output between 0 and 1 for the binary target
model.add(Dense(1, activation='sigmoid'))

# we can define the loss function: MSE or negative log likelihood
# the optimizer will find the right adjustments for the weights: SGD, Adagrad, ADAM ...
model.compile(loss='mean_squared_error',
              optimizer='adam',
              metrics=['binary_accuracy'])



# activation='sigmoid'
# If we use the sigmoid function as the activation, training suffers from the vanishing gradient
# problem: the derivative of the sigmoid may be so small that the weight updates barely change anything,
# which means that the algorithm needs more epochs.

`loss='mean_squared_error'`

The value of the loss function is going to decrease during the training procedure.

So at the beginning, there is a high error, which means that the value of the loss function is high as well.

And during the training procedure, the algorithm keeps updating the weights, which means that the model keeps making better and better predictions, which means that the error will decrease.

And of course, because the error decreases, the loss function value will decrease as well.

In [6]:
# An epoch is a single iteration over the entire dataset.
# verbose=0 is silent; 1 and 2 show results while training.
model.fit(
    x=training_data,
    y=target_data,
    epochs=500,
    verbose=2,
)

Epoch 1/500
1/1 - 3s - loss: 0.2485 - binary_accuracy: 0.5000 - 3s/epoch - 3s/step
Epoch 2/500
1/1 - 0s - loss: 0.2479 - binary_accuracy: 0.5000 - 22ms/epoch - 22ms/step
Epoch 3/500
1/1 - 0s - loss: 0.2473 - binary_accuracy: 0.5000 - 8ms/epoch - 8ms/step
Epoch 4/500
1/1 - 0s - loss: 0.2468 - binary_accuracy: 0.5000 - 9ms/epoch - 9ms/step
Epoch 5/500
1/1 - 0s - loss: 0.2462 - binary_accuracy: 0.7500 - 8ms/epoch - 8ms/step
Epoch 6/500
1/1 - 0s - loss: 0.2455 - binary_accuracy: 0.7500 - 5ms/epoch - 5ms/step
Epoch 7/500
1/1 - 0s - loss: 0.2447 - binary_accuracy: 0.7500 - 7ms/epoch - 7ms/step
Epoch 8/500
1/1 - 0s - loss: 0.2439 - binary_accuracy: 0.7500 - 7ms/epoch - 7ms/step
Epoch 9/500
1/1 - 0s - loss: 0.2431 - binary_accuracy: 0.7500 - 6ms/epoch - 6ms/step
Epoch 10/500
1/1 - 0s - loss: 0.2423 - binary_accuracy: 0.7500 - 6ms/epoch - 6ms/step
Epoch 11/500
1/1 - 0s - loss: 0.2413 - binary_accuracy: 0.7500 - 7ms/epoch - 7ms/step
Epoch 12/500
1/1 - 0s - loss: 0.2404 - binary_accuracy: 0.7500 

Epoch 97/500
1/1 - 0s - loss: 0.0270 - binary_accuracy: 1.0000 - 7ms/epoch - 7ms/step
Epoch 98/500
1/1 - 0s - loss: 0.0260 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 99/500
1/1 - 0s - loss: 0.0250 - binary_accuracy: 1.0000 - 7ms/epoch - 7ms/step
Epoch 100/500
1/1 - 0s - loss: 0.0241 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 101/500
1/1 - 0s - loss: 0.0232 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 102/500
1/1 - 0s - loss: 0.0223 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 103/500
1/1 - 0s - loss: 0.0214 - binary_accuracy: 1.0000 - 7ms/epoch - 7ms/step
Epoch 104/500
1/1 - 0s - loss: 0.0206 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 105/500
1/1 - 0s - loss: 0.0198 - binary_accuracy: 1.0000 - 7ms/epoch - 7ms/step
Epoch 106/500
1/1 - 0s - loss: 0.0190 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 107/500
1/1 - 0s - loss: 0.0183 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 108/500
1/1 - 0s - loss: 0.0176 - binary

Epoch 191/500
1/1 - 0s - loss: 6.9653e-04 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 192/500
1/1 - 0s - loss: 6.8137e-04 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 193/500
1/1 - 0s - loss: 6.6675e-04 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 194/500
1/1 - 0s - loss: 6.5256e-04 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 195/500
1/1 - 0s - loss: 6.3892e-04 - binary_accuracy: 1.0000 - 7ms/epoch - 7ms/step
Epoch 196/500
1/1 - 0s - loss: 6.2573e-04 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 197/500
1/1 - 0s - loss: 6.1292e-04 - binary_accuracy: 1.0000 - 7ms/epoch - 7ms/step
Epoch 198/500
1/1 - 0s - loss: 6.0049e-04 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 199/500
1/1 - 0s - loss: 5.8846e-04 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 200/500
1/1 - 0s - loss: 5.7686e-04 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 201/500
1/1 - 0s - loss: 5.6567e-04 - binary_accuracy: 1.0000 - 7ms/epoch - 7ms/step

1/1 - 0s - loss: 1.5275e-04 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 282/500
1/1 - 0s - loss: 1.5060e-04 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 283/500
1/1 - 0s - loss: 1.4848e-04 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 284/500
1/1 - 0s - loss: 1.4640e-04 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 285/500
1/1 - 0s - loss: 1.4447e-04 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 286/500
1/1 - 0s - loss: 1.4255e-04 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 287/500
1/1 - 0s - loss: 1.4051e-04 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 288/500
1/1 - 0s - loss: 1.3863e-04 - binary_accuracy: 1.0000 - 7ms/epoch - 7ms/step
Epoch 289/500
1/1 - 0s - loss: 1.3682e-04 - binary_accuracy: 1.0000 - 4ms/epoch - 4ms/step
Epoch 290/500
1/1 - 0s - loss: 1.3503e-04 - binary_accuracy: 1.0000 - 7ms/epoch - 7ms/step
Epoch 291/500
1/1 - 0s - loss: 1.3325e-04 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 292/500

Epoch 372/500
1/1 - 0s - loss: 5.1546e-05 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 373/500
1/1 - 0s - loss: 5.1007e-05 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 374/500
1/1 - 0s - loss: 5.0484e-05 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 375/500
1/1 - 0s - loss: 4.9963e-05 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 376/500
1/1 - 0s - loss: 4.9443e-05 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 377/500
1/1 - 0s - loss: 4.8926e-05 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 378/500
1/1 - 0s - loss: 4.8410e-05 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 379/500
1/1 - 0s - loss: 4.7897e-05 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 380/500
1/1 - 0s - loss: 4.7439e-05 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 381/500
1/1 - 0s - loss: 4.6960e-05 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 382/500
1/1 - 0s - loss: 4.6444e-05 - binary_accuracy: 1.0000 - 4ms/epoch - 4ms/step

1/1 - 0s - loss: 2.2115e-05 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 463/500
1/1 - 0s - loss: 2.1930e-05 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 464/500
1/1 - 0s - loss: 2.1744e-05 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 465/500
1/1 - 0s - loss: 2.1574e-05 - binary_accuracy: 1.0000 - 4ms/epoch - 4ms/step
Epoch 466/500
1/1 - 0s - loss: 2.1394e-05 - binary_accuracy: 1.0000 - 8ms/epoch - 8ms/step
Epoch 467/500
1/1 - 0s - loss: 2.1213e-05 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 468/500
1/1 - 0s - loss: 2.1042e-05 - binary_accuracy: 1.0000 - 7ms/epoch - 7ms/step
Epoch 469/500
1/1 - 0s - loss: 2.0871e-05 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 470/500
1/1 - 0s - loss: 2.0703e-05 - binary_accuracy: 1.0000 - 8ms/epoch - 8ms/step
Epoch 471/500
1/1 - 0s - loss: 2.0533e-05 - binary_accuracy: 1.0000 - 5ms/epoch - 5ms/step
Epoch 472/500
1/1 - 0s - loss: 2.0369e-05 - binary_accuracy: 1.0000 - 6ms/epoch - 6ms/step
Epoch 473/500

<keras.src.callbacks.History at 0x181c3c856f0>

In [7]:
# The trained network can now make predictions; rounding the outputs
# turns them into hard 0/1 values that can be compared with the XOR targets.
predictions = model.predict(training_data)
print(predictions.round())

[[0.]
 [1.]
 [1.]
 [0.]]


In [10]:
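# loss function:
# loss: 1.6401e-05
# the loss value is very close to zero, which means that the model is making good predictions
# In the first iteration, loss: 0.2485. This is a mean squared error, not an accuracy:
# it is roughly what you get by predicting 0.5 for every sample (0.5^2 = 0.25),
# and the binary accuracy at that point was only 0.5000.
# While training, the loss value keeps decreasing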

# activation='sigmoid', epochs=5000
# loss: 1.2420e-05
# it means the loss function is very close to zero, there is a very very small error term