# MLP XOR: backprop a máquina

In [1]:
import numpy as np; np.set_printoptions(precision=4)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
W = np.array([[1, 1], [1, 1]]); b1 = np.array([-1,  .5])
V = np.array([[1, -1], [-1, 1]]); b2 = np.array([ 1, -1])

## Forward: $\small\quad\boldsymbol{x}\to\boldsymbol{z}(\mathbf{W},\boldsymbol{b}_1)\to\boldsymbol{h}\to\boldsymbol{a}(\mathbf{V},\boldsymbol{b}_2)\to\mathcal{L}$

In [2]:
z = X @ W + b1; print('z =', str(z).replace('\n',','))
h = np.maximum(0, z); print('h =', str(h).replace('\n',','))
a = h @ V + b2; print('a =', str(a).replace('\n',','))
y_pred = np.exp(a); y_pred = np.transpose(y_pred.T / y_pred.sum(axis=1))
print('y_pred =', str(y_pred).replace('\n',','))
Ln = -y * np.log(y_pred)
print('Ln =', str(Ln).replace('\n',','), '\nL =', np.sum(Ln)/4.)

z = [[-1.   0.5], [ 0.   1.5], [ 0.   1.5], [ 1.   2.5]]
h = [[0.  0.5], [0.  1.5], [0.  1.5], [1.  2.5]]
a = [[ 0.5 -0.5], [-0.5  0.5], [-0.5  0.5], [-0.5  0.5]]
y_pred = [[0.7311 0.2689], [0.2689 0.7311], [0.2689 0.7311], [0.2689 0.7311]]
Ln = [[ 0.3133 -0.    ], [-0.      0.3133], [-0.      0.3133], [ 1.3133 -0.    ]] 
L = 0.5632616875182226


## Backward $\small\quad\mathcal{L}\to\boldsymbol{a}(\mathbf{V},\boldsymbol{b}_2)\to\boldsymbol{h}\to\boldsymbol{z}(\mathbf{W},\boldsymbol{b}_1)\to\boldsymbol{x}$

In [3]:
n = 0; ut = (y_pred[n] - y[n]).reshape(1, -1); print('uJLa =', str(ut).replace('\n',','))
gV = np.kron(h[n].reshape(1, -1), ut.T); print('gV =', str(gV).replace('\n',','))
gb2 = ut.T; print('gb2 =', str(gb2).replace('\n',','))
ut = ut @ V; print('uJLaJah =', str(ut).replace('\n',','))
Jhz = np.diag(np.heaviside(z[n], 0.0)); print('Jhz =', str(Jhz).replace('\n',','))
ut = ut @ Jhz; print('uJLaJahJhz =', str(ut).replace('\n',','))
gW = np.kron(X[n, :], ut.T); print('gW =', str(gW).replace('\n',','))
gb1 = ut.T; print('gb1 =', str(gb1).replace('\n',','))
ut = ut @ W; print('uJLaJahJhzJzx =', str(ut).replace('\n',','), '\n')

uJLa = [[-0.2689  0.2689]]
gV = [[-0.     -0.1345], [ 0.      0.1345]]
gb2 = [[-0.2689], [ 0.2689]]
uJLaJah = [[-0.5379  0.5379]]
Jhz = [[0. 0.], [0. 1.]]
uJLaJahJhz = [[0.     0.5379]]
gW = [[0. 0.], [0. 0.]]
gb1 = [[0.    ], [0.5379]]
uJLaJahJhzJzx = [[0.5379 0.5379]] 



## Forward-Backward con keras

In [5]:
import tensorflow as tf; from tensorflow import keras
W = tf.constant_initializer([[1, 1], [1, 1]]); b1 = tf.constant_initializer([-1,  .5])
V = tf.constant_initializer([[1, -1], [-1, 1]]); b2 = tf.constant_initializer([ 1, -1])
L1 = keras.layers.Dense(2, activation=tf.nn.relu, input_dim=2, kernel_initializer=W, bias_initializer=b1)
L2 = keras.layers.Dense(2, activation=tf.nn.softmax, kernel_initializer=V, bias_initializer=b2)
M = keras.Sequential([L1, L2])
L1_preact = keras.layers.Dense(2, activation=None, input_dim=2, kernel_initializer=W, bias_initializer=b1)
z = L1_preact(X); print('z =', str(z).replace('\n',''))
h = L1(X); print('h =', str(h).replace('\n',''))
L2_preact = keras.layers.Dense(2, activation=None, kernel_initializer=V, bias_initializer=b2)
a = L2_preact(h); print('a =', str(a).replace('\n','')) # softmax
p = L2(h); print('p =', str(p).replace('\n',''))
L = tf.keras.losses.CategoricalCrossentropy(from_logits=False); print('L =', L(y, p))
optimizer = tf.optimizers.SGD(learning_rate=0.1)
M.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
M.fit(X, y, epochs=1, verbose=1); print(L1.get_weights(), "\n", L2.get_weights())

z = tf.Tensor([[-1.   0.5] [ 0.   1.5] [ 0.   1.5] [ 1.   2.5]], shape=(4, 2), dtype=float32)
h = tf.Tensor([[0.  0.5] [0.  1.5] [0.  1.5] [1.  2.5]], shape=(4, 2), dtype=float32)
a = tf.Tensor([[ 0.5 -0.5] [-0.5  0.5] [-0.5  0.5] [-0.5  0.5]], shape=(4, 2), dtype=float32)
p = tf.Tensor([[0.7311 0.2689] [0.2689 0.7311] [0.2689 0.7311] [0.2689 0.7311]], shape=(4, 2), dtype=float32)
L = tf.Tensor(0.5632617, shape=(), dtype=float32)
[array([[1.0366, 0.9769],
       [1.0366, 0.9769]], dtype=float32), array([-0.9634,  0.4769], dtype=float32)] 
 [array([[ 1.0183, -1.0183],
       [-0.9711,  0.9711]], dtype=float32), array([ 1.0116, -1.0116], dtype=float32)]
