In [25]:
import numpy as np
from sklearn import svm

# Four 4-bit samples (one per row) and their binary class labels.
inputs = np.array(
    [
        [0, 0, 1, 1],
        [0, 1, 0, 1],
        [1, 0, 1, 0],
        [1, 1, 0, 0],
    ]
)
y = np.array([1, 0, 0, 1])

# Fit a linear-kernel SVM on the samples; fit() hands back the fitted estimator.
classifier = svm.SVC(kernel="linear").fit(inputs, y)

# Predict on the very same samples the classifier was trained on.
classifier.predict(inputs)

array([1, 1, 1, 1])

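The linear SVM fails to find a hyperplane that separates the two classes (it predicts class 1 for every input), which indicates that the given data is not linearly separable. Indeed, the midpoint of the two class-1 points, $(0.5, 0.5, 0.5, 0.5)$, coincides with the midpoint of the two class-0 points, so the convex hulls of the two classes intersect. Hence it is not possible to solve this problem with a single perceptron; a multi-layer perceptron is needed.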

For a better explanation, please see the file [linearlyseparable.pdf](linearlyseparable.pdf).

By using the simplified equation:
$$ x_1 x_2 + x_3 x_4 $$

In [14]:
# Hidden layer: with the step activation and binary inputs, unit 0 fires only
# for x1 AND x2 (2 + 2 - 3 > 0) and unit 1 only for x3 AND x4.
W1 = np.array(
    [
        [2, 2, 0, 0],
        [0, 0, 2, 2],
    ]
)
b1 = np.array([[-3], [-3]])

# Output layer: fires when at least one hidden unit is active (logical OR).
W2 = np.array([[2, 2]])
b2 = np.array([[-1]])


def step_activation(x):
    """Threshold activation: 1.0 where ``x > 0`` and 0.0 everywhere else.

    Args:
        x: NumPy array of pre-activations.

    Returns:
        A new float64 array with the same shape as ``x``. Entries equal to
        zero map to 0.0 because the comparison is strict.
    """
    return (x > 0).astype(np.float64)


def MLNN(x):
    """Evaluate the two-layer threshold network for ``x1*x2 + x3*x4``.

    Args:
        x: a single sample of shape ``(4,)`` or a batch of shape
            ``(n_samples, 4)``.

    Returns:
        Float array of shape ``(1, n_samples)`` containing 0.0 or 1.0 for each
        sample; a single sample therefore yields shape ``(1, 1)``.
    """
    # Put samples in columns. A 1-D sample becomes a (4, 1) column, so the
    # (units, 1) biases broadcast correctly for one sample and for a batch.
    x = np.atleast_2d(x).T
    H = step_activation(W1 @ x + b1)
    return step_activation(W2 @ H + b2)


def print_results():
    """Print the network output for every row of the module-level ``inputs``."""
    for sample in inputs:
        # MLNN returns a one-element array. Extract the scalar explicitly:
        # formatting a non-0-d array with %d relies on an implicit
        # array-to-scalar conversion that NumPy has deprecated.
        label = MLNN(sample).item()
        print("(%d, %d, %d, %d) --> %d" % (sample[0], sample[1], sample[2], sample[3], label))


print_results()

(0, 0, 1, 1) --> 1
(0, 1, 0, 1) --> 0
(1, 0, 1, 0) --> 0
(1, 1, 0, 0) --> 1


  print("(%d, %d, %d, %d) --> %d" % (i[0], i[1], i[2], i[3], out[0]))


By using the full equation:
$$ \bar{x_1} \bar{x_2} x_3 x_4 + x_1 x_2 \bar{x_3} \bar{x_4} $$

In [15]:
import numpy as np

# Layer 1: one threshold unit per two-variable factor of the target expression
# (binary inputs assumed):
#   h1 = (NOT x1) AND (NOT x2)    h2 = x3 AND x4
#   h3 = x1 AND x2                h4 = (NOT x3) AND (NOT x4)
W1 = np.array([[-2, -2, 0, 0], [0, 0, 2, 2], [2, 2, 0, 0], [0, 0, -2, -2]])
b1 = np.array([[1, -3, -3, 1]]).T
# Layer 2: one AND unit per product term: (h1 AND h2) and (h3 AND h4).
# Each row must look only at its own pair of hidden units. A single all-2s row
# would fire whenever any two of h1..h4 are active, e.g. for (0, 0, 0, 0).
W2 = np.array([[2, 2, 0, 0], [0, 0, 2, 2]])
b2 = np.array([[-3, -3]]).T
# Layer 3: OR of the two product terms.
W3 = np.array([[2, 2]])
b3 = np.array([[-1]]).T


def step_activation(x):
    """Threshold activation: 1.0 where ``x > 0`` and 0.0 everywhere else.

    Returns a new float64 array with the same shape as ``x``.
    """
    return (x > 0).astype(np.float64)


def MLNN(x):
    """Evaluate the three-layer threshold network for the full expression.

    Args:
        x: a single sample of shape ``(4,)`` or a batch of shape
            ``(n_samples, 4)``.

    Returns:
        Float array of shape ``(1, n_samples)`` containing 0.0 or 1.0 for each
        sample; a single sample therefore yields shape ``(1, 1)``.
    """
    # Put samples in columns so every (units, 1) bias broadcasts across the
    # batch without any extra axis juggling.
    x = np.atleast_2d(x).T
    H1 = step_activation(W1 @ x + b1)
    H2 = step_activation(W2 @ H1 + b2)
    return step_activation(W3 @ H2 + b3)


def print_results():
    """Print the network output for the four reference samples."""
    inputs = np.array([[0, 0, 1, 1], [0, 1, 0, 1], [1, 0, 1, 0], [1, 1, 0, 0]])
    for sample in inputs:
        # MLNN returns a one-element array. Extract the scalar explicitly:
        # formatting a non-0-d array with %d relies on an implicit
        # array-to-scalar conversion that NumPy has deprecated.
        label = MLNN(sample).item()
        print("(%d, %d, %d, %d) --> %d" % (sample[0], sample[1], sample[2], sample[3], label))


print_results()

(0, 0, 1, 1) --> 1
(0, 1, 0, 1) --> 0
(1, 0, 1, 0) --> 0
(1, 1, 0, 0) --> 1


  print("(%d, %d, %d, %d) --> %d" % (i[0], i[1], i[2], i[3], out[0]))


By using a Learning NN:

In [16]:
import jax
import jax.numpy as jnp

# Training set: four 4-bit samples (rows) and one target per sample (column vector).
inputs = np.array(
    [
        [0, 0, 1, 1],
        [0, 1, 0, 1],
        [1, 0, 1, 0],
        [1, 1, 0, 0],
    ]
)
outputs = np.array([1, 0, 0, 1]).reshape(-1, 1)

# Layer widths: input, first hidden, second hidden, output.
n1 = n2 = n3 = 4
n4 = 1


def init_params():
    """Create the initial parameters of the three-layer network.

    Re-seeds NumPy's global random generator with 0, then draws each weight
    matrix from a standard normal distribution in layer order; every bias
    starts as a zero column vector.

    Returns:
        The list ``[W1, b1, W2, b2, W3, b3]``, where the weight of a layer has
        shape ``(fan_out, fan_in)`` and its bias has shape ``(fan_out, 1)``.
    """
    np.random.seed(0)

    widths = (n1, n2, n3, n4)
    params = []
    # Consecutive width pairs describe one layer each. Weights are drawn in
    # layer order, so the random stream is consumed as W1, W2, W3.
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
        params.append(np.random.randn(fan_out, fan_in))
        params.append(jnp.zeros((fan_out, 1)))
    return params


def ANN(x, params):
    """Forward pass of the three-layer tanh network.

    Args:
        x: samples in rows; transposed internally so samples become columns.
        params: the six arrays ``[W1, b1, W2, b2, W3, b3]``.

    Returns:
        Network output with samples back in rows, mapped from tanh's (-1, 1)
        range to (0, 1) via ``0.5 * (z + 1)``.
    """
    W1, b1, W2, b2, W3, b3 = params

    activation = x.T
    # Note the convention used here: the bias is subtracted, not added.
    for weight, bias in ((W1, b1), (W2, b2), (W3, b3)):
        activation = jnp.tanh(weight @ activation - bias)

    return 0.5 * (activation + 1).T


def loss_quadratic(x, y, params):
    """Sum of squared differences between the network output for ``x`` and ``y``."""
    residual = ANN(x, params) - y
    return jnp.sum(residual ** 2)


# JIT-compiled loss, and JIT-compiled gradient of the loss taken with respect
# to ``params`` (positional argument 2).
loss_jit = jax.jit(loss_quadratic)
grad_jit = jax.jit(jax.grad(loss_quadratic, argnums=2))


def optimize_params(inputs, outputs, params, epochs=1000, lr=0.1):
    """Fit the network with plain full-batch gradient descent.

    Args:
        inputs: samples forwarded to the loss as ``x``.
        outputs: targets forwarded to the loss as ``y``.
        params: sequence of parameter arrays; it is left untouched.
        epochs: number of gradient steps.
        lr: step size applied to every gradient.

    Returns:
        A new list holding the updated parameters, in the same order as
        ``params``.
    """
    # Work on a private copy. ``-=`` writes into NumPy arrays in place, so
    # without copying them the caller's initial weights would be overwritten.
    # Other entries are only ever rebound in the new list.
    params = [p.copy() if isinstance(p, np.ndarray) else p for p in params]

    for _ in range(epochs):
        grads = grad_jit(inputs, outputs, params)
        for i, grad in enumerate(grads):
            params[i] -= lr * grad

    print("Number of epochs: %d" % epochs)
    print("Final loss: %.4f" % loss_jit(inputs, outputs, params))
    return params


# Train starting from a freshly seeded initialisation.
params = optimize_params(inputs, outputs, init_params())

# Evaluate the trained network on the training samples and display the result.
result = ANN(inputs, params)
result

No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


Number of epochs: 1000
Final loss: 0.0004


Array([[0.9886856 ],
       [0.00885692],
       [0.0145981 ],
       [0.9945016 ]], dtype=float32)