In [40]:
import numpy as np

In [41]:
# Noise-free prototypes: the four corners of the unit square, one per row.
x_seeds = np.array([
    (0, 0),
    (1, 0),
    (0, 1),
    (1, 1),
])
# The label of each corner is the XOR of its two coordinates -> [0, 1, 1, 0].
y_seeds = x_seeds[:, 0] ^ x_seeds[:, 1]

In [42]:
# Seed the global NumPy RNG before the first random draw so that the draw
# below and every later np.random.* call in this notebook give the same
# values on each "Restart & Run All".
SEED = 0
np.random.seed(SEED)

# Number of training samples to generate.
N = 1000
# For every sample pick one of the four seed points: an integer index in
# 0..3 (the upper bound 4 is exclusive).
idxs = np.random.randint(0, 4, N)

In [43]:
# Expand the sampled indices into the dataset: row i of X is the clean corner
# chosen for sample i, and Y[i] is that corner's label.
X, Y = x_seeds[idxs], y_seeds[idxs]

In [44]:
# Blur every point with independent Gaussian noise (mean 0, std 0.25).
# NOTE: this cell reads and reassigns X, so running it more than once adds
# another round of noise on top of the previous one.
X = np.random.normal(0.0, 0.25, X.shape) + X

In [45]:
class shallowNN():
    """Network with one tanh hidden layer and a single sigmoid output unit.

    Attributes:
        num_input_features: length of one input vector.
        num_hiddens: number of hidden units.
        W1: hidden-layer weights, shape (num_hiddens, num_input_features).
        b1: hidden-layer biases, shape (num_hiddens,).
        W2: output-layer weights, shape (num_hiddens,).
        b2: output-layer bias, shape (1,).
    """

    def __init__(self, num_input_features, num_hiddens):
        """Initialise every parameter with standard-normal draws.

        The draws come from the global ``np.random`` state, in the order
        W1, b1, W2, b2.
        """
        self.num_input_features = num_input_features
        self.num_hiddens = num_hiddens

        self.W1 = np.random.normal(size = (num_hiddens,num_input_features))
        self.b1 = np.random.normal(size = num_hiddens)
        self.W2 = np.random.normal(size = num_hiddens)
        self.b2 = np.random.normal(size = 1)

    def sigmoid(self,z):
        """Return the logistic function 1 / (1 + exp(-z)) element-wise.

        The naive formula overflows in ``exp(-z)`` for very negative ``z``
        and emits a RuntimeWarning. Here only ``exp(-|z|)`` is evaluated,
        which always lies in (0, 1], and the algebraically equivalent
        expression for the sign of ``z`` is selected.
        """
        z = np.asarray(z, dtype=float)
        e = np.exp(-np.abs(z))
        # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
        out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
        # Indexing with () turns a 0-d result back into a NumPy scalar (so a
        # scalar input still yields a scalar) and leaves n-d arrays as arrays.
        return out[()]

    def predict(self,x):
        """Run the forward pass for a single input vector.

        Args:
            x: one input vector of length ``num_input_features``
                (anything ``np.matmul`` accepts, e.g. a tuple or 1-D array).

        Returns:
            A pair ``(a2, (z1, a1, z2, a2))``: the output activation and the
            intermediate values of the forward pass (hidden pre-activation,
            hidden activation, output pre-activation, output activation).
            Because ``b2`` has shape (1,), ``z2`` and ``a2`` have shape (1,).
        """
        z1 = np.matmul(self.W1,x) + self.b1
        a1 = np.tanh(z1)
        z2 = np.matmul(self.W2,a1) + self.b2
        a2 = self.sigmoid(z2)
        return a2,(z1,a1,z2,a2)

In [46]:
# Two input coordinates feeding three hidden units; weights are drawn at
# random inside the constructor.
model = shallowNN(num_input_features=2, num_hiddens=3)

In [47]:
def train(X,Y,model,lr = 0.1):
    """Perform one full-batch gradient-descent step on ``model``.

    Gradients of the binary cross-entropy loss are accumulated sample by
    sample, averaged over the batch, and applied in place to ``model.W1``,
    ``model.b1``, ``model.W2`` and ``model.b2``.

    Args:
        X: sequence of input vectors, one per sample.
        Y: sequence of labels, same length as ``X``; a label equal to 1 is
            the positive class, anything else is treated as the negative one.
        model: object exposing ``W1``, ``b1``, ``W2``, ``b2`` and a
            ``predict(x)`` that returns ``(a2, (z1, a1, z2, a2))``.
        lr: learning rate.

    Returns:
        The mean cross-entropy loss over the batch, computed with the
        parameters as they were before this update. It has the shape of the
        model's output pre-activation ``z2``.

    Raises:
        ValueError: if ``X`` is empty or ``X`` and ``Y`` differ in length.
    """
    m = len(X)
    if m == 0:
        raise ValueError("train() needs at least one sample")
    if len(Y) != m:
        # zip() below would silently drop the surplus samples while the
        # averages would still be divided by len(X).
        raise ValueError(
            f"X and Y must have the same length, got {m} and {len(Y)}"
        )

    dW1 = np.zeros_like(model.W1)
    db1 = np.zeros_like(model.b1)
    dW2 = np.zeros_like(model.W2)
    db2 = np.zeros_like(model.b2)
    cost = 0.0

    for x,y in zip(X,Y):
        a2,(_,a1,z2,_) = model.predict(x)
        # Cross-entropy written in terms of the logit z2 so it stays finite
        # when the sigmoid saturates to exactly 0 or 1:
        #   -log(sigmoid(z))     = log(1 + exp(-z)) = logaddexp(0, -z)
        #   -log(1 - sigmoid(z)) = log(1 + exp(z))  = logaddexp(0, z)
        if y == 1:
            cost += np.logaddexp(0.0, -z2)
        else:
            cost += np.logaddexp(0.0, z2)
        # Derivative of the loss with respect to z2.
        diff = a2 - y
        #layer2
        db2 += diff
        dW2 += a1*diff
        #layer1: back-propagate through W2 and tanh (tanh' = 1 - a1**2).
        db1_tmp = diff * model.W2 *(1-a1**2)
        db1 += db1_tmp
        dW1 += np.outer(db1_tmp,x)

    cost /= m
    model.W1 -= lr * dW1/m
    model.b1 -= lr * db1/m
    model.W2 -= lr * dW2/m
    model.b2 -= lr * db2/m

    return cost

In [48]:
# 100 full-batch gradient steps with learning rate 1.0; the loss is reported
# on every tenth epoch (0, 10, ..., 90).
for epoch in range(100):
    cost = train(X, Y, model, lr=1.0)
    if not epoch % 10:
        print(epoch, cost)

0 [0.65548231]
10 [0.58434023]
20 [0.53604991]
30 [0.48349684]
40 [0.42739712]
50 [0.38130771]
60 [0.34882124]
70 [0.32671687]
80 [0.31145999]
90 [0.3003896]


In [49]:
# Spot check on a noise-free corner: the label for (1, 1) is 0, so the
# predicted probability should be close to 0.
model.predict(x=(1, 1))[0].item()

0.051980981096336004

In [50]:
# The label for (1, 0) is 1, so the predicted probability should be close to 1.
model.predict(x=(1, 0))[0].item()

0.9195595911014761

In [51]:
# The label for (0, 1) is 1, so the predicted probability should be close to 1.
model.predict(x=(0, 1))[0].item()

0.8771774721417097

In [52]:
# The label for (0, 0) is 0, so the predicted probability should be close to 0.
model.predict(x=(0, 0))[0].item()

0.08328532234916122