In [50]:
# genral NN
import numpy as np
from debug import *

class NN:
    def __init__(self, input_size=1, output_size=1, no_of_hid=1, size_of_hiddens=[2], active='relu', batch_size=10, classification=False):
        """
        @param: input_size; input size, other dimension should be 1
        @param: output_size; output_size other dimension should be 1
        @param: no_of_hid=1; number of hidden layers, default is 1
        @param: size_of_hiddens=[2]; sequence-wise no of nodes in each hidden layer.
        eg: if no_of_hid is 3 then size_of_hiddens layer should look like [3,4,5] 
        where 3, 4, 5 are no of nodes in each hidden layer in order.
        """
        self.log = False
        self.input_size = input_size
        self.output = None
        self.output_size = output_size
        self.no_of_hid = no_of_hid
        self.batch_size = batch_size
        self.hidden_y = []
        self.hidden_z = []
        self.size_of_hiddens = [self.input_size] + size_of_hiddens + [self.output_size]
        self.active = active
        self.classification = classification
        self.active_dict = {'sig': self.sigmoid, 'relu': self.relu, 'tanh': self.tanh, 'soft_max': self.soft_max, 'linear': self.linear}
        self.active_prime_dict = {'sig': self.sigmoid_prime, 'relu': self.relu_prime, 'tanh': self.tanh_prime, 'soft_max': self.soft_max_prime, 'linear': self.linear_prime}
        self.activate = self.active_dict[self.active]
        self.activate_prime = self.active_prime_dict[self.active]

        if self.classification:
            self.last_activation = self.active_dict['soft_max']
            self.last_activation_prime = self.active_prime_dict['soft_max']
        else:
            self.last_activation = self.active_dict['linear']
            self.last_activation_prime = self.active_prime_dict['linear']

        self.intialize_weights()
        self.intialize_biases()
        print("NOTE: Please try to give input as numpy array with defined shape eg: (2,1) and try to avoid shape like (2,)")

    def intialize_weights(self):
        self.weights = []
        for i in range(self.no_of_hid+1):
            in_size = self.size_of_hiddens[i]
            ou_size = self.size_of_hiddens[i+1]
            w = np.random.rand(ou_size, in_size)
            self.weights.append(w)

    def intialize_biases(self):
        self.bias = []
        for i in range(self.no_of_hid+1):
            size = self.size_of_hiddens[i+1]
            b = np.random.rand(size, 1)
            self.bias.append(b)

    def sigmoid(self, x):
        z = 1/(1 + np.exp(-x))
        return z

    def sigmoid_prime(self, x):
        y = self.sigmoid(x)
        z = y*(1-y)
        z.reshape(x.shape[0], 1)
        return z
    
    def relu(self, x):
        x[x<0] = 0
        return x
        
    def relu_prime(self, x):
        x[x>=0] = 1
        x[x<0] = 0
        return x
    
    def tanh(self, x):
        z = np.tanh(x)
        return z
        
    def tanh_prime(self, x):
        y = np.tanh(x)
        z = 1-np.square(y)
        z.reshape(x.shape[0], 1)
        return z
    
    def linear(self, x):
        return x
    
    def linear_prime(self, x):
        return np.ones(x.shape)
    
    def soft_max(self, x):
        z = np.exp(x-np.max(x))
        z = z/z.sum()
        z.reshape(x.shape[0], 1)
        return z
    
    def soft_max_prime(self, x):
        z = self.soft_max(x)
        return np.identity(x.shape[0]) - np.outer(z, z)

    def cost(self, y_hatt):
        # cross entropy
        # z = -np.sum(y_hatt*np.log(self.output))/self.output_size
        z = np.sum((0.5*np.square(y_hatt - self.output))/(self.output_size))
        return z

    def cost_prime(self, y_hatt):
        # self.output[abs(self.output)<1e-50] = 0.0
        # z = -np.multiply(y_hatt, 1/self.output)/self.output_size
        z = (self.output - y_hatt)*(1/self.output_size)
        return z
    
    def forward_prop(self, x):
        self.hidden_y.clear()
        self.hidden_z.clear()
        for i in range(self.no_of_hid):
            y = self.weights[i]@x + self.bias[i]
            z = self.activate(y)
            x = z
            self.hidden_y.append(y.copy())
            self.hidden_z.append(z.copy())
            if self.log:
                print("<HD")
                debug(hy=self.hidden_y, hz=self.hidden_z)
                print("HD>")
        y = self.weights[-1]@x + self.bias[-1]
        z = self.last_activation(y)
        x = z
        self.hidden_y.append(y.copy())
        self.hidden_z.append(z.copy())
        self.output = x.copy()

    def back_prop(self, x, y, rate):
        DJDW = [None]*(self.no_of_hid+1)
        DJDB = [None]*(self.no_of_hid+1)
        # debug(hys=self.hidden_y[-1], cps=self.cost_prime(y),
        #      ans = self.last_activation_prime(self.hidden_y[-1]) * self.cost_prime(y))
        DJDB[-1] = np.multiply(self.last_activation_prime(self.hidden_y[-1]), self.cost_prime(y))
        for i in range(self.no_of_hid-1, -1, -1):
            DJDB[i] = np.multiply(self.weights[i+1].T@DJDB[i+1], self.activate_prime(self.hidden_y[i]))

        DJDW[0] = DJDB[0]@x.T
        for i in range(1, self.no_of_hid+1):
            DJDW[i] = DJDB[i]@self.hidden_z[i-1].T

        if self.log:
            print("<DJ")
            debug(JW=DJDW, JB=DJDB)
            print("DJ>")

        for i in range(self.no_of_hid+1):
            self.weights[i] -= rate*DJDW[i]
            self.bias[i]    -= rate*DJDB[i]

    def train(self, X, Y, epochs = 60000, rate=0.4):
        n, m = X.shape
        for i in range(1, epochs+1):
            for _ in range(self.batch_size):
                k = np.random.randint(n)
                x = X[k].reshape(self.input_size, 1)
                y = Y[k].reshape(self.output_size, 1)
                self.forward_prop(x)
                self.back_prop(x, y, rate)
            if epochs>=10 and i%(epochs//10)==0: print(f"epoch: {i}, cost: {self.cost(y)}, {i*100//epochs}% complete...")

    def predict(self, X, symbols=None):
        x = X.reshape(self.input_size, 1)
        self.forward_prop(x)
        var = np.var(self.output)
        pos = np.argmax(self.output)
        symbol=None
        if symbols and len(symbols) == self.output.shape[0]:symbol = symbols[pos]
        return {"out": self.output, "pos": pos, "var": var, "symbol": symbol}

    def printWB(self):
        print("===weights")
        for w in self.weights:
            print(w.__str__())

        print("===bias")
        for b in self.bias:
            print(b.__str__())

def condition(space, fun, a=1, b=0, c=0):
    y = np.where(np.sign(space[:, 1] - fun(space[:, 0])) > c, a, b)
    return y

def splot(space, color):
    plt.scatter(space[:, 0], space[:, 1], color=color)
    plt.plot()

def cartesian_product(*arrays):
    ndim = len(arrays)
    return (np.stack(np.meshgrid(*arrays), axis=-1)
              .reshape(-1, ndim))
    
def random_space(x=np.array([-1.0, 1.0]), y=np.array([-1.0, 1.0]), samples=101, total_points=101):
    import itertools
    scalex, scaley = (x[1] - x[0], y[1] - y[0])
    shiftx, shifty = (-x[0], -y[0])
    space = np.random.random_sample((total_points, 2))@np.array([[scalex, 0], [0, scaley]]) - np.array([shiftx, shifty])
    return space 
    
def get_space(x=np.array([-1.0, 1.0]), y=np.array([-1.0, 1.0]), samples=101):
    import itertools
    X = np.linspace(x[0], x[1], samples).reshape(samples, 1)
    Y = np.linspace(y[0], y[1], samples).reshape(samples, 1)
    space = cartesian_product(X, Y)
    return space

def plot_space(space, output):
    # output = output.reshape(1, -1)
    f = lambda x: output
    c = np.where(output > 0.5, 'orange', 'blue').reshape(-1)
    splot(space, c)

def fun(x):
    return 5*x


total_points = 1000
rspace = random_space(total_points=total_points)
x = rspace[:, 0].reshape(-1, 1)
# y = condition(rspace, fun)
y = fun(x).reshape(-1, 1)

# debug(x=x, y=y)

nn = NN(input_size=1, output_size=1, no_of_hid=1, size_of_hiddens=[2], active='relu', batch_size=100)
# nn.log = True
nn.train(x, y, epochs=1000, rate=0.001)
nn.printWB()

NOTE: Please try to give input as numpy array with defined shape eg: (2,1) and try to avoid shape like (2,)
epoch: 100, cost: 0.0017028880208357984, 10% complete...
epoch: 200, cost: 2.481610055258964, 20% complete...
epoch: 300, cost: 0.06146489061670596, 30% complete...
epoch: 400, cost: 0.42688830272096584, 40% complete...
epoch: 500, cost: 0.046967569804091086, 50% complete...
epoch: 600, cost: 0.016860958050051465, 60% complete...
epoch: 700, cost: 0.1952113485155046, 70% complete...
epoch: 800, cost: 0.014601937766162266, 80% complete...
epoch: 900, cost: 0.2842402820618156, 90% complete...
epoch: 1000, cost: 0.014990172852808547, 100% complete...
===weights
[[-9.99319551]
 [20.7712926 ]]
[[-0.781005    0.31131259]]
===bias
[[-3.49699068]
 [-1.60434207]]
[[-0.58846961]]


In [52]:
for i in range(-10, 10):
    a = 0.1*i
    b = nn.predict(np.array([[a]]))['out']
    print(a, b, b[0][0]/a)

-1.0 [[-5.66203809]] 5.662038093328139
-0.9 [[-4.88156452]] 5.423960580956215
-0.8 [[-4.10109095]] 5.126363690491311
-0.7000000000000001 [[-3.32061738]] 4.743739117036431
-0.6000000000000001 [[-2.54014381]] 4.233573019096594
-0.5 [[-1.75967024]] 3.5193404819808207
-0.4 [[-0.97919667]] 2.447991676307162
-0.30000000000000004 [[-0.58846961]] 1.961565370931494
-0.2 [[-0.58846961]] 2.9423480563972415
-0.1 [[-0.58846961]] 5.884696112794483
0.0 [[-0.58846961]] -inf
0.1 [[-0.44128501]] -4.4128500949605085
0.2 [[0.20535147]] 1.026757356939303
0.30000000000000004 [[0.85198795]] 2.839959840905906
0.4 [[1.49862443]] 3.746561082889209
0.5 [[2.14526091]] 4.290521828079189
0.6000000000000001 [[2.79189739]] 4.65316232487251
0.7000000000000001 [[3.43853388]] 4.9121912511534545
0.8 [[4.08517036]] 5.106462945864162
0.9 [[4.73180684]] 5.257563152861378


  print(a, b, b[0][0]/a)
