In [1]:
import numpy as np

In [62]:
class DeepLearning():
    def __init__(self,X,label,learning_rate,lbd=0):
        np.random.seed(2)
        np.seterr(all='raise')
        self.m = X.shape[1]
        self.n = {}
        self.prev_layer = X.shape[0]
        self.weights = {}
        self.bias = {}
        self.z = {}
        self.a = {}
        self.num_layers=0
        self.fn = {}
        self.da = {}
        self.dw = {}
        self.db = {}
        self.a[0] = X
        self.alpha = learning_rate
        self.y = label
        self.lbd = lbd
        
    def add_layer(self,neurons,acti_fn='sigmoid',c=1.0):
        if acti_fn=='relu':
            c=2.0
        self.weights[self.num_layers+1] = np.random.randn(neurons,self.prev_layer)*np.sqrt(c/self.prev_layer)
        #self.weights[self.num_layers+1] = 2*(np.random.rand(neurons,self.prev_layer))-1
        self.bias[self.num_layers+1] = np.random.random((neurons,1))
        self.num_layers+=1
        self.fn[self.num_layers] = acti_fn
        self.prev_layer=neurons
    
    def feed_forward(self):
        for layer in range(1,self.num_layers+1):
            self.z[layer] = np.dot(self.weights[layer],self.a[layer-1]) + self.bias[layer]
            self.a[layer] = self.activation_function(self.z[layer],self.fn[layer])
    
    def activation_function(self,x,acti_fn):
        if acti_fn == 'tanh':
            return (np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))
        if acti_fn == 'relu':
            return np.maximum(0.01*x,x)
        if acti_fn == 'sigmoid':
            return 1 / (1 + np.exp(-x))
        
    def back_pass(self):
        #self.da[self.num_layers] = -1*(self.y/self.a[self.num_layers])+(1-self.y)/(1-self.a[self.num_layers])
        self.da[self.num_layers] = (self.y-self.a[self.num_layers])/(self.a[self.num_layers]**2-1)
        for layer in reversed(range(1,self.num_layers+1)):
            tmp = self.da[layer]*self.derivative_fn(self.z[layer],self.fn[layer])
            self.dw[layer] = (np.dot(tmp,(self.a[layer-1]).T))/self.m + (self.lbd*self.weights[layer])/(self.m+0.0)
            self.db[layer] = (np.sum(tmp,axis=1,keepdims=True))/self.m
            self.da[layer-1] = np.dot((self.weights[layer]).T,tmp)
    
    def derivative_fn(self,x,acti_fn):
        x = self.activation_function(x,acti_fn)
        if acti_fn == 'tanh':
            return 1-x**2
        if acti_fn == 'relu':
            return np.where(x<0,0.01,1)
        if acti_fn == 'sigmoid':
            return x * (1 - x)
    
    def gradient_descent(self):
        for layer in range(1,self.num_layers+1):
            self.weights[layer] = self.weights[layer] -self.alpha*self.dw[layer]
            self.bias[layer] = self.bias[layer] - self.alpha*self.db[layer]
            
    def cost_fn(self):
        activation=self.a[self.num_layers]
        reg_error = 0
        for layer in range(1,self.num_layers+1):
            reg_error += (np.linalg.norm(self.weights[layer]))**2
        reg_error = (reg_error*self.lbd)/(self.m+0.0)
        return (-1*np.average(np.log(activation)*self.y + np.log(1-activation)*(1-self.y)))+reg_error,np.sum((activation>0.5).astype(int)==self.y)/(self.m+0.0)
    
    def train(self):
        i = 0
        while True:
            self.feed_forward()
            self.back_pass()
            self.gradient_descent()
            i=i+1
            if i%100 == 0:
                print i
            #print self.cost_fn()
            if self.cost_fn()[0]<0.1:
                print i
                break

In [63]:
def mf(a):
    return (a[0]+a[1])
X = 100*(np.random.random((2,10000)))+1

In [64]:
y = ((np.apply_along_axis(mf,0,X))>110).astype(int).reshape(1,10000)

In [56]:
nn = DeepLearning(X,y,0.1)
nn.add_layer(3,acti_fn='relu')
nn.add_layer(7,acti_fn='relu')
nn.add_layer(5,acti_fn='relu')
nn.add_layer(9,acti_fn='relu')
nn.add_layer(6,acti_fn='relu')
nn.add_layer(9,acti_fn='relu')
nn.add_layer(5,acti_fn='relu')
nn.add_layer(7,acti_fn='relu')
nn.add_layer(3,acti_fn='relu')
nn.add_layer(1)
nn.train()

1943


In [68]:
nn = DeepLearning(X,y,learning_rate=0.1,lbd=0.05)
nn.add_layer(3,acti_fn='relu')
nn.add_layer(7,acti_fn='relu')
nn.add_layer(5,acti_fn='relu')
nn.add_layer(9,acti_fn='relu')
nn.add_layer(6,acti_fn='relu')
nn.add_layer(9,acti_fn='relu')
nn.add_layer(5,acti_fn='relu')
nn.add_layer(7,acti_fn='relu')
nn.add_layer(3,acti_fn='relu')
nn.add_layer(1)
nn.train()

100
200
300
400
500
600
700
800
860
