In [1]:
import numpy as np

In [68]:
class DeepLearning():
    def __init__(self,X,label,learning_rate,lbd=0,batch_size = None):
        np.random.seed(2)
        np.seterr(all='raise')
        self.m = X.shape[1]
        self.n = {}
        self.prev_layer = X.shape[0]
        self.weights = {}
        self.bias = {}
        self.z = {}
        self.a = {}
        self.num_layers=0
        self.fn = {}
        self.da = {}
        self.dw = {}
        self.db = {}
        #self.a[0] = X
        self.alpha = learning_rate
        self.y = label
        self.lbd = lbd
        self.keep_prob = {}
        self.drop_out = {}
        self.X = X
        self.batch_size = batch_size
        
    def add_layer(self,neurons,acti_fn='sigmoid',c=1.0,keep_prob=1.0):
        if acti_fn=='relu':
            c=2.0
        self.weights[self.num_layers+1] = np.random.randn(neurons,self.prev_layer)*np.sqrt(c/self.prev_layer)
        #self.weights[self.num_layers+1] = 2*(np.random.rand(neurons,self.prev_layer))-1
        self.bias[self.num_layers+1] = np.random.random((neurons,1))
        self.num_layers+=1
        self.fn[self.num_layers] = acti_fn
        self.keep_prob[self.num_layers] = keep_prob
        self.prev_layer=neurons
    def fit(self,test):
        a_tmp = test
        for layer in range(1,self.num_layers+1):
            z_tmp = np.dot(self.weights[layer],a_tmp) + self.bias[layer]
            a_tmp = self.activation_function(z_tmp,self.fn[layer])
        return a_tmp
    def feed_forward(self,X):
        self.a[0] = self.X
        for layer in range(1,self.num_layers+1):
            self.z[layer] = np.dot(self.weights[layer],self.a[layer-1]) + self.bias[layer]
            self.a[layer] = self.activation_function(self.z[layer],self.fn[layer])
            if self.keep_prob[layer]!=1:
                self.drop_out[layer] = np.random.rand(self.a[layer].shape[0],self.a[layer].shape[1])<self.keep_prob[layer]
                self.a[layer] = np.multiply(self.drop_out[layer],self.a[layer])
                self.a[layer] /=self.keep_prob[layer]
            else:
                self.drop_out[layer]=1
    
    def activation_function(self,x,acti_fn):
        if acti_fn == 'tanh':
            return (np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))
        if acti_fn == 'relu':
            return np.maximum(0.01*x,x)
        if acti_fn == 'sigmoid':
            return 1 / (1 + np.exp(-x))
        
    def back_pass(self):
        #self.da[self.num_layers] = -1*(self.y/self.a[self.num_layers])+(1-self.y)/(1-self.a[self.num_layers])
        self.da[self.num_layers] = (self.y-self.a[self.num_layers])/(self.a[self.num_layers]**2-1)
        for layer in reversed(range(1,self.num_layers+1)):
            tmp = self.da[layer]*self.derivative_fn(self.a[layer],self.fn[layer])
            self.dw[layer] = (np.dot(tmp,(self.a[layer-1]).T))/self.m + (self.lbd*self.weights[layer])/(self.m+0.0)
            self.db[layer] = (np.sum(tmp,axis=1,keepdims=True))/self.m
            self.da[layer-1] = np.dot((self.weights[layer]).T,tmp)
            if layer>1:
                self.da[layer-1] = self.drop_out[layer-1]*self.da[layer-1]
                self.da[layer-1]/=self.keep_prob[layer-1]
    
    def derivative_fn(self,x,acti_fn):
        #x = self.activation_function(x,acti_fn)
        if acti_fn == 'tanh':
            return 1-x**2
        if acti_fn == 'relu':
            return np.where(x<=0,0.01,1)
        if acti_fn == 'sigmoid':
            return x * (1 - x)
    
    def gradient_descent(self):
        for layer in range(1,self.num_layers+1):
            self.weights[layer] = self.weights[layer] -self.alpha*self.dw[layer]
            self.bias[layer] = self.bias[layer] - self.alpha*self.db[layer]
            
    def cost_fn(self):
        activation=self.a[self.num_layers]
        reg_error = 0
        for layer in range(1,self.num_layers+1):
            reg_error += (np.linalg.norm(self.weights[layer]))**2
        reg_error = (reg_error*self.lbd)/(self.m+0.0)
        return (-1*np.average(np.log(activation)*self.y + np.log(1-activation)*(1-self.y)))+reg_error,np.sum((activation>0.5).astype(int)==self.y)/(self.m+0.0)
    
    def train(self):
        for i in range(0, 30000):
            self.feed_forward(self.X)
            self.back_pass()
            self.gradient_descent()
            print self.cost_fn()
            if self.cost_fn()[0]<0.05:
                print i,self.cost_fn()
                break
                
    def batch_train(self,batch_size=None):
        if batch_size is None:
                batch_size = self.m
        nb = int(np.ceil((self.m+0.0)/batch_size))
        for i in range(0, 30000):
            for t in range(0,nb):
                X_b = X[:,batch_size*t:batch_size*(t+1)]
                self.feed_forward(X_b)
                self.back_pass()
                self.gradient_descent()
                
            print self.cost_fn()
            if self.cost_fn()[0]<0.1:
                print i,self.cost_fn()
                break

In [69]:
def mf(a):
    return (a[0]+a[1])
X = 100*(np.random.random((2,10000)))+1

In [70]:
y = ((np.apply_along_axis(mf,0,X))>110).astype(int).reshape(1,10000)

In [82]:
nn = DeepLearning(X,y,learning_rate=0.5,lbd=0)
nn.add_layer(3,acti_fn='relu')
nn.add_layer(7,acti_fn='relu')
nn.add_layer(5,acti_fn='relu')
nn.add_layer(9,acti_fn='relu')
nn.add_layer(6,acti_fn='relu')
nn.add_layer(9,acti_fn='relu')
nn.add_layer(5,acti_fn='relu')
nn.add_layer(7,acti_fn='relu')
nn.add_layer(3,acti_fn='relu')
nn.add_layer(1)
st = time.time()
nn.batch_train(batch_size=1)
et = time.time()

(0.034721965268554318, 0.98619999999999997)
0 (0.034721965268554318, 0.98619999999999997)


In [83]:
nn.weights

{1: array([[-0.32991595,  0.02455025],
        [-0.83048935,  0.0862456 ],
        [-2.53331475, -2.66925997]]),
 2: array([[ 0.16531026, -0.88124243, -2.38158381],
        [ 0.38130625, -0.45239882,  0.95679649],
        [ 0.39152829, -0.36028977,  0.99283534],
        [ 0.04985939,  0.6486037 ,  0.01002727],
        [ 0.04975904, -0.06551715,  0.10666316],
        [ 0.0167011 , -0.41464505, -1.69182746],
        [ 0.31352675, -0.25677035,  2.03514046]]),
 3: array([[ -9.51776182e-01,   4.48240735e-01,  -3.10103018e-01,
           2.41432495e-01,   2.08896516e-01,  -1.10898187e+00,
           1.78883223e+00],
        [  9.79160623e-01,  -4.72851940e-01,   2.39797360e-01,
           7.37743122e-02,  -2.93416760e-01,   2.18822516e-01,
           8.53596855e-02],
        [ -5.85334718e-01,   1.32406502e-02,  -1.18699759e-01,
           2.03799779e-01,   2.38865463e-01,  -5.11645686e-01,
           5.96245552e-01],
        [  4.53869482e-02,  -1.01541976e+00,  -2.28925468e-01,
           