In [1]:
import time

import numpy as np
import pandas as pd
import theano as th
import theano.tensor as T

In [2]:
class Autoencoder(object):
    """Feed-forward autoencoder whose layers, cost and updates are Theano graphs.

    Parameters
    ----------
    layers : sequence of int
        Width of every layer, input first and output last.
    activ : {'tanh', 'sigmoid', 'ReLU', 'linear'}
        Activation applied after every affine layer.
    update : {'sgd', 'NAG', 'RMSProp', 'momentum'}
        Rule used to turn gradients into parameter updates.
    lr : float
        Learning rate.
    batch : int
        Mini-batch size; also the divisor of the summed squared error.
    memo : float
        Memory coefficient: momentum factor for 'momentum'/'NAG', decay
        factor of the running squared-gradient average for 'RMSProp'.
    """

    def __init__(self, layers, activ='tanh', update='sgd', lr=0.0003, batch=1, memo = 0.3):
        self.x = T.matrix()
        self.y_hat = T.matrix()
        self.layers = layers
        self.activ, self.update = self.choose(activ, update)
        self.batch = batch
        self.lr = lr
        self.memo = memo
        self.weights = []
        self.biases = []
        self.auxiliary = []
        self.a_n = [self.x]
        # Compiled Theano functions; populated by architecture().
        self._encode_fn = None
        self._decode_fn = None

    def choose(self, activ, update):
        """Return the (activation, update-rule) bound methods for the given names.

        Raises KeyError when either name is not one of the supported keys.
        """
        activations = {'tanh': self.tanh, 'sigmoid': self.sigmoid,
                       'ReLU': self.ReLU, 'linear': self.linear}
        updaters = {'sgd': self.sgd, 'NAG': self.NAG,
                    'RMSProp': self.RMSProp, 'momentum': self.momentum}
        return activations[activ], updaters[update]

    def architecture(self, cons, code_layer):
        """Build the symbolic network and compile the Theano functions.

        Parameters
        ----------
        cons : float
            Scale applied to the standard-normal initial weights and biases.
        code_layer : int
            Index into the activation list (0 is the input) whose output is
            treated as the code.
        """
        # Start from an empty graph every time: without this a second fit()
        # would append a second set of parameters and pair them with
        # auxiliary buffers of the wrong shape.
        self.weights = []
        self.biases = []
        self.auxiliary = []
        self.a_n = [self.x]

        for i in range(len(self.layers)-1):
            #Initialize shared variables
            self.weights.append(th.shared(cons*np.random.randn(self.layers[i],self.layers[i+1])))
            self.biases.append(th.shared(cons*np.random.randn(self.layers[i+1])))
            #Building architecture
            a_next = self.activ(T.dot(self.a_n[i],self.weights[i]) + self.biases[i].dimshuffle('x',0))
            self.a_n.append(a_next)

        params = self.weights + self.biases

        # One zero-initialised buffer per parameter, used as state by the
        # update rules (momentum, previous position or squared-gradient average).
        for param in params:
            self.auxiliary.append(th.shared(np.zeros(param.get_value().shape)))

        # Kept under private names so they do not shadow encode()/decode().
        self._encode_fn = th.function([self.x],self.a_n[code_layer])
        self._decode_fn = th.function([self.a_n[code_layer]],self.a_n[-1])

        #Calculate the cost and gradients
        Cost = (T.sum((self.a_n[-1]-self.y_hat)**2))/self.batch
        grads = T.grad(Cost,params,disconnected_inputs='ignore')
        #Update parameters
        self.gradient_2 = th.function(inputs=[self.x,self.y_hat],
                                      updates=self.update(params,grads,self.auxiliary),
                                      outputs=Cost)

    def fit(self, X, code_layer=1, epoch=10, print_every=1, cons=0.3):
        """Train the network to reproduce X (unsupervised learning).

        Parameters
        ----------
        X : numpy.ndarray
            Training rows; each mini-batch is used as both input and target.
        code_layer : int
            Forwarded to architecture().
        epoch : int
            Number of passes over the shuffled data.
        print_every : int
            Progress is printed whenever the epoch index is a multiple of this.
        cons : float
            Forwarded to architecture().

        Raises
        ------
        ValueError
            If X has fewer rows than one mini-batch.

        The mean mini-batch cost of every epoch is stored in self.Cost_Record.
        Rows beyond the last full mini-batch of an epoch are not used.
        """
        rounds = int(X.shape[0]/self.batch)
        if rounds == 0:
            raise ValueError("X has %d rows, fewer than one mini-batch of size %s"
                             % (X.shape[0], self.batch))

        self.architecture(cons, code_layer)
        # time.clock() was removed in Python 3.8; perf_counter() is its replacement.
        start = time.perf_counter()
        self.Cost_Record = []
        for j in range(epoch):
            costs = 0
            X_permuted = X[np.random.permutation(X.shape[0])]

            for i in range(rounds):
                batch_X = X_permuted[i*self.batch:(i+1)*self.batch]
                costs += self.gradient_2(batch_X,batch_X)

            self.Cost_Record.append(costs/rounds)

            if j % print_every==0:
                print("Epoch %d ; Cost: %f; %f seconds used."%(j+1,self.Cost_Record[-1],(time.perf_counter()-start)))

    def encode(self, X):
        """Return the code-layer activations for X.

        Raises RuntimeError if the network has not been built by fit() yet.
        """
        encode_fn = getattr(self, '_encode_fn', None)
        if encode_fn is None:
            raise RuntimeError("encode() called before fit(): the network is not built yet")
        return encode_fn(X)

    def decode(self, X):
        """Return the output-layer activations for code-layer values X.

        Raises RuntimeError if the network has not been built by fit() yet.
        """
        decode_fn = getattr(self, '_decode_fn', None)
        if decode_fn is None:
            raise RuntimeError("decode() called before fit(): the network is not built yet")
        return decode_fn(X)

    ##### Optimization methods #####
    # NOTE(review): NAG, momentum and RMSProp divide the gradient by self.batch
    # although the cost is already divided by self.batch; sgd does not.
    # Confirm whether the extra scaling is intended.
    def sgd(self,para,grad,_):
        """Plain gradient descent: para <- para - lr * grad. The third argument is unused."""
        return [(para[ix], para[ix]-self.lr*grad[ix]) for ix in range(len(grad))]

    def NAG(self,para,grad,Real):
        """Nesterov accelerated gradient (NAG).

        Real[ix] stores the position reached by the previous plain gradient
        step; the parameter itself is moved to the look-ahead position.
        """
        updates = []
        for ix in range(len(grad)):
            gradient = -(self.lr/self.batch)*grad[ix]
            spy_position = (1+self.memo)*(para[ix]+gradient)-self.memo*Real[ix]
            updates.append((para[ix], spy_position))
            updates.append((Real[ix], para[ix]+gradient))
        return updates

    def momentum(self,para,grad,Momentum):
        """Momentum: direction = memo * previous direction - (lr/batch) * grad."""
        updates = []
        for ix in range(len(grad)):
            direction = (self.memo)*Momentum[ix] - (self.lr/self.batch)*grad[ix]
            updates.append((para[ix], para[ix]+direction))
            updates.append((Momentum[ix], direction))
        return updates

    def RMSProp(self,para,grad,Sigma_square):
        """RMSProp: scale each step by the root of a running squared-gradient average.

        memo is the decay factor of the average; 0.001 is added to the
        denominator to avoid division by zero.
        """
        updates = []; alpha = self.memo
        for ix in range(len(grad)):
            gradient = grad[ix]/self.batch
            Factor = Sigma_square[ix]*alpha+(1-alpha)*(gradient**2)
            direction = -(self.lr)*gradient/(T.sqrt(Factor)+0.001)
            updates.append((para[ix], para[ix]+direction))
            updates.append((Sigma_square[ix], Factor))
        return updates

    ##### Activation functions #####
    def tanh(self, Z):
        """Hyperbolic tangent."""
        return T.tanh(Z)

    def ReLU(self, Z):
        """Rectified linear unit: 0 where Z < 0, Z elsewhere."""
        return T.switch(Z<0,0,Z)

    def sigmoid(self, Z):
        """Logistic sigmoid 1 / (1 + exp(-Z))."""
        return 1/(1+T.exp(-Z))

    def linear(self, Z):
        """Identity activation."""
        return Z


In [3]:
# Seed NumPy's global RNG so the random weight initialisation and the per-epoch
# shuffling inside Autoencoder.fit are repeatable from one run to the next.
np.random.seed(0)
# sep=r'\s+' replaces the deprecated delim_whitespace=True (same parsing).
data = pd.read_csv('Data/train.dat',header=None,sep=r'\s+')
# Symmetric 9 -> 2 -> 9 network; code_layer=4 selects the 2-unit bottleneck.
coder = Autoencoder([9,64,32,16,2,16,32,64,9], batch=4, activ='tanh', update='RMSProp', memo=0.9, lr=0.003)
coder.fit(data.values, code_layer=4, epoch=10)

Epoch 1 ; Cost: 3.732154; 0.007659 seconds used.
Epoch 2 ; Cost: 2.896641; 0.014883 seconds used.
Epoch 3 ; Cost: 2.757835; 0.022593 seconds used.
Epoch 4 ; Cost: 2.831620; 0.029912 seconds used.
Epoch 5 ; Cost: 2.593290; 0.037199 seconds used.
Epoch 6 ; Cost: 2.674831; 0.044469 seconds used.
Epoch 7 ; Cost: 2.499200; 0.051758 seconds used.
Epoch 8 ; Cost: 2.543282; 0.059048 seconds used.
Epoch 9 ; Cost: 2.458749; 0.067336 seconds used.
Epoch 10 ; Cost: 2.437635; 0.075334 seconds used.


In [4]:
# Push the data through the trained encoder, then show the first five rows of
# the code, its reconstruction and the original input for comparison.
code = coder.encode(data.values)
np.set_printoptions(precision=4)
for label, rows in (('encode:\n', code),
                    ('decode:\n', coder.decode(code)),
                    ('original:\n', data.values)):
    print(label, rows[:5])

encode:
 [[ 0.9022 -0.2044]
 [-0.8444 -0.595 ]
 [-0.0083 -0.9609]
 [-0.7901 -0.2062]
 [-0.083  -0.2733]]
decode:
 [[ 0.4606 -0.1849 -0.1809  0.0117 -0.7336 -0.1489  0.1116 -0.4908 -0.5451]
 [-0.5626 -0.0559  0.312  -0.6131  0.1229 -0.227  -0.7466  0.5354 -0.0484]
 [ 0.1846  0.4433  0.5595 -0.295  -0.368   0.1861 -0.7999  0.2592 -0.051 ]
 [-0.5403 -0.0397 -0.0683 -0.5874  0.1923 -0.3252 -0.6156  0.4338 -0.1083]
 [ 0.163   0.3571  0.2002 -0.4426 -0.1815 -0.0698 -0.4926  0.2595  0.1824]]
original:
 [[ 0.8105 -0.35    0.4769  0.4541 -0.9829  0.5252  0.3838 -0.3408 -0.4824]
 [-0.6273 -0.2097  0.9404  0.1143  0.3487 -0.5206  0.0061  0.5024 -0.6687]
 [ 0.1624 -0.1173  0.426  -0.3607 -0.6632  0.4431 -0.8355  0.7206 -0.8977]
 [-1.      0.7758 -0.267  -0.888  -0.1099 -0.9183 -0.4086  0.8962  0.5841]
 [ 0.8464  0.1762  0.2729  0.2724  0.8155  0.6096 -0.2844  0.98    0.3302]]
