In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import utils

In [2]:
def _load_split(prefix, data_dir, num_classes):
    """
    Read the per-class files '<data_dir>/<prefix><c>.txt' for c in [0, num_classes).

    :param prefix: file name prefix, e.g. 'train' or 'test'
    :param data_dir: directory that contains the text files
    :param num_classes: number of class files to read (one file per class)
    :return: (data, one_hot) where data holds the file rows stacked in class
             order and divided by 255, and one_hot has one row per data row
             with a 1 in the column of the file the row came from
    """
    frames = []
    labels = []
    for c in range(num_classes):
        frame = pd.read_csv('%s/%s%d.txt' % (data_dir, prefix, c), header=None, sep=" ")
        frames.append(frame)
        # every row of file c belongs to class c
        labels.append(np.full(frame.shape[0], c, dtype=int))

    # one concat at the end instead of re-copying the growing frame per file
    data = pd.concat(frames).values.astype(float) / 255
    # row k of the identity matrix is the one-hot vector of class k
    one_hot = np.eye(num_classes, dtype=int)[np.concatenate(labels)]
    return data, one_hot


def load_data(data_dir='data/mnist', num_classes=10):
    """
    Load the MNIST dataset from space-separated text files, one file per digit
    and split ('train0.txt' ... 'train9.txt', 'test0.txt' ... 'test9.txt').

    :param data_dir: directory that contains the text files
    :param num_classes: number of per-class files in each split
    :return: train_data, test_data, y_train, y_test
        train_data: the matrix with the training data, values divided by 255
        test_data: the matrix with the testing data, values divided by 255
        y_train: matrix with a one-hot vector on each row (ground truth for training)
        y_test: matrix with a one-hot vector on each row (ground truth for testing)
    """
    train_data, y_train = _load_split('train', data_dir, num_classes)
    test_data, y_test = _load_split('test', data_dir, num_classes)
    return train_data, test_data, y_train, y_test

In [14]:
class oneLayerMLP:
    """
    Feed-forward network with one hidden layer and a softmax output layer.

    Training maximises the L2-penalised log-likelihood
        sum(t * log(y)) - (l/2) * (||w1||^2 + ||w2||^2)
    with mini-batch gradient ascent (weights move along +gradient).
    """

    def __init__(self, hiddenUnits , activationFunction):
        """
        :param hiddenUnits: number of hidden units M (the bias unit is added on top)
        :param activationFunction: "softplus", "tanh" or "cos"
        :raises ValueError: if activationFunction is not one of the names above
        """
        self.M = hiddenUnits
        self.h , self.hDeriv = self.getFunctions(activationFunction)


    def getFunctions(self,activationName):
        """
        Map an activation name to the pair (activation, derivative).

        :param activationName: "softplus", "tanh" or "cos"
        :return: tuple of two callables operating element-wise on numpy arrays
        :raises ValueError: for an unsupported name
        """
        if activationName=="softplus":
            # logaddexp(0, x) == log(1 + exp(x)) without overflowing for large x;
            # its derivative is the sigmoid, written as exp(-log(1 + exp(-x)))
            # so that very negative x does not overflow either
            return lambda x: np.logaddexp(0, x) , lambda x: np.exp(-np.logaddexp(0, -x))
        elif activationName=="tanh":
            # np.tanh saturates to +/-1 where the explicit exp ratio gives inf/inf
            return lambda x: np.tanh(x) , lambda x: 1 - np.tanh(x)**2
        elif activationName=="cos":
            return lambda x: np.cos(x) , lambda x: -np.sin(x)
        else:
            # fail here with a clear message; the caller unpacks two functions
            raise ValueError(
                "Unknown activation function %r (expected 'softplus', 'tanh' or 'cos')"
                % (activationName,)
            )


    @staticmethod
    def _add_bias(X):
        """Prepend a column of ones (the bias unit) to the 2-D array X."""
        return np.hstack(( np.ones((X.shape[0],1)) , X ))


    def train(self,X,t,l,learningRate,batchSize,K,epochs):
        """
        Initialise the weights and fit them with mini-batch gradient ascent.

        :param X: 2-D array of inputs, one sample per row (bias column is added here)
        :param t: 2-D array of targets, one row per sample, K columns
        :param l: L2 regularisation coefficient
        :param learningRate: step size of every weight update
        :param batchSize: number of rows per mini-batch (must be >= 1)
        :param K: number of output units / columns of t
        :param epochs: number of passes over the list of mini-batches
        :raises ValueError: if batchSize is smaller than 1

        Sets self.w1 with shape (M, X.shape[1]+1), self.w2 with shape (K, M+1)
        and self.costs with one entry per epoch.
        """
        X = self._add_bias(X)
        self.l = l
        self.batchSize = batchSize
        self.w1 = np.random.uniform( -np.sqrt(2/self.M) ,np.sqrt(2/self.M)  , (self.M,X.shape[1]))
        self.w2 = np.random.uniform(-np.sqrt(2/K)  ,np.sqrt(2/K)  , (K,self.M+1))
        self.costs = []

        # the data is shuffled and split once; every epoch reuses the same batches
        batchesList = self.createMiniBatches(X,t,batchSize,K)

        for e in range(epochs):
            epochCost = []
            for batchX , batchT in batchesList:
                y,z = self.forward_propagation( batchX )
                cost , dW1 , dW2 = self.backward_propagation( batchX, batchT, z, y )
                # gradient ascent: the objective is a log-likelihood to maximise
                self.w1 = self.w1 + learningRate*dW1
                self.w2 = self.w2 + learningRate*dW2
                epochCost.append(cost)
            # NOTE(review): the summed batch costs are divided by batchSize, which is
            # neither the number of batches nor the number of samples. Kept as is so
            # recorded values do not change; confirm the intended normalisation.
            self.costs.append( sum(epochCost)/self.batchSize )


    def softmax(self, x, ax=1 ):
        """
        Softmax along axis `ax`; the per-slice maximum is subtracted first so
        the exponentials cannot overflow.
        """
        m = np.max( x, axis=ax, keepdims=True )
        p = np.exp( x - m )
        return ( p / np.sum(p,axis=ax,keepdims=True) )


    def forward_propagation(self,x):
        """
        :param x: 2-D array of inputs that already contains the bias column
        :return: (y, z) where z is the hidden-layer output with a leading bias
                 column and y is the softmax output, one row per sample
        """
        z = self.h( x.dot(self.w1.T) )
        z = self._add_bias(z)
        y = self.softmax( z.dot(self.w2.T) )
        return y,z


    def backward_propagation(self,x,t,z,y):
        """
        Compute the penalised log-likelihood of a batch and its gradients.

        :param x: batch inputs including the bias column
        :param t: batch targets, same shape as y
        :param z: hidden-layer output returned by forward_propagation
        :param y: softmax output returned by forward_propagation
        :return: (cost, dW1, dW2) with dW1/dW2 shaped like self.w1/self.w2
        """
        # floor y at the smallest positive normal float: an exact 0 from softmax
        # underflow would otherwise give log(0) * 0 = nan and poison the sum
        log_y = np.log( np.maximum( y, np.finfo(float).tiny ) )
        cost = np.sum(log_y*t) - (self.l/2) * ( np.sum( np.square( self.w1 ) ) +  np.sum( np.square( self.w2 ) ) )
        error = t-y
        # w2[:,1:] drops the bias column: the hidden bias unit has no incoming weights
        dW1 = ( error.dot(self.w2[:,1:]) * self.hDeriv( x.dot(self.w1.T)) ).T.dot(x) - self.l*self.w1
        dW2 = error.T.dot(z) - self.l*self.w2

        return cost, dW1, dW2


    def createMiniBatches(self,x,t,batchSize,K):
        """
        Shuffle the rows of x and t together and cut them into mini-batches.

        :param x: 2-D array of inputs
        :param t: 2-D array of targets with K columns and as many rows as x
        :param batchSize: rows per batch; the last batch holds the remainder
        :param K: number of trailing columns that belong to t
        :return: list of [batchX, batchT] pairs
        :raises ValueError: if batchSize is smaller than 1
        """
        if batchSize < 1:
            # a zero step would never advance through the rows
            raise ValueError("batchSize must be a positive integer, got %r" % (batchSize,))

        # stack inputs and targets side by side so one shuffle keeps rows aligned
        stacked = np.hstack((x, t ))
        np.random.shuffle(stacked)

        # slicing past the end is clipped, so the final short batch needs no special case
        return [
            [ stacked[start:start+batchSize, :-K] , stacked[start:start+batchSize, -K:] ]
            for start in range(0, stacked.shape[0], batchSize)
        ]


    def calculate_accuracy(self,X_test, t_test):
        """
        :param X_test: 2-D array of inputs without the bias column
        :param t_test: 2-D array of one-hot targets, one row per sample
        :return: fraction of rows whose arg-max prediction equals the arg-max of t_test
        """
        X_test = self._add_bias(X_test)
        y , _ = self.forward_propagation(X_test )

        return np.mean( np.argmax(y,1) == np.argmax(t_test,1) )

In [12]:
# Load the scaled train/test data matrices and their one-hot label matrices
# (load_data returns them in the order train, test, y_train, y_test).
X_train, X_test, y_train, y_test = load_data()


In [24]:
# Network with 100 hidden cosine units; hyper-parameters are spelled out by
# keyword so each number can be read without looking up the signature.
mlp = oneLayerMLP(hiddenUnits=100, activationFunction="cos")
mlp.train(
    X=X_train,
    t=y_train,
    l=0.1,                # L2 regularisation coefficient
    learningRate=0.001,
    batchSize=100,
    K=10,                 # number of output classes
    epochs=10,
)

In [25]:
# Fraction of test rows whose arg-max prediction equals the arg-max of the
# one-hot ground truth; left as a bare expression so the cell displays it.
mlp.calculate_accuracy(X_test, y_test )

0.974