In [7]:
def softmax(x, ax=1):
    """Return the softmax of *x* along axis *ax*.

    The maximum along *ax* is subtracted before exponentiating so that the
    largest exponent is 0; this keeps ``np.exp`` from overflowing without
    changing the result.
    """
    exps = np.exp(x - np.amax(x, axis=ax, keepdims=True))
    normaliser = exps.sum(axis=ax, keepdims=True)
    return exps / normaliser

class oneLayerMLP:
    """Classifier with one hidden layer and a softmax output layer.

    Training maximises the L2-regularised log-likelihood
    ``sum(t * log(y)) - (l / 2) * (||w1||^2 + ||w2||^2)`` with mini-batch
    gradient ascent.

    Attributes set by ``__init__``:
        M       number of hidden units
        h       hidden activation function
        hDeriv  derivative of ``h``

    Attributes set by ``train``:
        K       number of output classes (columns of the target matrix)
        D       number of input features including the bias column
        w1      hidden-layer weights, shape (M, D)
        w2      output-layer weights, shape (K, M + 1)
    """

    def __init__(self, hiddenUnits, activationFunction):
        """Store the hidden-layer size and resolve the activation by name.

        Raises:
            ValueError: if *activationFunction* is not a supported name.
        """
        self.M = hiddenUnits
        self.h, self.hDeriv = self.getFunctions(activationFunction)

    def getFunctions(self, activationName):
        """Return ``(activation, derivative)`` for *activationName*.

        Supported names are ``"softplus"``, ``"tanh"`` and ``"cos"``.

        Raises:
            ValueError: for any other name.
        """
        if activationName == "softplus":
            # logaddexp(0, x) == log(1 + exp(x)) but does not overflow for
            # large x; the derivative is the sigmoid, written the same way
            # so that very negative x does not overflow either.
            return (lambda x: np.logaddexp(0, x),
                    lambda x: np.exp(-np.logaddexp(0, -x)))
        if activationName == "tanh":
            return (lambda x: np.tanh(x),
                    lambda x: 1 - np.power(np.tanh(x), 2))
        if activationName == "cos":
            return (lambda x: np.cos(x),
                    lambda x: -np.sin(x))
        raise ValueError(
            "Unknown activation function %r; expected 'softplus', 'tanh' "
            "or 'cos'" % (activationName,)
        )

    def forward_propagation(self, w1, w2, x):
        """Run the network forward.

        Args:
            w1: hidden-layer weights, shape (M, D).
            w2: output-layer weights, shape (K, M + 1).
            x: input rows, shape (N, D), bias column already included.

        Returns:
            ``(z, y)`` where ``z`` is the hidden activation with a leading
            column of ones (shape (N, M + 1)) and ``y`` is the row-wise
            softmax output (shape (N, K)).
        """
        z = self.h(x.dot(w1.T))
        # Prepend the bias unit for the output layer.
        z = np.hstack((np.ones((z.shape[0], 1)), z))
        y = softmax(z.dot(w2.T))
        return z, y

    def backward_propagation(self, x, t, z, y, w1, w2, l):
        """Compute the regularised log-likelihood and its gradients.

        Args:
            x: input rows used in the forward pass, shape (N, D).
            t: target matrix, shape (N, K).
            z: hidden activation with bias column, as returned by
                ``forward_propagation``.
            y: network output, as returned by ``forward_propagation``.
            w1, w2: current weights.
            l: L2 regularisation strength.

        Returns:
            ``(cost, dW1, dW2)``; the gradients have the shapes of ``w1`` and
            ``w2`` and point in the direction of increasing cost.
        """
        penalty = (l / 2) * (np.sum(np.square(w1)) + np.sum(np.square(w2)))
        cost = np.sum(np.log(y) * t) - penalty
        error = t - y
        # w2[:, 1:] drops the bias column: the bias unit has no incoming
        # weights in w1, so no error is propagated through it.
        hidden_delta = error.dot(w2[:, 1:]) * self.hDeriv(x.dot(w1.T))
        dW1 = hidden_delta.T.dot(x) - l * w1
        dW2 = error.T.dot(z) - l * w2
        return cost, dW1, dW2

    def train(self, X_train, y_train, lRate, train_epochs, batchSize, l):
        """Fit the weights with mini-batch gradient ascent.

        Args:
            X_train: input rows without a bias column; a column of ones is
                prepended here.
            y_train: target matrix with one column per class.
            lRate: learning rate; it is divided by *batchSize* before use.
            train_epochs: number of passes over the training data.
            batchSize: number of rows per mini-batch.
            l: L2 regularisation strength.
        """
        X_train = np.append(np.ones((X_train.shape[0], 1)), X_train, axis=1)
        lRate = lRate / batchSize
        self.K = y_train.shape[1]
        self.D = X_train.shape[1]

        # Uniform initialisation scaled by each layer's number of inputs.
        # The output layer has M + 1 inputs (hidden units plus bias), hence
        # 2 / (M + 1) rather than 2 / M + 1.
        s1 = np.sqrt(2 / self.D)
        s2 = np.sqrt(2 / (self.M + 1))
        self.w1 = np.random.uniform(-s1, s1, (self.M, self.D))
        self.w2 = np.random.uniform(-s2, s2, (self.K, self.M + 1))

        for _ in range(train_epochs):
            batches = self.createMiniBatches(X_train, y_train, batchSize)
            for batchX, batchT in batches:
                z, y = self.forward_propagation(self.w1, self.w2, batchX)
                _, dW1, dW2 = self.backward_propagation(
                    batchX, batchT, z, y, self.w1, self.w2, l)
                # Ascent step: the gradients are of the log-likelihood.
                self.w1 = self.w1 + lRate * dW1
                self.w2 = self.w2 + lRate * dW2

    def createMiniBatches(self, x, t, batchSize):
        """Shuffle the rows of ``(x, t)`` together and split into batches.

        Args:
            x: input rows, shape (N, D).
            t: target rows, shape (N, K).
            batchSize: maximum number of rows per batch; the final batch
                holds whatever rows remain.

        Returns:
            A list of ``[batch_x, batch_t]`` pairs.

        Raises:
            ValueError: if *batchSize* is smaller than 1.
        """
        if batchSize < 1:
            raise ValueError("batchSize must be at least 1, got %r" % (batchSize,))

        # Number of target columns, needed to split the stacked array again.
        K = t.shape[1]
        # Stack inputs and targets so one shuffle keeps the rows aligned.
        stacked = np.hstack((x, t))
        np.random.shuffle(stacked)

        return [
            [stacked[start:start + batchSize, :-K],
             stacked[start:start + batchSize, -K:]]
            for start in range(0, stacked.shape[0], batchSize)
        ]

    def calculate_accuracy(self, X_test, t_test):
        """Return the fraction of rows whose predicted class matches *t_test*.

        A bias column is prepended to *X_test*; the predicted and true classes
        are the arg-max columns of the network output and of *t_test*.
        """
        X_test = np.hstack((np.ones((X_test.shape[0], 1)), X_test))
        _, y = self.forward_propagation(self.w1, self.w2, X_test)
        return np.mean(np.argmax(y, 1) == np.argmax(t_test, 1))
        

In [8]:
# Dataset selection: CIFAR-10 is active; swap the two lines to use MNIST.
# X_train, X_test, y_train, y_test = load_mnist()
X_train, X_test, y_train, y_test = load_cifar()

# Network with 100 hidden units and softplus activation.
mlp = oneLayerMLP(hiddenUnits=100, activationFunction="softplus")

# 20 epochs, mini-batches of 100 rows, learning rate 0.001, L2 strength 0.1.
mlp.train(
    X_train,
    y_train,
    lRate=0.001,
    train_epochs=20,
    batchSize=100,
    l=0.1,
)
acc = mlp.calculate_accuracy(X_test, y_test)

In [9]:
acc

0.4047

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle

def unpickle(file):
    """Load and return the pickled object stored at path *file*.

    Strings pickled as Python 2 ``str`` are decoded as ``bytes`` objects, so
    dictionary keys in such files come back as ``bytes``.

    NOTE: unpickling can execute arbitrary code; only use this on trusted
    files.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

def load_cifar(data_dir="data/cifar-10-batches-py"):
    """Load the CIFAR-10 python batches found in *data_dir*.

    Reads ``data_batch_1`` .. ``data_batch_5`` as the training set and
    ``test_batch`` as the test set, using the ``b'data'`` and ``b'labels'``
    entries of each unpickled batch.

    Args:
        data_dir: directory holding the batch files.

    Returns:
        ``(X_train, X_test, y_train, y_test)``. The ``X`` arrays are the raw
        ``b'data'`` values divided by 255; the ``y`` arrays are one-hot float
        matrices with 10 columns.
    """
    def one_hot(raw_labels):
        # Indexing the identity matrix by label yields one row per label;
        # the result stays 2-D even for a single label.
        indices = np.asarray(raw_labels, dtype=int).reshape(-1)
        return np.eye(10)[indices]

    train_images = []
    train_targets = []
    for i in range(1, 6):
        batch = unpickle("%s/data_batch_%d" % (data_dir, i))
        train_images.append(np.asarray(batch[b'data']))
        train_targets.append(one_hot(batch[b'labels']))

    test_batch = unpickle("%s/test_batch" % data_dir)
    X_test = np.asarray(test_batch[b'data'])
    y_test = one_hot(test_batch[b'labels'])

    # Join the batches once instead of growing the arrays inside the loop.
    X_train = np.concatenate(train_images, axis=0)
    y_train = np.concatenate(train_targets, axis=0)

    return X_train / 255, X_test / 255, y_train, y_test


def _load_mnist_split(data_dir, prefix):
    """Load the ten per-digit files ``<prefix>0.txt`` .. ``<prefix>9.txt``.

    Returns ``(pixels, targets)``: the file contents stacked in digit order,
    cast to float and divided by 255, and an integer one-hot matrix with one
    row per loaded row.
    """
    frames = [
        pd.read_csv('%s/%s%d.txt' % (data_dir, prefix, digit),
                    header=None, sep=" ")
        for digit in range(10)
    ]
    rows_per_digit = [frame.shape[0] for frame in frames]
    pixels = pd.concat(frames).values
    # Row i of the identity matrix is the one-hot vector for digit i; repeat
    # it once for every row read from that digit's file.
    targets = np.repeat(np.eye(10, dtype=int), rows_per_digit, axis=0)
    return pixels.astype(float) / 255, targets


def load_mnist(data_dir="data/mnist"):
    """Load MNIST from per-digit, space-separated text files in *data_dir*.

    Training rows come from ``train0.txt`` .. ``train9.txt`` and test rows
    from ``test0.txt`` .. ``test9.txt``; the digit in the file name is the
    label of every row in that file.

    Args:
        data_dir: directory holding the text files.

    Returns:
        ``(X_train, X_test, y_train, y_test)``. The ``X`` arrays are float
        values divided by 255; the ``y`` arrays are integer one-hot matrices
        with 10 columns.
    """
    X_train, y_train = _load_mnist_split(data_dir, "train")
    X_test, y_test = _load_mnist_split(data_dir, "test")
    return X_train, X_test, y_train, y_test