# TensorFlow ELM example with Normal Equation Cholesky-based Matrix Solver

In [1]:
import keras 
from keras.datasets import mnist
import os
import tensorflow as tf
import time

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load MNIST through Keras, using a cache file in the current working directory.
# NOTE(review): the cache file is named "*.csv", but Keras' MNIST loader
# presumably stores a NumPy .npz archive there — confirm before renaming.
mnist_cache_path = os.path.join(os.getcwd(), 'mnist_dataset.csv')
mnist_data = mnist.load_data(mnist_cache_path)

In [3]:
# mnist_data is a pair of (inputs, labels) pairs: first the training split,
# then the test split. Keep the raw splits around and unpack both in one go.
train_set, test_set = mnist_data
(x_train, y_train), (x_test, y_test) = train_set, test_set

In [4]:
# One-hot encode the labels into 10 classes.
# The encoding reads the raw labels from train_set/test_set instead of from
# y_train/y_test themselves, so re-running this cell always produces the same
# result rather than one-hot encoding labels that are already encoded.
y_train = keras.utils.to_categorical(train_set[1], num_classes=10)
y_test = keras.utils.to_categorical(test_set[1], num_classes=10)

In [5]:
# Flatten each sample into a single row of 28*28 values; -1 lets the number
# of rows be inferred from the array size.
flat_size = 28 * 28
x_train = x_train.reshape((-1, flat_size))
x_test = x_test.reshape((-1, flat_size))
x_train.shape

(60000, 784)

## Define the ELM core computational TF graph

In [6]:
# ELM graph: placeholders for the flattened inputs and one-hot targets, followed
# by a single hidden layer whose random weights and biases are not trainable.
with tf.name_scope("elm"):

    with tf.name_scope("input"):
        # NOTE: `input` shadows the Python builtin of the same name. The name is
        # kept because the training cell feeds this placeholder as `input`.
        input = tf.placeholder(tf.float32, shape=[None, 28*28], name="input")
        # One-hot targets. Given its own graph name ("labels") so it is not a
        # second node called "input" next to the placeholder above.
        y = tf.placeholder(tf.float32, shape=[None, 10], name="labels")

    with tf.name_scope("hidden_layer_1"):
        # Random projection to 2048 hidden units. Both variables are created
        # with trainable=False, so only their initial random values are used.
        # NOTE(review): no random seed is set, so each initialization draws
        # different weights — confirm whether reproducibility is required.
        weights = tf.Variable(tf.random_normal(shape=[28*28, 2048], stddev=1), trainable=False)
        biases = tf.Variable(tf.random_normal(shape=[2048], stddev=1), trainable=False)

        pre_activations = tf.matmul(input, weights) + biases

        # Hidden-layer output; this is the tensor the later cells evaluate.
        activations = tf.sigmoid(pre_activations)
        
    

## Training and Testing of ELM 

**Note how training is done via batching.
After the activation matrix has been obtained (`conc` in the code), the solver is called.**

**Testing is done in one pass thereafter.**
**Note how it is necessary to specify a different graph part for testing and prediction.**

In [7]:
# Collect the hidden-layer activations batch by batch, solve for the ELM output
# weights with two different solvers, then evaluate both solutions on the
# training and test sets.
def _accuracy(scores, labels):
    """Return a scalar tensor with the fraction of rows whose argmax agrees.

    scores: 2-D tensor of per-class outputs.
    labels: 2-D tensor of one-hot targets with the same shape as `scores`.
    """
    hits = tf.equal(tf.argmax(scores, 1), tf.argmax(labels, 1))
    return tf.reduce_mean(tf.cast(hits, tf.float32))


with tf.Session() as sess:
    # tf.initialize_all_variables() is deprecated; this is its replacement.
    sess.run(tf.global_variables_initializer())
    print("initialized")
    batch_size = 10000

    # Evaluate the hidden layer over the training set in chunks. Stepping by
    # batch_size (rather than looping over n // batch_size full batches) keeps
    # a trailing partial batch, so the activation matrix always has one row
    # per training label.
    batch_activations = []
    for start in range(0, x_train.shape[0], batch_size):
        X_train = x_train[start:start + batch_size].astype('float32')
        print(X_train.shape)
        # Only `input` is required to evaluate the hidden layer.
        batch_activations.append(sess.run(activations, feed_dict={input: X_train}))

    # A single concat over all batches. Unlike concatenating inside the loop,
    # this is also defined when there is only one batch and does not build a
    # nested chain of concat ops.
    conc = tf.concat(batch_activations, axis=0)

    with tf.name_scope("optimizer"):
        # Least-squares solve of conc @ W = y. With fast=True, TensorFlow
        # documents this as solving the normal equations via a Cholesky
        # decomposition. The L2 regularizer is left at 0 here; the original
        # author's note says values of 100 to 1000 work best.
        optimum = tf.matrix_solve_ls(conc, y, l2_regularizer=0.0, fast=True)

    with tf.name_scope("hpelm_optimizer"):
        # Explicit normal equations: W = (H^T H)^-1 (H^T y), with H = conc.
        conc_t = tf.transpose(conc)
        phix = tf.einsum('ij,jk->ik', conc_t, conc)
        phit = tf.einsum('ij,jk->ik', conc_t, y)
        optimum2 = tf.einsum('ij,jk->ik', tf.matrix_inverse(phix), phit)

    with tf.name_scope("output_layer"):
        output = tf.matmul(conc, optimum)
        output2 = tf.matmul(conc, optimum2)

    with tf.name_scope("accuracy"):
        accuracy = _accuracy(output, y)
        accuracy2 = _accuracy(output2, y)

    with tf.name_scope("test"):
        # The solved weights are fed back in through placeholders, so the test
        # pass runs the hidden layer on whatever is fed to `input`. Their shape
        # is read from the graph instead of repeating the layer sizes here.
        n_hidden = activations.get_shape().as_list()[1]
        n_classes = y.get_shape().as_list()[1]
        opt = tf.placeholder(tf.float32, shape=[n_hidden, n_classes], name="output_weights")
        opt2 = tf.placeholder(tf.float32, shape=[n_hidden, n_classes], name="output_weights2")

        output_test = tf.matmul(activations, opt)
        output_test2 = tf.matmul(activations, opt2)

        accuracy_test = _accuracy(output_test, y)
        accuracy_test2 = _accuracy(output_test2, y)

    print("initialized")

    # "training 1" times the explicit-inverse solver (optimum2).
    # The training activations are already baked into `conc`, so only the
    # labels need to be fed.
    time0 = time.time()
    acc_train2, weights2 = sess.run([accuracy2, optimum2], feed_dict={y: y_train})
    print("Elapsed time training 1: %.5f" % (time.time() - time0))

    # "training 2" times tf.matrix_solve_ls (optimum).
    # NOTE: `weights` rebinds the name of the hidden-layer tf.Variable from the
    # graph cell to a NumPy array; the graph itself is unaffected.
    time0 = time.time()
    acc_train, weights = sess.run([accuracy, optimum], feed_dict={y: y_train})
    print("Elapsed time training 2: %.5f" % (time.time() - time0))

    # One-pass test evaluation for both sets of output weights.
    acc_test, acc_test2 = sess.run([accuracy_test, accuracy_test2],
                                   feed_dict={input: x_test, y: y_test, opt: weights, opt2: weights2})
        

Instructions for updating:
Use `tf.global_variables_initializer` instead.
initialized
(10000, 784)
(10000, 784)
(10000, 784)
(10000, 784)
(10000, 784)
(10000, 784)
initialized
Elapsed time training 1: 66.48664
Elapsed time training 2: 41.487451


In [8]:
# Accuracy of the tf.matrix_solve_ls solution: test first, then train.
print(acc_test, acc_train, sep="\n")

0.9398
0.94603336


In [9]:
# Accuracy of the explicit normal-equation solution: test first, then train.
print(acc_test2, acc_train2, sep="\n")

    
        
    
    

0.9398
0.94603336


1024
Elapsed time training 1: 26.85148
Elapsed time training 2: 13.085986