In [2]:
from theta.riemann_theta.riemann_theta import RiemannTheta

import pandas as pd
import numpy as np

from sklearn import linear_model
from sklearn.metrics import confusion_matrix, classification_report

import theano

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout, Reshape  
from keras.optimizers import SGD

from sklearn.preprocessing import LabelBinarizer

import time

from theta.rtbm import RTBM
from theta import minimizer

import theta.layers as layers
import theta.model as mdl

from theta.costfunctions import mse
from theta.activations import sigmoid, linear

Using Theano backend.


In [2]:
# Load MNIST dataset
MNIST_train = pd.read_csv('~/data/mnist_train.csv', delimiter=",",header=None).values
MNIST_test  = pd.read_csv('~/data/mnist_test.csv', delimiter=",",header=None).values

# Prepare data (normalized onto [0,1])
Y_train = MNIST_train[0:10000,0]
X_train = MNIST_train[0:10000,1:]/255.0

Y_test = MNIST_test[:,0]
X_test = MNIST_test[:,1:]/255.0

enc = LabelBinarizer()
enc.fit(np.diag([1,1,1,1,1,1,1,1,1,1]))
enc.classes_ = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
T=enc.transform(Y_train)

# Keras (500 linear + 100 linear + 1 linear + MSE)

In [None]:
model = Sequential() 

model.add(Dense(500,  input_dim=784))
model.add(Activation('linear'))
#model.add(Dense(100,  input_dim=784))
#model.add(Activation('linear'))
model.add(Dense(output_dim=1))
model.add(Activation('linear'))


sgd = SGD(lr=0.001)

tic = time.clock()

model.compile(loss='mse', optimizer=sgd)

toc = time.clock()

print("Compile time: ",toc-tic)

tic = time.clock()

model.fit(X_train, Y_train, batch_size=100, nb_epoch=100, validation_data=None, shuffle=False, verbose=1)  
toc = time.clock()

print("Run time: ",toc-tic)

In [None]:
# On train set
P=np.abs(np.round(np.real(model.predict(X_train)))).flatten()


print(classification_report(Y_train,P.T))
print(confusion_matrix(Y_train, P.T))

In [None]:
# On test set
P=np.abs(np.round(np.real(model.predict(X_test)))).flatten()


print(classification_report(Y_test,P.T))
print(confusion_matrix(Y_test, P.T))

## With Theta and SGD


In [3]:
M = mdl.Model()
M.add(layers.NonLinear(784,500,linear()))
#M.add(layers.NonLinear(500,100,linear()))
M.add(layers.NonLinear(500,1,linear()))
M.set_bound(3)

minim = minimizer.SGD()
sol=minim.train(mse, M, np.transpose(X_train), Y_train.reshape(1,len(Y_train)), lr=0.001, maxiter=100, batch_size=10000)


Progress: |████████████████████| 100.0% | iteration 100 in 27.97(s) | cost = 4.838205


In [4]:
# On train set
P=np.abs(np.round(np.real(M.predict(np.transpose(X_train)))))

print(classification_report(Y_train,P.T))
print(confusion_matrix(Y_train, P.T))

             precision    recall  f1-score   support

        0.0       0.91      0.15      0.26      1001
        1.0       0.16      0.08      0.11      1127
        2.0       0.15      0.20      0.17       991
        3.0       0.17      0.27      0.21      1032
        4.0       0.07      0.14      0.10       980
        5.0       0.08      0.15      0.11       863
        6.0       0.09      0.12      0.11      1014
        7.0       0.23      0.19      0.21      1070
        8.0       0.15      0.06      0.09       944
        9.0       0.42      0.07      0.12       978
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0

avg / total       0.24      0.14      0.15     10000

[[149 347 249 128  70  33  18   7   0   0   0   0]
 [  0  95 488 345 138  46  11   3   1   0   0   0]
 [ 12  90 194 256 266 120  43   8   1   1   0   0]
 [  2  43 181 282 263 153  67  29   8   3   1   0]
 [  1   4  16  58 137 261 254 154  71  21   3   0]


  'recall', 'true', average, warn_for)


In [5]:
# On train set
P=np.abs(np.round(np.real(M.predict(np.transpose(X_train)))))

print(classification_report(Y_train,P.T))
print(confusion_matrix(Y_train, P.T))

             precision    recall  f1-score   support

        0.0       0.91      0.15      0.26      1001
        1.0       0.16      0.08      0.11      1127
        2.0       0.15      0.20      0.17       991
        3.0       0.17      0.27      0.21      1032
        4.0       0.07      0.14      0.10       980
        5.0       0.08      0.15      0.11       863
        6.0       0.09      0.12      0.11      1014
        7.0       0.23      0.19      0.21      1070
        8.0       0.15      0.06      0.09       944
        9.0       0.42      0.07      0.12       978
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0

avg / total       0.24      0.14      0.15     10000

[[149 347 249 128  70  33  18   7   0   0   0   0]
 [  0  95 488 345 138  46  11   3   1   0   0   0]
 [ 12  90 194 256 266 120  43   8   1   1   0   0]
 [  2  43 181 282 263 153  67  29   8   3   1   0]
 [  1   4  16  58 137 261 254 154  71  21   3   0]


# Keras (200 sigmoids + 10 sigmoids + 1 linear + MSE)

In [None]:
model = Sequential() 

model.add(Dense(10,  input_dim=784))
model.add(Activation('sigmoid'))
#model.add(Dense(10))
#model.add(Activation('sigmoid'))
model.add(Dense(output_dim=1))
model.add(Activation('linear'))


sgd = SGD(lr=0.1)

tic = time.clock()

model.compile(loss='mse', optimizer=sgd)

toc = time.clock()

print("Compile time: ",toc-tic)

tic = time.clock()

model.fit(X_train, Y_train, batch_size=10000, nb_epoch=200, validation_data=None, shuffle=False, verbose=0)  
toc = time.clock()

print("Run time: ",toc-tic)

In [None]:
# On train set
P=np.abs(np.round(np.real(model.predict(X_train)))).flatten()


print(classification_report(Y_train,P.T))
print(confusion_matrix(Y_train, P.T))

In [None]:
# On test set
P=np.abs(np.round(np.real(model.predict(X_test)))).flatten()


print(classification_report(Y_test,P.T))
print(confusion_matrix(Y_test, P.T))

# With Theta and SGD

In [None]:
M = mdl.Model()
M.add(layers.NonLinear(784,10,sigmoid))
#M.add(layers.NonLinear(200,10,sigmoid)
M.add(layers.Linear(10,1))

minim = minimizer.SGD()
sol=minim.train(mse, M, np.transpose(X_train), Y_train.reshape(1,len(Y_train)), lr=0.1, maxiter=200, batch_size=10000)


In [None]:
# On train set
P=np.abs(np.round(np.real(M.predict(np.transpose(X_train)))))

print(classification_report(Y_train,P.T))
print(confusion_matrix(Y_train, P.T))

In [None]:
# On test set
P=np.abs(np.round(np.real(M.predict(np.transpose(X_test)))))

print(classification_report(Y_test,P.T))
print(confusion_matrix(Y_test, P.T))


# Keras (200 sigmoids + 10 Softmax + MSE)¶

In [None]:
model = Sequential() 

model.add(Dense(200,  input_dim=784))
model.add(Activation('sigmoid'))
model.add(Dense(10,  input_dim=784))
model.add(Activation('softmax'))


sgd = SGD(lr=0.001)

tic = time.clock()

model.compile(loss='mse', optimizer=sgd)

toc = time.clock()

print("Compile time: ",toc-tic)

tic = time.clock()

model.fit(X_train, T, batch_size=1000, nb_epoch=100, validation_data=None, shuffle=False, verbose=0)  
toc = time.clock()

print("Run time: ",toc-tic)

In [None]:
# On train set
P=np.argmax(model.predict(X_train),axis=1)

print(classification_report(Y_train,P.T))
print(confusion_matrix(Y_train, P.T))

In [None]:
# On test set
P=np.argmax(model.predict(X_test),axis=1)

print(classification_report(Y_test,P.T))
print(confusion_matrix(Y_test, P.T))

## theta

In [None]:
M = mdl.Model()
M.add(layers.NonLinear(784,200,sigmoid))
M.add(layers.Linear(200,10))
M.add(layers.SoftMaxLayer(10))

minim = minimizer.SGD()
sol=minim.train(mse, M, np.transpose(X_train), T.T, lr=0.001, maxiter=200, batch_size=1000)


In [None]:
P=np.argmax(M.predict(np.transpose(X_train)),axis=0)

print(classification_report(Y_train,P.T))
print(confusion_matrix(Y_train, P.T))

In [None]:
P=np.argmax(M.predict(np.transpose(X_test)),axis=0)


print(classification_report(Y_test,P.T))
print(confusion_matrix(Y_test, P.T))