In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn.metrics import confusion_matrix, classification_report

from rtbm import RTBM, minimizer

import rtbm.layers as layers
import rtbm.model as mdl

import warnings
warnings.filterwarnings('ignore')

from rtbm.costfunctions import mse, crossentropy


The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed',)).History will not be written to the database.


In [2]:
# Load the MNIST CSV dumps (no header row) as plain NumPy arrays
MNIST_train = pd.read_csv(
    '~/data/mnist_train.csv',
    delimiter=",",
    header=None,
).values
MNIST_test = pd.read_csv(
    '~/data/mnist_test.csv',
    delimiter=",",
    header=None,
).values

# Column 0 is used as the label, the remaining columns as pixel features
# rescaled onto [0,1]. Only the first 10000 training rows are kept.
Y_train, X_train = MNIST_train[:10000, 0], MNIST_train[:10000, 1:] / 255.0

# The test set is used in full.
Y_test, X_test = MNIST_test[:, 0], MNIST_test[:, 1:] / 255.0

In [None]:
# Show one training sample: print its label, then draw its pixels
i = 10
print(Y_train[i])

# Each row of X_train is a flat 784-vector; fold it back into a 28x28 image
I = X_train[i].reshape(28, 28)

plt.imshow(I, cmap='gray_r', interpolation='nearest')
plt.show()

# Logistic regression baseline

In [None]:
# Baseline classifier: multinomial logistic regression fitted with L-BFGS
logreg = linear_model.LogisticRegression(solver='lbfgs', multi_class='multinomial')

# Left as the last expression so the fitted estimator is echoed by the notebook
logreg.fit(X_train, Y_train)


In [None]:
# Logistic regression: metrics on the training subset
P = logreg.predict(X_train)

print(classification_report(y_true=Y_train, y_pred=P))
print(confusion_matrix(y_true=Y_train, y_pred=P))


In [None]:
# Logistic regression: metrics on the held-out test set
P = logreg.predict(X_test)

print(classification_report(y_true=Y_test, y_pred=P))
print(confusion_matrix(y_true=Y_test, y_pred=P))


# Linear regression baseline

In [None]:
# Baseline regressor: ordinary least squares, using the digit label itself
# as the numeric regression target
linreg = linear_model.LinearRegression()

# Left as the last expression so the fitted estimator is echoed by the notebook
linreg.fit(X=X_train, y=Y_train)


In [None]:
# Linear regression: metrics on the training subset
# The regressor output is continuous, so round it to the nearest integer label
P = linreg.predict(X_train)
P = np.round(P)

print(classification_report(y_true=Y_train, y_pred=P))
print(confusion_matrix(y_true=Y_train, y_pred=P))

In [None]:
# On test set
# LinearRegression returns continuous values. Round them to the nearest
# integer label, exactly as the train-set cell does, so the classification
# metrics receive discrete classes rather than continuous targets (which
# scikit-learn's classification metrics reject).
P=np.round(linreg.predict(X_test))

print(classification_report(Y_test,P))
print(confusion_matrix(Y_test, P))


# Linear regression via CMA

In [None]:
# Model with a single Linear layer built as Linear(784, 1, paramBound=2)
# (presumably 784 pixel inputs -> 1 output; confirm against rtbm.layers)
M = mdl.Model()
M.add(layers.Linear(784, 1, paramBound=2))

# Fit with the CMA minimizer on the MSE cost. The data is passed transposed.
# NOTE(review): the meaning of the positional flag given to CMA is not visible here.
minim = minimizer.CMA(False)
sol = minim.train(
    mse(),
    M,
    X_train.T,
    Y_train.T,
    maxiter=1000,
)

In [None]:
# Linear/CMA model: metrics on the training subset
# Turn the raw model output into labels step by step:
# keep the real part, round to the nearest integer, then take the magnitude.
# The result is not restricted to 0-9, so extra classes may show up in the report.
P = M.predict(X_train.T)
P = np.real(P)
P = np.abs(np.round(P))

print(classification_report(y_true=Y_train, y_pred=P.T))
print(confusion_matrix(y_true=Y_train, y_pred=P.T))

In [None]:
# Linear/CMA model: metrics on the test set
# Same post-processing as for the train set: real part -> round -> magnitude.
# The result is not restricted to 0-9, so extra classes may show up in the report.
P = M.predict(X_test.T)
P = np.real(P)
P = np.abs(np.round(P))

print(classification_report(y_true=Y_test, y_pred=P.T))
print(confusion_matrix(y_true=Y_test, y_pred=P.T))

# Linear regression via SGD

In [9]:
# Model with a single Linear layer built as Linear(784, 1)
# (presumably 784 pixel inputs -> 1 output; confirm against rtbm.layers)
M = mdl.Model()
M.add(layers.Linear(784, 1))

# Fit with the SGD minimizer on the MSE cost. The data is passed transposed.
minim = minimizer.SGD()
sol = minim.train(
    mse(),
    M,
    X_train.T,
    Y_train.T,
    lr=0.01,
    maxiter=1000,
)

Iteration 0 in 0.04(s), cost = 51.029886
Iteration 100 in 3.63(s), cost = 4.990281
Iteration 200 in 7.23(s), cost = 3.785383
Iteration 300 in 10.83(s), cost = 3.295191
Iteration 400 in 14.43(s), cost = 3.006738
Iteration 500 in 18.04(s), cost = 2.806643
Iteration 600 in 21.64(s), cost = 2.656275
Iteration 700 in 25.24(s), cost = 2.538062
Iteration 800 in 28.84(s), cost = 2.442369
Iteration 900 in 32.43(s), cost = 2.363243
('Cost: ', 2.2973336597521326)
('Sol: ', array([ 1.65043964, -0.19493395,  0.30851042, -0.15299829,  0.88331312,
       -0.40973687,  0.98787542,  0.2069944 ,  0.18384736, -0.16172127,
       -0.70903942,  0.70784304,  0.72634029, -0.51636314, -0.63211602,
       -0.35906959, -0.84871592,  0.50730292, -0.38631607, -0.7038732 ,
        0.22020866,  0.71352933,  0.45164325,  0.7800071 ,  0.98666499,
        0.83651597, -0.40847606,  0.34670479, -0.72387943,  0.08419134,
        0.52221697, -0.22481053, -0.30426903, -0.34664536,  0.16163132,
       -0.04824536,  0.461536

In [10]:
# Linear/SGD model: metrics on the training subset
# Turn the raw model output into labels step by step:
# keep the real part, round to the nearest integer, then take the magnitude.
# The result is not restricted to 0-9, hence the extra 10.0+ rows in the report.
P = M.predict(X_train.T)
P = np.real(P)
P = np.abs(np.round(P))

print(classification_report(y_true=Y_train, y_pred=P.T))
print(confusion_matrix(y_true=Y_train, y_pred=P.T))

             precision    recall  f1-score   support

        0.0       0.53      0.19      0.28      1001
        1.0       0.36      0.32      0.34      1127
        2.0       0.16      0.20      0.18       991
        3.0       0.22      0.26      0.24      1032
        4.0       0.12      0.16      0.13       980
        5.0       0.14      0.23      0.17       863
        6.0       0.17      0.21      0.19      1014
        7.0       0.24      0.22      0.23      1070
        8.0       0.18      0.12      0.15       944
        9.0       0.40      0.12      0.18       978
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0
       13.0       0.00      0.00      0.00         0

avg / total       0.26      0.21      0.21     10000

[[194 341 235 112  62  32  18   7   0   0   0   0   0   0]
 [ 40 365 389 190  81  39  18   3   0   2   0   0   0   0]
 [ 71 143 198 203 184 111  50  

In [11]:
# Linear/SGD model: metrics on the test set
# Same post-processing as for the train set: real part -> round -> magnitude.
# The result is not restricted to 0-9, hence the extra 10.0+ rows in the report.
P = M.predict(X_test.T)
P = np.real(P)
P = np.abs(np.round(P))

print(classification_report(y_true=Y_test, y_pred=P.T))
print(confusion_matrix(y_true=Y_test, y_pred=P.T))


             precision    recall  f1-score   support

        0.0       0.54      0.21      0.31       980
        1.0       0.35      0.32      0.33      1135
        2.0       0.17      0.20      0.19      1032
        3.0       0.20      0.23      0.21      1010
        4.0       0.14      0.17      0.15       982
        5.0       0.13      0.20      0.16       892
        6.0       0.13      0.17      0.15       958
        7.0       0.21      0.21      0.21      1028
        8.0       0.18      0.13      0.15       974
        9.0       0.40      0.13      0.20      1009
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0

avg / total       0.25      0.20      0.21     10000

[[210 340 207 121  56  28  13   5   0   0   0   0   0]
 [ 52 361 379 194  96  34  14   5   0   0   0   0   0]
 [ 66 140 208 215 173 132  56  25  12   4   1   0   0]
 [ 30 109 190 234 170 139  90  29   9

# E(h|v) via SGD

In [5]:
# Model with a single DiagExpectationUnitLayer built with arguments (784, 1)
# (presumably 784 pixel inputs -> 1 output; confirm against rtbm.layers)
M = mdl.Model()
M.add(layers.DiagExpectationUnitLayer(784, 1))

# Fit with the SGD minimizer on the MSE cost. The data is passed transposed.
minim = minimizer.SGD()
sol = minim.train(
    mse(),
    M,
    X_train.T,
    Y_train.T,
    lr=0.1,
    maxiter=1000,
)

Iteration 0 in 1.63(s), cost = 11.796787
Iteration 100 in 165.46(s), cost = 3.217344
Iteration 200 in 315.12(s), cost = 2.730699
Iteration 300 in 452.17(s), cost = 2.532331
Iteration 400 in 598.48(s), cost = 2.417017
Iteration 500 in 749.20(s), cost = 2.342984
Iteration 600 in 903.56(s), cost = 2.288717
Iteration 700 in 1057.71(s), cost = 2.245285
Iteration 800 in 1222.48(s), cost = 2.208795
Iteration 900 in 1375.65(s), cost = 2.177538
('Cost: ', (2.1508761350265022+0j))
('Sol: ', array([-3.20628814e+00+0.j,  5.01635783e-02+0.j, -1.17242831e-01+0.j,
        8.26052787e-01+0.j,  2.64293323e-01+0.j,  8.37135222e-01+0.j,
       -3.42613481e-01+0.j, -4.38661176e-01+0.j, -3.36967675e-01+0.j,
        5.67044333e-01+0.j,  1.82057105e-02+0.j, -9.82107222e-01+0.j,
        2.80964118e-01+0.j,  8.78083243e-01+0.j, -4.18351407e-03+0.j,
        3.39131628e-01+0.j,  9.14423181e-02+0.j,  7.74319628e-01+0.j,
        2.82993637e-01+0.j, -3.90397716e-01+0.j,  2.43945028e-01+0.j,
       -2.45903223e-01+0

In [6]:
# E(h|v)/SGD model: metrics on the training subset
# Turn the raw model output into labels step by step:
# keep the real part, round to the nearest integer, then take the magnitude.
# The result is not restricted to 0-9, hence the extra 10.0+ rows in the report.
P = M.predict(X_train.T)
P = np.real(P)
P = np.abs(np.round(P))

print(classification_report(y_true=Y_train, y_pred=P.T))
print(confusion_matrix(y_true=Y_train, y_pred=P.T))

             precision    recall  f1-score   support

        0.0       0.84      0.23      0.36      1001
        1.0       0.33      0.24      0.28      1127
        2.0       0.17      0.22      0.19       991
        3.0       0.18      0.24      0.21      1032
        4.0       0.08      0.14      0.11       980
        5.0       0.10      0.17      0.12       863
        6.0       0.12      0.15      0.14      1014
        7.0       0.24      0.23      0.23      1070
        8.0       0.16      0.08      0.11       944
        9.0       0.43      0.08      0.14       978
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0

avg / total       0.27      0.18      0.19     10000

[[228 342 198 112  68  26  16  10   1   0   0   0   0]
 [  1 271 462 235 102  38  13   3   2   0   0   0   0]
 [ 28 117 220 279 191 103  40  11   1   1   0   0   0]
 [ 11  58 221 252 253 134  61  25  14

In [7]:
# E(h|v)/SGD model: metrics on the test set
# Same post-processing as for the train set: real part -> round -> magnitude.
# The result is not restricted to 0-9, hence the extra 10.0+ rows in the report.
P = M.predict(X_test.T)
P = np.real(P)
P = np.abs(np.round(P))

print(classification_report(y_true=Y_test, y_pred=P.T))
print(confusion_matrix(y_true=Y_test, y_pred=P.T))



             precision    recall  f1-score   support

        0.0       0.83      0.22      0.34       980
        1.0       0.32      0.24      0.27      1135
        2.0       0.17      0.21      0.18      1032
        3.0       0.18      0.23      0.20      1010
        4.0       0.10      0.15      0.12       982
        5.0       0.14      0.23      0.17       892
        6.0       0.12      0.19      0.15       958
        7.0       0.22      0.23      0.23      1028
        8.0       0.20      0.11      0.14       974
        9.0       0.43      0.10      0.16      1009
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0

avg / total       0.27      0.19      0.20     10000

[[211 329 219 107  65  37   8   2   2   0   0   0]
 [  0 269 473 241  97  38  13   4   0   0   0   0]
 [ 31 125 213 272 206 116  49  17   2   0   1   0]
 [ 11  78 211 230 202 143  77  46   7   4   1   0]
 [  0   2  11  50 145 258 289 151  60  11   4   1]


# E(h|v) via CMA

In [None]:
# Model with a single DiagExpectationUnitLayer built with arguments (784, 1)
# (presumably 784 pixel inputs -> 1 output; confirm against rtbm.layers)
M = mdl.Model()
M.add(layers.DiagExpectationUnitLayer(784, 1))

# Fit with the CMA minimizer on the MSE cost. The data is passed transposed.
# NOTE(review): the meaning of the positional flag given to CMA is not visible here.
minim = minimizer.CMA(True)
sol = minim.train(
    mse(),
    M,
    X_train.T,
    Y_train.T,
    maxiter=1000,
)

In [None]:
# E(h|v)/CMA model: metrics on the training subset
# Turn the raw model output into labels step by step:
# keep the real part, round to the nearest integer, then take the magnitude.
# The result is not restricted to 0-9, so extra classes may show up in the report.
P = M.predict(X_train.T)
P = np.real(P)
P = np.abs(np.round(P))

print(classification_report(y_true=Y_train, y_pred=P.T))
print(confusion_matrix(y_true=Y_train, y_pred=P.T))

In [None]:
# E(h|v)/CMA model: metrics on the test set
# Same post-processing as for the train set: real part -> round -> magnitude.
# The result is not restricted to 0-9, so extra classes may show up in the report.
P = M.predict(X_test.T)
P = np.real(P)
P = np.abs(np.round(P))

print(classification_report(y_true=Y_test, y_pred=P.T))
print(confusion_matrix(y_true=Y_test, y_pred=P.T))
