In [12]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn.metrics import confusion_matrix, classification_report

from rtbm import RTBM, minimizer

import rtbm.layers as layers
import rtbm.model as mdl

import warnings
warnings.filterwarnings('ignore')

from rtbm.costfunctions import mse, crossentropy


In [13]:
# Read the MNIST CSV dumps (no header row) as plain NumPy arrays
MNIST_train = pd.read_csv('~/data/mnist_train.csv', delimiter=",", header=None).values
MNIST_test = pd.read_csv('~/data/mnist_test.csv', delimiter=",", header=None).values

# Column 0 is used as the target, the remaining columns as features scaled by 1/255
# (normalized onto [0,1]). Only the first 10000 training rows are kept.
Y_train, X_train = MNIST_train[:10000, 0], MNIST_train[:10000, 1:] / 255.0

# The test file is used in full, with the same column split and scaling.
Y_test, X_test = MNIST_test[:, 0], MNIST_test[:, 1:] / 255.0

In [None]:
# Display a single training sample together with its label
i = 10
print(Y_train[i])
I = X_train[i].reshape(28, 28)  # flat feature row -> 28x28 image grid
plt.imshow(I, cmap='gray_r', interpolation='nearest')
plt.show()

# Logistic regression baseline

In [None]:
# Baseline classifier: multinomial logistic regression fitted with the L-BFGS solver
logreg = linear_model.LogisticRegression(solver='lbfgs', multi_class='multinomial')

logreg.fit(X_train, Y_train)


In [None]:
# Logistic regression: per-class report and confusion matrix on the training data
P = logreg.predict(X_train)

for metric in (classification_report, confusion_matrix):
    print(metric(Y_train, P))


In [None]:
# Logistic regression: per-class report and confusion matrix on the held-out test data
P = logreg.predict(X_test)

for metric in (classification_report, confusion_matrix):
    print(metric(Y_test, P))


# Linear regression baseline

In [None]:
# Baseline regressor: ordinary least squares with the digit label as a numeric target
linreg = linear_model.LinearRegression()

linreg.fit(X_train, Y_train)


In [None]:
# Linear regression on the training data: round the continuous output to the
# nearest integer so it can be scored as a class label.
P = np.round(linreg.predict(X_train))

for metric in (classification_report, confusion_matrix):
    print(metric(Y_train, P))

In [None]:
# On test set
# Linear regression returns continuous values; round them to the nearest integer
# label (exactly as the train-set evaluation does) so the classification metrics
# receive discrete classes instead of raw floats.
P=np.round(linreg.predict(X_test))

print(classification_report(Y_test,P))
print(confusion_matrix(Y_test, P))


# Linear regression via CMA

In [None]:
# Model with one NonLinear layer taking 784 inputs to 1 output.
# paramBound=2 is forwarded to the layer -- presumably a bound on its parameters; confirm against rtbm.
M = mdl.Model()
M.add(layers.NonLinear(784, 1, paramBound=2))

# Fit with the CMA minimizer on the MSE cost. The feature matrix is transposed before
# being passed in; transposing the 1-D label vector leaves it unchanged.
minim = minimizer.CMA(False)
sol = minim.train(mse(), M, X_train.T, Y_train.T, maxiter=500)

In [None]:
# Training data: turn the model output into labels by taking the real part,
# rounding to the nearest integer and dropping the sign.
P = np.abs(np.round(np.real(M.predict(X_train.T))))

for metric in (classification_report, confusion_matrix):
    print(metric(Y_train, np.transpose(P)))

In [None]:
# Test data: turn the model output into labels by taking the real part,
# rounding to the nearest integer and dropping the sign.
P = np.abs(np.round(np.real(M.predict(X_test.T))))

for metric in (classification_report, confusion_matrix):
    print(metric(Y_test, np.transpose(P)))

# Linear regression via SGD

In [43]:
# Layer stack 784 -> 3 -> 3 -> 1: two NonLinear layers followed by a Linear output layer
M = mdl.Model()
#M.add(layers.Linear(784,1))
M.add(layers.NonLinear(784, 3))
M.add(layers.NonLinear(3, 3))
M.add(layers.Linear(3, 1))

# Train with SGD on the MSE cost; features are transposed and labels reshaped to one row.
minim = minimizer.SGD()
sol = minim.train(
    mse(), M, X_train.T, Y_train.reshape(1, -1),
    lr=100, maxiter=1000, batch_size=1000
)

# Alternative optimizer kept for reference:
#minim = minimizer.BFGS()
#sol=minim.train(mse(), M, np.transpose(X_train), Y_train.reshape(1,len(Y_train)), maxiter=1000)

Iteration 0 in 0.10(s), cost = 4.348408
Iteration 100 in 10.69(s), cost = 1.302175
Iteration 200 in 21.20(s), cost = 1.134513
Iteration 300 in 31.95(s), cost = 1.058071
Iteration 400 in 42.90(s), cost = 1.000400
Iteration 500 in 53.93(s), cost = 0.963352
Iteration 600 in 64.94(s), cost = 0.933435
Iteration 700 in 75.63(s), cost = 0.876319
Iteration 800 in 82.89(s), cost = 0.810045
Iteration 900 in 91.73(s), cost = 0.766195
('Cost: ', 0.7337537302427808)
('Sol: ', array([ 2.52333813, -0.59303184,  1.22115297, ...,  2.01471075,
        5.56609287,  2.61835274]))
Time: 100 s


In [44]:
# Training data: real part of the network output, rounded to the nearest integer,
# sign dropped, then scored as class labels.
P = np.abs(np.round(np.real(M.predict(X_train.T))))

for metric in (classification_report, confusion_matrix):
    print(metric(Y_train, np.transpose(P)))

             precision    recall  f1-score   support

          0       0.93      0.66      0.77      1001
          1       0.72      0.75      0.73      1127
          2       0.26      0.21      0.23       991
          3       0.36      0.44      0.40      1032
          4       0.10      0.04      0.05       980
          5       0.24      0.66      0.35       863
          6       0.26      0.15      0.19      1014
          7       0.29      0.16      0.21      1070
          8       0.27      0.61      0.37       944
          9       0.00      0.00      0.00       978

avg / total       0.35      0.37      0.34     10000

[[661 184  94  32   9  19   1   1   0   0]
 [  0 845 186  59  25   6   3   1   2   0]
 [ 28  74 204 599  40  33   3   3   7   0]
 [  9  64 250 450  93 110  36   9  11   0]
 [  2   1   4   7  35 809  80  25  17   0]
 [  2   3  27  42  72 566  91  44  16   0]
 [  3   4   3   9  48 683 156  83  25   0]
 [  0   1   9  16  18  86  93 176 671   0]
 [  0   0  11  28

In [45]:
# Test data: real part of the network output, rounded to the nearest integer,
# sign dropped, then scored as class labels.
P = np.abs(np.round(np.real(M.predict(X_test.T))))

for metric in (classification_report, confusion_matrix):
    print(metric(Y_test, np.transpose(P)))


             precision    recall  f1-score   support

          0       0.90      0.64      0.75       980
          1       0.70      0.75      0.72      1135
          2       0.27      0.20      0.23      1032
          3       0.33      0.40      0.36      1010
          4       0.07      0.02      0.03       982
          5       0.23      0.60      0.33       892
          6       0.20      0.12      0.15       958
          7       0.21      0.13      0.16      1028
          8       0.27      0.60      0.37       974
          9       0.00      0.00      0.00      1009

avg / total       0.32      0.35      0.32     10000

[[626 169  99  20  25  30   6   3   2   0]
 [  1 853 194  47  18   6   7   7   2   0]
 [ 33  82 211 591  35  32  20  12  16   0]
 [ 21  78 187 405  71 137  48  39  24   0]
 [  0   4   5   2  22 783  78  62  26   0]
 [  3  13  33  58  52 534  89  70  40   0]
 [  6  11  13  22  61 637 117  64  27   0]
 [  0   7  13  33  21  74  65 136 679   0]
 [  0   5  13  30

# E(h|v) via SGD

In [14]:
# Model with a single DiagExpectationUnitLayer taking 784 inputs to 1 output
M = mdl.Model()
M.add(layers.DiagExpectationUnitLayer(784, 1))
#M.add(layers.DiagExpectationUnitLayer(10,1))

# SGD on the MSE cost with Nesterov momentum; labels are reshaped to a single row.
minim = minimizer.SGD()
sol = minim.train(
    mse(), M, X_train.T, Y_train.reshape(1, -1),
    lr=100, momentum=0.1, nesterov=True, maxiter=1000, batch_size=1000
)

Iteration 0 in 0.50(s), cost = 4.009833


KeyboardInterrupt: 

Process PoolWorker-15:
Process PoolWorker-10:
Process PoolWorker-13:
Traceback (most recent call last):
Traceback (most recent call last):
Process PoolWorker-11:
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
Process PoolWorker-12:
Process PoolWorker-16:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
Traceback (most recent call last):
Process PoolWorker-14:
Process PoolWorker-9:
    self.run()
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
Traceback (most recent call last):
    self.run()
  File "/usr/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/usr/li

In [None]:
# Training data: real part of the model output, rounded to the nearest integer,
# sign dropped, then scored as class labels.
P = np.abs(np.round(np.real(M.predict(X_train.T))))

for metric in (classification_report, confusion_matrix):
    print(metric(Y_train, np.transpose(P)))

In [None]:
# Test data: real part of the model output, rounded to the nearest integer,
# sign dropped, then scored as class labels.
P = np.abs(np.round(np.real(M.predict(X_test.T))))

for metric in (classification_report, confusion_matrix):
    print(metric(Y_test, np.transpose(P)))



# E(h|v) via CMA

In [9]:
# Two stacked DiagExpectationUnitLayers (784 -> 3 -> 1), both constructed with phase=1
M = mdl.Model()
M.add(layers.DiagExpectationUnitLayer(784, 3, phase=1))
M.add(layers.DiagExpectationUnitLayer(3, 1, phase=1))

# Fit with the CMA minimizer on the MSE cost. The feature matrix is transposed before
# being passed in; transposing the 1-D label vector leaves it unchanged.
minim = minimizer.CMA(False)
sol = minim.train(mse(), M, X_train.T, Y_train.T, maxiter=1000)

CMA on 1 cpu(s) enabled
(13_w,27)-aCMA-ES (mu_w=7.8,w_1=22%) in dimension 2363 (seed=1057215, Mon Nov  6 06:03:49 2017)
Iterat #Fevals   function value  axis ratio  sigma  min&max std  t[m:s]
    1     27 8.432775907943645e+00 1.0e+00 9.96e-01  1e+00  1e+00 0:00.2
    2     54 7.165652619614564e+00 1.0e+00 9.93e-01  1e+00  1e+00 0:01.5
    3     81 7.912105729280427e+00 1.0e+00 9.89e-01  1e+00  1e+00 0:02.7
    6    162 5.418252810319234e+00 1.0e+00 9.79e-01  1e+00  1e+00 0:06.1
   10    270 3.968259427223221e+00 1.0e+00 9.68e-01  1e+00  1e+00 0:10.8
   15    405 3.519140165301580e+00 1.0e+00 9.54e-01  1e+00  1e+00 0:15.9
   20    540 3.488005967874373e+00 1.0e+00 9.42e-01  9e-01  9e-01 0:22.2
   28    756 3.043410483177555e+00 1.0e+00 9.24e-01  9e-01  9e-01 0:30.2
   35    945 2.851399375488116e+00 1.0e+00 9.10e-01  9e-01  9e-01 0:38.2
   45   1215 2.675786713304412e+00 1.0e+00 8.94e-01  9e-01  9e-01 0:47.6
   55   1485 2.531320498799549e+00 1.0e+00 8.79e-01  9e-01  9e-01 0:58.3
   67

In [10]:
# Training data: real part of the model output, rounded to the nearest integer,
# sign dropped, then scored as class labels.
# NOTE(review): nothing caps the rounded values at 9, so labels outside 0-9 can show up
# as extra classes in the report -- consider whether clipping is wanted.
P = np.abs(np.round(np.real(M.predict(X_train.T))))

for metric in (classification_report, confusion_matrix):
    print(metric(Y_train, np.transpose(P)))

             precision    recall  f1-score   support

        0.0       0.62      0.15      0.25      1001
        1.0       0.33      0.25      0.28      1127
        2.0       0.19      0.24      0.21       991
        3.0       0.20      0.27      0.23      1032
        4.0       0.11      0.17      0.13       980
        5.0       0.13      0.22      0.16       863
        6.0       0.14      0.18      0.16      1014
        7.0       0.25      0.24      0.25      1070
        8.0       0.20      0.12      0.15       944
        9.0       0.45      0.09      0.15       978
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0

avg / total       0.26      0.20      0.20     10000

[[155 305 254 145  73  44  16   8   1   0   0   0]
 [ 20 284 388 279 116  25  11   4   0   0   0   0]
 [ 51 154 241 244 170  83  30  14   3   1   0   0]
 [ 17  71 225 278 256 106  52  17   8   2   0   0]
 [  4  13  32  69 169 261 230 145  47  10   0   0]


In [11]:
# Test data: real part of the model output, rounded to the nearest integer,
# sign dropped, then scored as class labels.
P = np.abs(np.round(np.real(M.predict(X_test.T))))

for metric in (classification_report, confusion_matrix):
    print(metric(Y_test, np.transpose(P)))


             precision    recall  f1-score   support

        0.0       0.54      0.14      0.22       980
        1.0       0.31      0.24      0.27      1135
        2.0       0.19      0.23      0.21      1032
        3.0       0.19      0.25      0.22      1010
        4.0       0.10      0.15      0.12       982
        5.0       0.15      0.24      0.18       892
        6.0       0.13      0.19      0.16       958
        7.0       0.21      0.22      0.22      1028
        8.0       0.23      0.14      0.17       974
        9.0       0.45      0.12      0.19      1009
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0

avg / total       0.25      0.19      0.20     10000

[[137 297 274 143  71  40  14   3   1   0   0   0]
 [ 28 278 390 293 106  33   4   3   0   0   0   0]
 [ 62 172 235 213 180 106  49   9   5   1   0   0]
 [ 19  92 189 253 236 121  72  19   7   1   1   0]
 [  3   8  21  59 149 270 266 160  36   8   2   0]
