In [2]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn.metrics import confusion_matrix, classification_report

from rtbm import RTBM, minimizer

import rtbm.layers as layers
import rtbm.model as mdl

import warnings
# Install a global 'ignore' filter: every warning raised after this point,
# from any library, is suppressed for the rest of the session.
# NOTE(review): presumably meant to keep cell output readable, but it also
# hides numerical/convergence warnings from numpy, sklearn and rtbm —
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

from rtbm.costfunctions import mse, crossentropy


In [3]:
# Read the MNIST CSV dumps (no header row) into plain numpy arrays.
# Column 0 is used as the digit label, the remaining columns as pixel values.
MNIST_train = pd.read_csv('~/data/mnist_train.csv', sep=",", header=None).values
MNIST_test = pd.read_csv('~/data/mnist_test.csv', sep=",", header=None).values

# Training split: only the first 10000 rows are kept; pixels are divided by
# 255 so that 8-bit intensities land on [0, 1].
X_train = MNIST_train[:10000, 1:] / 255.0
Y_train = MNIST_train[:10000, 0]

# Test split: every row is used, with the same rescaling.
X_test = MNIST_test[:, 1:] / 255.0
Y_test = MNIST_test[:, 0]

In [None]:
# Inspect one training example: print its label, then draw the picture.
i = 10
print(Y_train[i])
# A row of X_train is a flat vector of 784 values; fold it into a 28x28 grid.
I = X_train[i].reshape((28, 28))
plt.imshow(I, cmap='gray_r', interpolation='nearest')
plt.show()

# Logistic regression baseline

In [None]:
# Baseline classifier: multinomial logistic regression fitted with the
# L-BFGS solver on X_train / Y_train.
logreg = linear_model.LogisticRegression(
    solver='lbfgs',
    multi_class='multinomial',
)

# Left as the final expression so the fitted estimator is echoed by the cell.
logreg.fit(X_train, Y_train)


In [None]:
# Score the logistic-regression baseline on the data it was fitted to:
# per-class precision/recall/F1 followed by the raw confusion matrix.
P = logreg.predict(X_train)

print(classification_report(y_true=Y_train, y_pred=P))
print(confusion_matrix(y_true=Y_train, y_pred=P))


In [None]:
# Score the logistic-regression baseline on the held-out test set:
# per-class precision/recall/F1 followed by the raw confusion matrix.
P = logreg.predict(X_test)

print(classification_report(y_true=Y_test, y_pred=P))
print(confusion_matrix(y_true=Y_test, y_pred=P))


# Linear regression baseline

In [None]:
linreg = linear_model.LogisticRegression()

linreg.fit(X_train,Y_train)


In [None]:
# On train set

P=linreg.predict(X_train)

print(classification_report(Y_train,P))
print(confusion_matrix(Y_train, P))

In [None]:
# On test set
P=linreg.predict(X_test)

print(classification_report(Y_test,P))
print(confusion_matrix(Y_test, P))


# Linear regression via CMA

In [6]:
# Model: one Linear layer taking the 784 pixel inputs to a single output.
# paramBound=2 is forwarded to the layer unchanged (presumably a bound on its
# parameters -- confirm against rtbm.layers).
M = mdl.Model()
M.add(layers.Linear(784, 1, paramBound=2))

# Train with the CMA minimizer on the MSE cost, maxiter=1000.
# X_train is handed over transposed; transposing the 1-D label vector
# leaves it unchanged.
minim = minimizer.CMA(True)
sol = minim.train(mse(), M, X_train.T, Y_train.T, maxiter=1000)

CMA on 16 cpu(s) enabled
(11_w,23)-aCMA-ES (mu_w=6.7,w_1=25%) in dimension 785 (seed=454940, Mon Oct 30 14:44:43 2017)
Iterat #Fevals   function value  axis ratio  sigma  min&max std  t[m:s]
    1     23 1.492180678962190e+01 1.0e+00 1.98e-01  2e-01  2e-01 0:00.4
    2     46 1.230647680330102e+01 1.0e+00 1.96e-01  2e-01  2e-01 0:00.7
    3     69 1.119993608579463e+01 1.0e+00 1.95e-01  2e-01  2e-01 0:01.0
   12    276 6.480818505721902e+00 1.0e+00 1.84e-01  2e-01  2e-01 0:04.2
   23    529 5.517513189515633e+00 1.0e+00 1.75e-01  2e-01  2e-01 0:08.5
   37    851 4.736834237349034e+00 1.0e+00 1.67e-01  2e-01  2e-01 0:13.8
   54   1242 4.745867963508003e+00 1.0e+00 1.61e-01  2e-01  2e-01 0:19.8
   72   1656 4.056317044951969e+00 1.0e+00 1.55e-01  2e-01  2e-01 0:27.0
   94   2162 4.099059856533170e+00 1.0e+00 1.52e-01  2e-01  2e-01 0:35.1
  100   2300 4.128655219245980e+00 1.0e+00 1.51e-01  2e-01  2e-01 0:37.4
  127   2921 4.187820657717249e+00 1.1e+00 1.48e-01  1e-01  1e-01 0:47.5
  157 

In [7]:
# On train set
# Model output -> real part -> nearest integer -> magnitude, so the
# predictions are non-negative and integer-valued before being scored.
P = np.absolute(np.around(np.real(M.predict(X_train.T))))

print(classification_report(y_true=Y_train, y_pred=P.T))
print(confusion_matrix(y_true=Y_train, y_pred=P.T))

             precision    recall  f1-score   support

        0.0       0.45      0.16      0.24      1001
        1.0       0.25      0.20      0.22      1127
        2.0       0.16      0.19      0.17       991
        3.0       0.18      0.22      0.20      1032
        4.0       0.14      0.17      0.15       980
        5.0       0.14      0.21      0.16       863
        6.0       0.16      0.20      0.18      1014
        7.0       0.20      0.20      0.20      1070
        8.0       0.19      0.15      0.17       944
        9.0       0.34      0.14      0.20       978
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0
       13.0       0.00      0.00      0.00         0
       14.0       0.00      0.00      0.00         0
       15.0       0.00      0.00      0.00         0

avg / total       0.22      0.19      0.19     10000

[[162 332 244 136  71  32  17   5   0   1  

In [8]:
# On test set
# Model output -> real part -> nearest integer -> magnitude, so the
# predictions are non-negative and integer-valued before being scored.
P = np.absolute(np.around(np.real(M.predict(X_test.T))))

print(classification_report(y_true=Y_test, y_pred=P.T))
print(confusion_matrix(y_true=Y_test, y_pred=P.T))

             precision    recall  f1-score   support

        0.0       0.41      0.15      0.22       980
        1.0       0.27      0.23      0.25      1135
        2.0       0.13      0.15      0.14      1032
        3.0       0.17      0.21      0.19      1010
        4.0       0.12      0.15      0.13       982
        5.0       0.13      0.20      0.16       892
        6.0       0.14      0.17      0.15       958
        7.0       0.20      0.20      0.20      1028
        8.0       0.19      0.15      0.17       974
        9.0       0.34      0.14      0.20      1009
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0
       13.0       0.00      0.00      0.00         0
       14.0       0.00      0.00      0.00         0
       15.0       0.00      0.00      0.00         0

avg / total       0.21      0.18      0.18     10000

[[149 316 253 128  78  32  15   6   3   0  

# Linear regression via SGD

In [13]:
# Fresh model: one Linear layer taking the 784 pixel inputs to a single output.
M = mdl.Model()
M.add(layers.Linear(784, 1))

# Train with the SGD minimizer on the MSE cost, lr=0.01 and maxiter=1000.
# X_train is handed over transposed; transposing the 1-D label vector
# leaves it unchanged.
minim = minimizer.SGD()
sol = minim.train(mse(), M, X_train.T, Y_train.T, lr=0.01, maxiter=1000)

(0, ' cost: ', 15.922667548968759)
(100, ' cost: ', 4.7984606206495908)
(200, ' cost: ', 3.7660931768340067)
(300, ' cost: ', 3.3039397445559029)
(400, ' cost: ', 3.033424664418106)
(500, ' cost: ', 2.8489721851779946)
(600, ' cost: ', 2.7108713379087455)
(700, ' cost: ', 2.6012580938753764)
(800, ' cost: ', 2.5109250887868555)
(900, ' cost: ', 2.4345795665987655)
('SOL: ', array([ 1.51372709, -0.20996248,  0.00832895,  0.10827826,  0.92093349,
       -0.62722009,  0.07997827, -0.87068481, -0.22509246,  0.90628442,
        0.80356978, -0.26721447,  0.60315407,  0.88465885, -0.70456117,
        0.90397407, -0.48119746, -0.16439457,  0.55344017,  0.65348121,
       -0.79758334,  0.33519964, -0.00715261,  0.80515547, -0.96869872,
        0.6286935 , -0.33688859, -0.10374099,  0.26930579,  0.10249728,
       -0.30078416, -0.84547836, -0.52148305, -0.59633283,  0.83440817,
        0.56295861, -0.44068137, -0.62411915, -0.01703119, -0.72326241,
        0.03741576,  0.17499255, -0.11529285, -

In [16]:
# On train set
# Model output -> real part -> nearest integer -> magnitude, so the
# predictions are non-negative and integer-valued before being scored.
P = np.absolute(np.around(np.real(M.predict(X_train.T))))

print(classification_report(y_true=Y_train, y_pred=P.T))
print(confusion_matrix(y_true=Y_train, y_pred=P.T))

             precision    recall  f1-score   support

        0.0       0.49      0.20      0.28      1001
        1.0       0.31      0.27      0.29      1127
        2.0       0.15      0.18      0.17       991
        3.0       0.18      0.22      0.20      1032
        4.0       0.13      0.19      0.16       980
        5.0       0.12      0.18      0.14       863
        6.0       0.18      0.23      0.20      1014
        7.0       0.21      0.19      0.20      1070
        8.0       0.18      0.12      0.15       944
        9.0       0.37      0.12      0.18       978
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0
       13.0       0.00      0.00      0.00         0
       14.0       0.00      0.00      0.00         0

avg / total       0.23      0.19      0.20     10000

[[197 331 220 113  82  35  17   4   2   0   0   0   0   0   0]
 [ 77 309 380 215  76  49  13   8

In [17]:
# On test set
# Model output -> real part -> nearest integer -> magnitude, so the
# predictions are non-negative and integer-valued before being scored.
P = np.absolute(np.around(np.real(M.predict(X_test.T))))

print(classification_report(y_true=Y_test, y_pred=P.T))
print(confusion_matrix(y_true=Y_test, y_pred=P.T))


             precision    recall  f1-score   support

        0.0       0.44      0.19      0.27       980
        1.0       0.32      0.28      0.30      1135
        2.0       0.16      0.19      0.17      1032
        3.0       0.20      0.22      0.21      1010
        4.0       0.13      0.18      0.16       982
        5.0       0.15      0.22      0.18       892
        6.0       0.15      0.21      0.18       958
        7.0       0.18      0.18      0.18      1028
        8.0       0.22      0.15      0.17       974
        9.0       0.38      0.12      0.18      1009
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0
       13.0       0.00      0.00      0.00         0
       14.0       0.00      0.00      0.00         0

avg / total       0.23      0.20      0.20     10000

[[185 344 227 105  73  23  16   6   0   1   0   0   0   0   0]
 [ 80 321 391 195  95  36  10   7