In [2]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn.metrics import confusion_matrix, classification_report

from rtbm import RTBM, minimizer

import rtbm.layers as layers
import rtbm.model as mdl

import warnings
warnings.filterwarnings('ignore')

from rtbm.costfunctions import mse, crossentropy


In [3]:
# Load MNIST dataset
MNIST_train = pd.read_csv('~/data/mnist_train.csv', delimiter=",",header=None).values
MNIST_test  = pd.read_csv('~/data/mnist_test.csv', delimiter=",",header=None).values

# Prepare data (normalized onto [0,1])
Y_train = MNIST_train[0:10000,0]
X_train = MNIST_train[0:10000,1:]/255.0

Y_test = MNIST_test[:,0]
X_test = MNIST_test[:,1:]/255.0

In [None]:
# Visualize individual pics
i=10
print(Y_train[i])
I=np.reshape(X_train[i], (28,28))
plt.imshow(I, interpolation='nearest',  cmap='gray_r')
plt.show()

# Logistic regression baseline

In [None]:
# Logistic regression baseline
logreg = linear_model.LogisticRegression(multi_class='multinomial',solver='lbfgs')

logreg.fit(X_train,Y_train)


In [None]:
# On train set

P=logreg.predict(X_train)

print(classification_report(Y_train,P))
print(confusion_matrix(Y_train, P))


In [None]:
# On test set
P=logreg.predict(X_test)

print(classification_report(Y_test,P))
print(confusion_matrix(Y_test, P))


# Linear regression base line

In [None]:
linreg = linear_model.LogisticRegression()

linreg.fit(X_train,Y_train)


In [None]:
# On train set

P=linreg.predict(X_train)

print(classification_report(Y_train,P))
print(confusion_matrix(Y_train, P))

In [None]:
# On test set
P=linreg.predict(X_test)

print(classification_report(Y_test,P))
print(confusion_matrix(Y_test, P))


# Linear regression via CMA

In [6]:
M = mdl.Model()
M.add(layers.Linear(784,1,paramBound=2))

minim = minimizer.CMA(True)
sol=minim.train(mse(), M, np.transpose(X_train), np.transpose(Y_train), maxiter=1000)

CMA on 16 cpu(s) enabled
(11_w,23)-aCMA-ES (mu_w=6.7,w_1=25%) in dimension 785 (seed=454940, Mon Oct 30 14:44:43 2017)
Iterat #Fevals   function value  axis ratio  sigma  min&max std  t[m:s]
    1     23 1.492180678962190e+01 1.0e+00 1.98e-01  2e-01  2e-01 0:00.4
    2     46 1.230647680330102e+01 1.0e+00 1.96e-01  2e-01  2e-01 0:00.7
    3     69 1.119993608579463e+01 1.0e+00 1.95e-01  2e-01  2e-01 0:01.0
   12    276 6.480818505721902e+00 1.0e+00 1.84e-01  2e-01  2e-01 0:04.2
   23    529 5.517513189515633e+00 1.0e+00 1.75e-01  2e-01  2e-01 0:08.5
   37    851 4.736834237349034e+00 1.0e+00 1.67e-01  2e-01  2e-01 0:13.8
   54   1242 4.745867963508003e+00 1.0e+00 1.61e-01  2e-01  2e-01 0:19.8
   72   1656 4.056317044951969e+00 1.0e+00 1.55e-01  2e-01  2e-01 0:27.0
   94   2162 4.099059856533170e+00 1.0e+00 1.52e-01  2e-01  2e-01 0:35.1
  100   2300 4.128655219245980e+00 1.0e+00 1.51e-01  2e-01  2e-01 0:37.4
  127   2921 4.187820657717249e+00 1.1e+00 1.48e-01  1e-01  1e-01 0:47.5
  157 

In [7]:
# On train set
P=np.abs(np.round(np.real(M.predict(np.transpose(X_train)))))


print(classification_report(Y_train,P.T))
print(confusion_matrix(Y_train, P.T))

             precision    recall  f1-score   support

        0.0       0.45      0.16      0.24      1001
        1.0       0.25      0.20      0.22      1127
        2.0       0.16      0.19      0.17       991
        3.0       0.18      0.22      0.20      1032
        4.0       0.14      0.17      0.15       980
        5.0       0.14      0.21      0.16       863
        6.0       0.16      0.20      0.18      1014
        7.0       0.20      0.20      0.20      1070
        8.0       0.19      0.15      0.17       944
        9.0       0.34      0.14      0.20       978
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0
       13.0       0.00      0.00      0.00         0
       14.0       0.00      0.00      0.00         0
       15.0       0.00      0.00      0.00         0

avg / total       0.22      0.19      0.19     10000

[[162 332 244 136  71  32  17   5   0   1  

In [8]:
# On test set
P=np.abs(np.round(np.real(M.predict(np.transpose(X_test)))))

print(classification_report(Y_test,P.T))
print(confusion_matrix(Y_test, P.T))

             precision    recall  f1-score   support

        0.0       0.41      0.15      0.22       980
        1.0       0.27      0.23      0.25      1135
        2.0       0.13      0.15      0.14      1032
        3.0       0.17      0.21      0.19      1010
        4.0       0.12      0.15      0.13       982
        5.0       0.13      0.20      0.16       892
        6.0       0.14      0.17      0.15       958
        7.0       0.20      0.20      0.20      1028
        8.0       0.19      0.15      0.17       974
        9.0       0.34      0.14      0.20      1009
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0
       13.0       0.00      0.00      0.00         0
       14.0       0.00      0.00      0.00         0
       15.0       0.00      0.00      0.00         0

avg / total       0.21      0.18      0.18     10000

[[149 316 253 128  78  32  15   6   3   0  

# Linear regression via SGD

In [26]:
M = mdl.Model()
M.add(layers.Linear(784,1))

minim = minimizer.SGD()
sol=minim.train(mse(), M, np.transpose(X_train), np.transpose(Y_train), lr=0.05, maxiter=1000)

(0, ' cost: ', 15.573956968413773)
(100, ' cost: ', 2.732805577147996)
(200, ' cost: ', 2.2958792233412102)
(300, ' cost: ', 2.0865333232594367)
(400, ' cost: ', 1.9627291848188113)
(500, ' cost: ', 1.881676011033768)
(600, ' cost: ', 1.8246989727259302)
(700, ' cost: ', 1.7824316550542554)
(800, ' cost: ', 1.7497259777421093)
(900, ' cost: ', 1.7235548246743602)
('SOL: ', array([  3.00075700e+00,   3.55108836e-01,  -5.04679917e-01,
        -5.36384756e-01,  -7.20957381e-01,   1.23986913e-01,
        -6.90047607e-01,  -2.29085407e-01,  -3.70616404e-01,
         2.95114322e-01,  -2.25595555e-01,   1.88125597e-01,
         5.34472906e-01,  -5.83286735e-01,   1.94749939e-01,
        -1.35273815e-01,  -6.34160297e-01,  -9.68766779e-01,
         8.94107224e-01,   5.17227691e-01,  -3.68448103e-04,
        -9.89652902e-01,  -2.41298692e-01,   1.34771766e-01,
         6.14241637e-01,  -7.30681737e-01,   1.19105519e-01,
         5.68723563e-01,  -4.87532661e-01,  -5.31350367e-01,
         2.320

In [27]:
# On train set
P=np.abs(np.round(np.real(M.predict(np.transpose(X_train)))))


print(classification_report(Y_train,P.T))
print(confusion_matrix(Y_train, P.T))

             precision    recall  f1-score   support

        0.0       0.75      0.21      0.33      1001
        1.0       0.34      0.25      0.29      1127
        2.0       0.19      0.24      0.21       991
        3.0       0.24      0.29      0.26      1032
        4.0       0.12      0.17      0.15       980
        5.0       0.16      0.29      0.21       863
        6.0       0.19      0.26      0.22      1014
        7.0       0.28      0.28      0.28      1070
        8.0       0.14      0.10      0.12       944
        9.0       0.50      0.12      0.20       978
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0
       13.0       0.00      0.00      0.00         0

avg / total       0.29      0.22      0.23     10000

[[212 338 250 117  53  17  10   4   0   0   0   0   0   0]
 [  8 285 483 225  90  24  10   1   0   1   0   0   0   0]
 [ 38 115 242 274 213  69  32  

In [28]:
# On test set
P=np.abs(np.round(np.real(M.predict(np.transpose(X_test)))))

print(classification_report(Y_test,P.T))
print(confusion_matrix(Y_test, P.T))


             precision    recall  f1-score   support

        0.0       0.70      0.17      0.27       980
        1.0       0.32      0.25      0.28      1135
        2.0       0.18      0.22      0.20      1032
        3.0       0.23      0.28      0.25      1010
        4.0       0.14      0.19      0.16       982
        5.0       0.18      0.30      0.22       892
        6.0       0.17      0.24      0.20       958
        7.0       0.24      0.26      0.25      1028
        8.0       0.16      0.11      0.13       974
        9.0       0.48      0.12      0.19      1009
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0
       13.0       0.00      0.00      0.00         0

avg / total       0.28      0.21      0.22     10000

[[167 345 249 131  51  23  12   2   0   0   0   0   0   0]
 [  2 284 493 240  88  17   9   2   0   0   0   0   0   0]
 [ 48 144 230 252 204 115  31  