In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn.metrics import confusion_matrix, classification_report

from rtbm import RTBM, minimizer

import rtbm.layers as layers
import rtbm.model as mdl

import warnings
warnings.filterwarnings('ignore')

from rtbm.costfunctions import mse, crossentropy


In [2]:
# Load MNIST dataset
MNIST_train = pd.read_csv('~/data/mnist_train.csv', delimiter=",",header=None).values
MNIST_test  = pd.read_csv('~/data/mnist_test.csv', delimiter=",",header=None).values

# Prepare data (normalized onto [0,1])
Y_train = MNIST_train[0:10000,0]
X_train = MNIST_train[0:10000,1:]/255.0

Y_test = MNIST_test[:,0]
X_test = MNIST_test[:,1:]/255.0

In [None]:
# Visualize individual pics
i=10
print(Y_train[i])
I=np.reshape(X_train[i], (28,28))
plt.imshow(I, interpolation='nearest',  cmap='gray_r')
plt.show()

# Logistic regression baseline

In [None]:
# Logistic regression baseline
logreg = linear_model.LogisticRegression(multi_class='multinomial',solver='lbfgs')

logreg.fit(X_train,Y_train)


In [None]:
# On train set

P=logreg.predict(X_train)

print(classification_report(Y_train,P))
print(confusion_matrix(Y_train, P))


In [None]:
# On test set
P=logreg.predict(X_test)

print(classification_report(Y_test,P))
print(confusion_matrix(Y_test, P))


# Linear regression base line

In [None]:
linreg = linear_model.LogisticRegression()

linreg.fit(X_train,Y_train)


In [None]:
# On train set

P=linreg.predict(X_train)

print(classification_report(Y_train,P))
print(confusion_matrix(Y_train, P))

In [None]:
# On test set
P=linreg.predict(X_test)

print(classification_report(Y_test,P))
print(confusion_matrix(Y_test, P))


# Linear regression via CMA

In [3]:
M = mdl.Model()
M.add(layers.Linear(784,1,paramBound=2))

minim = minimizer.CMA(True)
sol=minim.train(mse(), M, np.transpose(X_train), np.transpose(Y_train), maxiter=100)

CMA on 16 cpu(s) enabled
(11_w,23)-aCMA-ES (mu_w=6.7,w_1=25%) in dimension 785 (seed=470287, Mon Oct 30 14:37:46 2017)
Iterat #Fevals   function value  axis ratio  sigma  min&max std  t[m:s]
    1     23 1.366699218335736e+01 1.0e+00 1.98e-01  2e-01  2e-01 0:00.3
    2     46 1.250343804961948e+01 1.0e+00 1.96e-01  2e-01  2e-01 0:00.6
    3     69 1.163112977137717e+01 1.0e+00 1.95e-01  2e-01  2e-01 0:01.0
   11    253 6.881330076580614e+00 1.0e+00 1.85e-01  2e-01  2e-01 0:04.0
   22    506 5.828009783752440e+00 1.0e+00 1.76e-01  2e-01  2e-01 0:08.2
   36    828 5.159979009867663e+00 1.0e+00 1.67e-01  2e-01  2e-01 0:13.5
   52   1196 4.517113179871580e+00 1.0e+00 1.60e-01  2e-01  2e-01 0:19.5
   71   1633 4.284366442659932e+00 1.0e+00 1.54e-01  2e-01  2e-01 0:26.6
   93   2139 4.391630864209552e+00 1.0e+00 1.48e-01  1e-01  1e-01 0:34.7
  100   2300 4.230070078262100e+00 1.0e+00 1.47e-01  1e-01  1e-01 0:37.1
CMAEvolutionStrategyResult(xbest=array([ 1.39248599,  0.90722946, -1.16165584, 

In [4]:
# On train set
P=np.abs(np.round(np.real(M.predict(np.transpose(X_train)))))


print(classification_report(Y_train,P.T))
print(confusion_matrix(Y_train, P.T))

             precision    recall  f1-score   support

        0.0       0.21      0.12      0.15      1001
        1.0       0.24      0.23      0.24      1127
        2.0       0.17      0.20      0.18       991
        3.0       0.13      0.16      0.14      1032
        4.0       0.13      0.16      0.14       980
        5.0       0.09      0.12      0.10       863
        6.0       0.15      0.15      0.15      1014
        7.0       0.18      0.14      0.16      1070
        8.0       0.18      0.13      0.15       944
        9.0       0.27      0.12      0.16       978
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0
       13.0       0.00      0.00      0.00         0
       14.0       0.00      0.00      0.00         0
       15.0       0.00      0.00      0.00         0
       16.0       0.00      0.00      0.00         0

avg / total       0.17      0.16      0.16  

In [5]:
# On test set
P=np.abs(np.round(np.real(M.predict(np.transpose(X_test)))))

print(classification_report(Y_test,P.T))
print(confusion_matrix(Y_test, P.T))

             precision    recall  f1-score   support

        0.0       0.21      0.11      0.15       980
        1.0       0.25      0.25      0.25      1135
        2.0       0.16      0.19      0.17      1032
        3.0       0.13      0.16      0.15      1010
        4.0       0.11      0.15      0.13       982
        5.0       0.10      0.14      0.12       892
        6.0       0.12      0.12      0.12       958
        7.0       0.15      0.14      0.15      1028
        8.0       0.16      0.11      0.13       974
        9.0       0.29      0.12      0.17      1009
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0
       13.0       0.00      0.00      0.00         0
       14.0       0.00      0.00      0.00         0
       15.0       0.00      0.00      0.00         0
       16.0       0.00      0.00      0.00         0

avg / total       0.17      0.15      0.15  