In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn.metrics import confusion_matrix, classification_report

from rtbm import RTBM, minimizer

import rtbm.layers as layers
import rtbm.model as mdl

import warnings
warnings.filterwarnings('ignore')

from rtbm.costfunctions import mse, crossentropy


In [2]:
# Load MNIST dataset
# The CSV files carry no header row, so every line is read as data.
MNIST_train = pd.read_csv('~/data/mnist_train.csv', sep=",", header=None).values
MNIST_test = pd.read_csv('~/data/mnist_test.csv', sep=",", header=None).values

# Prepare data: column 0 is used as the label, the remaining columns are
# divided by 255 so the features lie in [0,1].
# Only the first 10000 training rows are kept.
X_train = MNIST_train[:10000, 1:] / 255.0
Y_train = MNIST_train[:10000, 0]

# The full test file is used.
X_test = MNIST_test[:, 1:] / 255.0
Y_test = MNIST_test[:, 0]

In [None]:
# Visualize individual pics
# Pick one training sample, print its label and draw it as a 28x28 image.
i = 10
print(Y_train[i])

# Each feature row holds 784 values; fold it back into a 28x28 grid.
I = X_train[i].reshape(28, 28)
plt.imshow(I, cmap='gray_r', interpolation='nearest')
plt.show()

# Logistic regression baseline

In [None]:
# Logistic regression baseline
# Multinomial logistic regression fitted with the L-BFGS solver on the
# normalized training features and their integer labels.
logreg = linear_model.LogisticRegression(
    solver='lbfgs',
    multi_class='multinomial',
)

logreg.fit(X=X_train, y=Y_train)


In [None]:
# On train set
# Predict the training labels and print the per-class report followed by
# the confusion matrix.
P = logreg.predict(X_train)

for metric in (classification_report, confusion_matrix):
    print(metric(Y_train, P))


In [None]:
# On test set
# Predict the test labels and print the per-class report followed by the
# confusion matrix.
P = logreg.predict(X_test)

for metric in (classification_report, confusion_matrix):
    print(metric(Y_test, P))


# Linear regression baseline

In [None]:
# Ordinary least-squares fit that treats the digit label as a numeric
# regression target.
linreg = linear_model.LinearRegression()

linreg.fit(X=X_train, y=Y_train)


In [None]:
# On train set
# The regression output is continuous, so it is rounded to the nearest
# integer before being scored against the integer labels.
P = linreg.predict(X_train)
P = np.round(P)

for metric in (classification_report, confusion_matrix):
    print(metric(Y_train, P))

In [None]:
# On test set
# Round the continuous regression output to the nearest integer, exactly as
# the train-set evaluation does. Passing the raw float predictions to the
# classification metrics fails, because they cannot compare multiclass integer
# labels against continuous targets.
P = np.round(linreg.predict(X_test))

print(classification_report(Y_test, P))
print(confusion_matrix(Y_test, P))


# Linear regression via CMA

In [3]:
# Single-layer model mapping the 784 input features to one output.
# NOTE(review): the section heading says "linear regression" but the layer
# used here is layers.NonLinear -- confirm this is the intended layer.
M = mdl.Model()
M.add(layers.NonLinear(784, 1, paramBound=2))

# Fit with the CMA minimizer on the mean-squared-error cost.
# Samples are passed column-wise, hence the transposed feature matrix.
# NOTE(review): Y_train is 1-D, so transposing it leaves it unchanged.
# NOTE(review): the meaning of the positional False is not visible here --
# presumably it disables parallel evaluation; confirm against rtbm.minimizer.
minim = minimizer.CMA(False)
sol = minim.train(
    mse(),
    M,
    X_train.T,
    Y_train.T,
    maxiter=500,
)

CMA on 1 cpu(s) enabled
(11_w,23)-aCMA-ES (mu_w=6.7,w_1=25%) in dimension 785 (seed=980176, Sun Nov  5 16:03:23 2017)
Iterat #Fevals   function value  axis ratio  sigma  min&max std  t[m:s]
    1     23 1.091089214455234e+01 1.0e+00 1.98e-01  2e-01  2e-01 0:00.0
    2     46 1.052285359128973e+01 1.0e+00 1.96e-01  2e-01  2e-01 0:00.2
    3     69 1.058886429478716e+01 1.0e+00 1.95e-01  2e-01  2e-01 0:00.3
   28    644 1.007232177706450e+01 1.0e+00 1.73e-01  2e-01  2e-01 0:03.3
   62   1426 1.004320404635944e+01 1.0e+00 1.65e-01  2e-01  2e-01 0:07.4
  100   2300 1.003540741346584e+01 1.0e+00 1.64e-01  2e-01  2e-01 0:11.9
  151   3473 1.003195126092825e+01 1.1e+00 1.68e-01  2e-01  2e-01 0:18.0
  200   4600 1.003109241010481e+01 1.1e+00 1.71e-01  2e-01  2e-01 0:23.9
  267   6141 1.003019422727066e+01 1.2e+00 1.69e-01  2e-01  2e-01 0:32.0
  300   6900 1.002987303582147e+01 1.2e+00 1.69e-01  2e-01  2e-01 0:35.9
  383   8809 1.002950457936434e+01 1.2e+00 1.71e-01  2e-01  2e-01 0:46.0
  400  

In [4]:
# On train set
# Turn the model output into label-like values: keep the real part, round to
# the nearest integer, then take the absolute value so no prediction is
# negative.
P = np.real(M.predict(X_train.T))
P = np.abs(np.round(P))

# The prediction is transposed before it is handed to the metrics.
for metric in (classification_report, confusion_matrix):
    print(metric(Y_train, P.T))

             precision    recall  f1-score   support

          0       1.00      0.91      0.95      1001
          1       0.12      1.00      0.22      1127
          2       0.00      0.00      0.00       991
          3       0.00      0.00      0.00      1032
          4       0.00      0.00      0.00       980
          5       0.00      0.00      0.00       863
          6       0.00      0.00      0.00      1014
          7       0.00      0.00      0.00      1070
          8       0.00      0.00      0.00       944
          9       0.00      0.00      0.00       978

avg / total       0.11      0.20      0.12     10000

[[ 910   91    0    0    0    0    0    0    0    0]
 [   0 1127    0    0    0    0    0    0    0    0]
 [   2  989    0    0    0    0    0    0    0    0]
 [   0 1032    0    0    0    0    0    0    0    0]
 [   0  980    0    0    0    0    0    0    0    0]
 [   1  862    0    0    0    0    0    0    0    0]
 [   0 1014    0    0    0    0    0    0  

In [5]:
# On test set
# Turn the model output into label-like values: keep the real part, round to
# the nearest integer, then take the absolute value so no prediction is
# negative.
P = np.real(M.predict(X_test.T))
P = np.abs(np.round(P))

# The prediction is transposed before it is handed to the metrics.
for metric in (classification_report, confusion_matrix):
    print(metric(Y_test, P.T))

             precision    recall  f1-score   support

          0       0.97      0.90      0.93       980
          1       0.12      1.00      0.22      1135
          2       0.00      0.00      0.00      1032
          3       0.00      0.00      0.00      1010
          4       0.00      0.00      0.00       982
          5       0.00      0.00      0.00       892
          6       0.00      0.00      0.00       958
          7       0.00      0.00      0.00      1028
          8       0.00      0.00      0.00       974
          9       0.00      0.00      0.00      1009

avg / total       0.11      0.20      0.12     10000

[[ 878  102    0    0    0    0    0    0    0    0]
 [   0 1135    0    0    0    0    0    0    0    0]
 [   5 1027    0    0    0    0    0    0    0    0]
 [   4 1006    0    0    0    0    0    0    0    0]
 [   0  982    0    0    0    0    0    0    0    0]
 [   8  884    0    0    0    0    0    0    0    0]
 [   6  952    0    0    0    0    0    0  

# Linear regression via SGD

In [None]:
# Single linear layer mapping the 784 input features to one output.
M = mdl.Model()
M.add(layers.Linear(784, 1))

# Fit with SGD on the mean-squared-error cost. Samples are passed
# column-wise and the labels are reshaped into a single row to match.
minim = minimizer.SGD()
sol = minim.train(
    mse(),
    M,
    X_train.T,
    Y_train.reshape(1, -1),
    lr=1,
    maxiter=10000,
    batch_size=1000,
)

In [None]:
# Sanity check: display the shape of the training-label array.
Y_train.shape

In [None]:
# On train set
# Keep the real part of the model output, round it to the nearest integer
# and take the absolute value so every prediction is non-negative.
P = np.real(M.predict(X_train.T))
P = np.abs(np.round(P))

# The prediction is transposed before it is handed to the metrics.
for metric in (classification_report, confusion_matrix):
    print(metric(Y_train, P.T))

In [None]:
# On test set
# Keep the real part of the model output, round it to the nearest integer
# and take the absolute value so every prediction is non-negative.
P = np.real(M.predict(X_test.T))
P = np.abs(np.round(P))

# The prediction is transposed before it is handed to the metrics.
for metric in (classification_report, confusion_matrix):
    print(metric(Y_test, P.T))


# E(h|v) via SGD

In [None]:
# Model made of one DiagExpectationUnitLayer taking the 784 input features
# to a single output.
M = mdl.Model()
M.add(layers.DiagExpectationUnitLayer(784, 1))
# A second layer was tried and is currently disabled:
#M.add(layers.DiagExpectationUnitLayer(10,1))

# Fit with SGD (Nesterov momentum) on the mean-squared-error cost. Samples
# are passed column-wise and the labels are reshaped into a single row.
minim = minimizer.SGD()
sol = minim.train(
    mse(),
    M,
    X_train.T,
    Y_train.reshape(1, -1),
    lr=100,
    momentum=0.1,
    nesterov=True,
    maxiter=1000,
    batch_size=1000,
)

In [None]:
# On train set
# Keep the real part of the model output, round it to the nearest integer
# and take the absolute value so every prediction is non-negative.
P = np.real(M.predict(X_train.T))
P = np.abs(np.round(P))

# The prediction is transposed before it is handed to the metrics.
for metric in (classification_report, confusion_matrix):
    print(metric(Y_train, P.T))

In [None]:
# On test set
# Keep the real part of the model output, round it to the nearest integer
# and take the absolute value so every prediction is non-negative.
P = np.real(M.predict(X_test.T))
P = np.abs(np.round(P))

# The prediction is transposed before it is handed to the metrics.
for metric in (classification_report, confusion_matrix):
    print(metric(Y_test, P.T))



# E(h|v) via CMA

In [None]:
# Two stacked DiagExpectationUnitLayers: 784 inputs -> 20 units -> 1 output,
# both built with phase=1j.
M = mdl.Model()
M.add(layers.DiagExpectationUnitLayer(784, 20, phase=1j))
M.add(layers.DiagExpectationUnitLayer(20, 1, phase=1j))

# Fit with the CMA minimizer on the mean-squared-error cost.
# Samples are passed column-wise, hence the transposed feature matrix.
# NOTE(review): Y_train is 1-D, so transposing it leaves it unchanged.
minim = minimizer.CMA(False)
sol = minim.train(
    mse(),
    M,
    X_train.T,
    Y_train.T,
    maxiter=100,
)

In [None]:
# On train set
# Keep the real part of the model output, round it to the nearest integer
# and take the absolute value so every prediction is non-negative.
P = np.real(M.predict(X_train.T))
P = np.abs(np.round(P))

# The prediction is transposed before it is handed to the metrics.
for metric in (classification_report, confusion_matrix):
    print(metric(Y_train, P.T))

In [None]:
# On test set
# Keep the real part of the model output, round it to the nearest integer
# and take the absolute value so every prediction is non-negative.
P = np.real(M.predict(X_test.T))
P = np.abs(np.round(P))

# The prediction is transposed before it is handed to the metrics.
for metric in (classification_report, confusion_matrix):
    print(metric(Y_test, P.T))
