In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn.metrics import confusion_matrix, classification_report

from rtbm import RTBM, minimizer

import rtbm.layers as layers
import rtbm.model as mdl

import warnings
warnings.filterwarnings('ignore')

from rtbm.costfunctions import mse, crossentropy


In [2]:
# Read the MNIST CSV dumps (no header row) into plain numpy arrays.
# Column 0 holds the digit label, the remaining columns hold the pixels.
MNIST_train = pd.read_csv('~/data/mnist_train.csv', delimiter=",", header=None).values
MNIST_test = pd.read_csv('~/data/mnist_test.csv', delimiter=",", header=None).values

# Training split: only the first 10000 rows are used.
# Pixel values are divided by 255.0 to scale them onto [0, 1].
X_train = MNIST_train[:10000, 1:] / 255.0
Y_train = MNIST_train[:10000, 0]

# Test split: every row of the test file, scaled the same way.
X_test = MNIST_test[:, 1:] / 255.0
Y_test = MNIST_test[:, 0]

In [None]:
# Show one training sample: print its label, then draw the flat pixel
# vector as a 28x28 image.
i = 10
print(Y_train[i])

I = X_train[i].reshape(28, 28)
plt.imshow(I, cmap='gray_r', interpolation='nearest')
plt.show()

# Logistic regression baseline

In [None]:
# Baseline classifier: multinomial logistic regression (lbfgs solver)
# fitted on the normalized training pixels.
logreg = linear_model.LogisticRegression(
    solver='lbfgs',
    multi_class='multinomial',
)
logreg.fit(X_train, Y_train)


In [None]:
# Logistic regression: metrics on the training split.
P = logreg.predict(X_train)

# Per-class precision/recall/f1 followed by the confusion matrix
# (true labels first, predictions second).
print(classification_report(Y_train, P))
print(confusion_matrix(Y_train, P))


In [None]:
# Logistic regression: metrics on the held-out test split.
P = logreg.predict(X_test)

# Per-class precision/recall/f1 followed by the confusion matrix
# (true labels first, predictions second).
print(classification_report(Y_test, P))
print(confusion_matrix(Y_test, P))


# Linear regression baseline

In [None]:
# Baseline regressor: ordinary linear regression that treats the digit
# label as a numeric target.
linreg = linear_model.LinearRegression()
linreg.fit(X_train, Y_train)


In [None]:
# Linear regression: metrics on the training split.
# The continuous regression output is rounded to the nearest integer so
# it can be compared against the digit labels.
P = linreg.predict(X_train)
P = np.round(P)

print(classification_report(Y_train, P))
print(confusion_matrix(Y_train, P))

In [None]:
# Linear regression: metrics on the held-out test split.
# The regression output is continuous, so it is rounded to the nearest
# integer before scoring, exactly as in the training-set evaluation.
# Without the rounding the classification metrics receive continuous
# predictions and cannot compare them against the integer digit labels.
P = np.round(linreg.predict(X_test))

print(classification_report(Y_test, P))
print(confusion_matrix(Y_test, P))


# Linear regression via CMA

In [None]:
# Single linear layer mapping the 784 pixels to one output, trained with
# the CMA minimizer against the mean-squared-error cost.
M = mdl.Model()
M.add(layers.Linear(784, 1, paramBound=2))

# NOTE(review): the meaning of the positional False passed to CMA is not
# visible here -- confirm against the rtbm minimizer documentation.
minim = minimizer.CMA(False)

# Samples are laid out column-wise, hence the transpose of X_train.
# Y_train is 1-D, so np.transpose would leave it unchanged; reshape it
# explicitly into a (1, n_samples) row, the same layout used for the
# SGD linear-regression run.
sol = minim.train(
    mse(),
    M,
    np.transpose(X_train),
    Y_train.reshape(1, len(Y_train)),
    maxiter=1000,
)

In [None]:
# CMA-trained linear model: metrics on the training split.
# Predict on column-wise samples, then keep the real part, round to the
# nearest integer and take the absolute value to obtain label-like values.
P = M.predict(X_train.T)
P = np.abs(np.round(np.real(P)))

# P is transposed before scoring -- presumably predict returns one row
# of n_samples values; verify against the rtbm model API.
print(classification_report(Y_train, P.T))
print(confusion_matrix(Y_train, P.T))

In [None]:
# CMA-trained linear model: metrics on the held-out test split.
# Predict on column-wise samples, then keep the real part, round to the
# nearest integer and take the absolute value to obtain label-like values.
P = M.predict(X_test.T)
P = np.abs(np.round(np.real(P)))

# P is transposed before scoring -- presumably predict returns one row
# of n_samples values; verify against the rtbm model API.
print(classification_report(Y_test, P.T))
print(confusion_matrix(Y_test, P.T))

# Linear regression via SGD

In [3]:
# Single linear layer mapping the 784 pixels to one output, trained with
# the SGD minimizer against the mean-squared-error cost.
M = mdl.Model()
M.add(layers.Linear(784, 1))

minim = minimizer.SGD()

# Samples are laid out column-wise; the 1-D label vector is reshaped into
# a single row of len(Y_train) targets.
sol = minim.train(
    mse(),
    M,
    X_train.T,
    Y_train.reshape(1, len(Y_train)),
    lr=0.01,
    maxiter=1000,
    batch_size=100,
)

Iteration 0 in 0.12(s), cost = 4.741850
Iteration 100 in 6.55(s), cost = 1.439542
Iteration 200 in 12.72(s), cost = 1.382140
Iteration 300 in 18.63(s), cost = 1.370364
Iteration 400 in 24.54(s), cost = 1.366269
Iteration 500 in 30.41(s), cost = 1.363773
Iteration 600 in 36.28(s), cost = 1.361896
Iteration 700 in 42.14(s), cost = 1.360439
Iteration 800 in 48.01(s), cost = 1.359308
Iteration 900 in 53.88(s), cost = 1.358426
('Cost: ', 1.357735812654203)
('Sol: ', array([ 3.09914903e+00, -2.37854084e-02, -1.45925573e-01,  9.78796431e-01,
       -6.77898679e-01, -3.95748587e-01, -7.76949155e-02,  5.00422061e-01,
       -1.87022513e-01, -5.84979153e-01,  6.95326343e-02,  6.78548750e-01,
        3.59677607e-01,  3.47664437e-01,  8.90005984e-01, -1.43999466e-02,
        7.40684522e-01, -8.20824635e-02,  4.66905869e-01, -3.05124199e-01,
        6.60657518e-01,  8.56375945e-01,  3.49045608e-01,  7.68047626e-01,
       -7.78342047e-02, -4.99011956e-01, -8.75683195e-01, -4.40385491e-01,
        8

In [10]:
# Inspect the shape of the training label array (it is a 1-D vector).
Y_train.shape

(10000,)

In [4]:
# SGD-trained linear model: metrics on the training split.
# Predict on column-wise samples, then keep the real part, round to the
# nearest integer and take the absolute value to obtain label-like values.
P = M.predict(X_train.T)
P = np.abs(np.round(np.real(P)))

# The rounded regression output is not limited to the digits 0-9, so
# extra classes with zero support may show up in the report.
print(classification_report(Y_train, P.T))
print(confusion_matrix(Y_train, P.T))

             precision    recall  f1-score   support

        0.0       0.82      0.21      0.33      1001
        1.0       0.32      0.23      0.27      1127
        2.0       0.20      0.25      0.22       991
        3.0       0.27      0.32      0.29      1032
        4.0       0.12      0.17      0.14       980
        5.0       0.17      0.30      0.22       863
        6.0       0.22      0.30      0.25      1014
        7.0       0.29      0.30      0.29      1070
        8.0       0.16      0.13      0.14       944
        9.0       0.55      0.16      0.25       978
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0

avg / total       0.31      0.24      0.24     10000

[[206 380 242 104  38  22   7   2   0   0   0   0   0]
 [  4 256 523 226  85  26   5   2   0   0   0   0   0]
 [ 30  93 250 305 204  86  16   7   0   0   0   0   0]
 [  7  56 178 330 291 103  30  25   6

In [5]:
# SGD-trained linear model: metrics on the held-out test split.
# Predict on column-wise samples, then keep the real part, round to the
# nearest integer and take the absolute value to obtain label-like values.
P = M.predict(X_test.T)
P = np.abs(np.round(np.real(P)))

# The rounded regression output is not limited to the digits 0-9, so
# extra classes with zero support may show up in the report.
print(classification_report(Y_test, P.T))
print(confusion_matrix(Y_test, P.T))


             precision    recall  f1-score   support

        0.0       0.80      0.19      0.30       980
        1.0       0.31      0.21      0.25      1135
        2.0       0.16      0.20      0.18      1032
        3.0       0.25      0.30      0.27      1010
        4.0       0.13      0.18      0.15       982
        5.0       0.17      0.30      0.22       892
        6.0       0.17      0.25      0.20       958
        7.0       0.25      0.28      0.26      1028
        8.0       0.16      0.12      0.14       974
        9.0       0.48      0.12      0.19      1009
       10.0       0.00      0.00      0.00         0
       11.0       0.00      0.00      0.00         0
       12.0       0.00      0.00      0.00         0
       14.0       0.00      0.00      0.00         0

avg / total       0.29      0.21      0.22     10000

[[183 346 246 131  41  23   7   3   0   0   0   0   0   0]
 [  2 242 566 202  85  30   6   2   0   0   0   0   0   0]
 [ 32 129 211 283 225 110  31  

# E(h|v) via SGD

In [None]:
# Two stacked DiagExpectationUnitLayer layers (784 -> 10 -> 1), trained
# with the SGD minimizer against the mean-squared-error cost.
M = mdl.Model()
M.add(layers.DiagExpectationUnitLayer(784, 10))
M.add(layers.DiagExpectationUnitLayer(10, 1))

minim = minimizer.SGD()

# Samples are laid out column-wise, hence the transpose of X_train.
# Y_train is 1-D, so np.transpose would leave it unchanged; reshape it
# explicitly into a (1, n_samples) row, the same layout used for the
# SGD linear-regression run.
sol = minim.train(
    mse(),
    M,
    np.transpose(X_train),
    Y_train.reshape(1, len(Y_train)),
    lr=0.1,
    momentum=0.5,
    nesterov=True,
    noise=1,
    maxiter=1000,
)

In [None]:
# SGD-trained expectation-unit model: metrics on the training split.
# Predict on column-wise samples, then keep the real part, round to the
# nearest integer and take the absolute value to obtain label-like values.
P = M.predict(X_train.T)
P = np.abs(np.round(np.real(P)))

# P is transposed before scoring -- presumably predict returns one row
# of n_samples values; verify against the rtbm model API.
print(classification_report(Y_train, P.T))
print(confusion_matrix(Y_train, P.T))

In [None]:
# SGD-trained expectation-unit model: metrics on the held-out test split.
# Predict on column-wise samples, then keep the real part, round to the
# nearest integer and take the absolute value to obtain label-like values.
P = M.predict(X_test.T)
P = np.abs(np.round(np.real(P)))

# P is transposed before scoring -- presumably predict returns one row
# of n_samples values; verify against the rtbm model API.
print(classification_report(Y_test, P.T))
print(confusion_matrix(Y_test, P.T))



# E(h|v) via CMA

In [None]:
# Two stacked DiagExpectationUnitLayer layers (784 -> 20 -> 1), both
# built with phase=1j, trained with the CMA minimizer against the
# mean-squared-error cost.
M = mdl.Model()
M.add(layers.DiagExpectationUnitLayer(784, 20, phase=1j))
M.add(layers.DiagExpectationUnitLayer(20, 1, phase=1j))

# NOTE(review): the meaning of the positional False passed to CMA is not
# visible here -- confirm against the rtbm minimizer documentation.
minim = minimizer.CMA(False)

# Samples are laid out column-wise, hence the transpose of X_train.
# Y_train is 1-D, so np.transpose would leave it unchanged; reshape it
# explicitly into a (1, n_samples) row, the same layout used for the
# SGD linear-regression run.
sol = minim.train(
    mse(),
    M,
    np.transpose(X_train),
    Y_train.reshape(1, len(Y_train)),
    maxiter=100,
)

In [None]:
# CMA-trained expectation-unit model: metrics on the training split.
# Predict on column-wise samples, then keep the real part, round to the
# nearest integer and take the absolute value to obtain label-like values.
P = M.predict(X_train.T)
P = np.abs(np.round(np.real(P)))

# P is transposed before scoring -- presumably predict returns one row
# of n_samples values; verify against the rtbm model API.
print(classification_report(Y_train, P.T))
print(confusion_matrix(Y_train, P.T))

In [None]:
# CMA-trained expectation-unit model: metrics on the held-out test split.
# Predict on column-wise samples, then keep the real part, round to the
# nearest integer and take the absolute value to obtain label-like values.
P = M.predict(X_test.T)
P = np.abs(np.round(np.real(P)))

# P is transposed before scoring -- presumably predict returns one row
# of n_samples values; verify against the rtbm model API.
print(classification_report(Y_test, P.T))
print(confusion_matrix(Y_test, P.T))
