In [1]:
import numpy as np
from scipy.ndimage import convolve

import os
import numpy as np
from sklearn import linear_model
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn.base import clone
from sklearn import metrics


In [3]:
n_categories = 10  # Number of categories (one training file is read per category)
n_variables = 121  # Number of variables in the dataset
mcm_filename_format = "train-images-unlabeled-{}_comms.dat"
data_filename_format = "train-images-unlabeled-{}.dat"
data_path = "../INPUT/data/"
communities_path = "../INPUT/MCMs/"
output_path = "../OUTPUT/"
data_size = 5423  # Number of rows filled in per category

# Stack the per-category files into a single design matrix: rows
# [k*data_size, (k+1)*data_size) hold the samples read from the file of category k.
X_train = np.empty((data_size * n_categories, n_variables))
for k in range(n_categories):
    im_path = os.path.join(data_path, data_filename_format.format(k))
    # An integer delimiter tells genfromtxt to use fixed-width fields, so
    # delimiter=1 parses every single character as its own column.
    X_train[k * data_size:(k + 1) * data_size, :] = np.genfromtxt(
        im_path, delimiter=1, dtype=int
    )

# Labels follow the same block layout as X_train. They are derived from
# n_categories (not a literal 10) so X_train and Y_train always have the
# same number of rows when the configuration above is changed.
Y_train = np.repeat(np.arange(n_categories), data_size)


In [4]:
# Evaluation data. X_test has shape (sample, image_size); as for the training
# files, delimiter=1 splits each line into one-character fields.
X_test = np.genfromtxt(
    os.path.join(data_path, "test-images-unlabeled-all-uniform.txt"),
    dtype=int,
    delimiter=1,
)
# Labels for the rows of X_test, read with loadtxt's default dtype.
Y_test = np.loadtxt(os.path.join(data_path, "test-labels-uniform.txt"))


RBM

In [5]:
# Two-stage model: the RBM transforms the inputs into features, and the
# logistic regression is trained on those features.
rbm = BernoulliRBM(verbose=True, random_state=0)
logistic = linear_model.LogisticRegression(tol=1, solver="newton-cg")
rbm_features_classifier = Pipeline(
    steps=[
        ("rbm", rbm),
        ("logistic", logistic),
    ]
)

In [6]:
# Hyper-parameters are set on the estimator objects already held by the
# pipeline, so they take effect for the fit below.
rbm.set_params(
    learning_rate=0.06,
    n_iter=10,
    n_components=100,  # model complexity
)
logistic.set_params(C=6000)  # inverse regularisation

# Train the RBM and the logistic regression on top of it in one call.
rbm_features_classifier.fit(X_train, Y_train)

[BernoulliRBM] Iteration 1, pseudo-likelihood = -39.23, time = 4.97s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -36.18, time = 2.76s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -33.82, time = 5.72s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -32.43, time = 3.91s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -32.75, time = 2.92s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -32.02, time = 5.51s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -31.27, time = 2.77s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -30.97, time = 2.76s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -31.36, time = 2.44s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -31.04, time = 2.39s


In [7]:

# Score the fitted RBM + logistic pipeline on the test set and print the
# per-class precision / recall / f1 table.
Y_pred = rbm_features_classifier.predict(X_test)
print(
    "Logistic regression using RBM features:\n%s\n"
    % metrics.classification_report(y_true=Y_test, y_pred=Y_pred)
)

Logistic regression using RBM features:
              precision    recall  f1-score   support

         0.0       0.96      0.98      0.97       892
         1.0       0.98      0.98      0.98       892
         2.0       0.93      0.93      0.93       892
         3.0       0.90      0.91      0.90       892
         4.0       0.91      0.88      0.90       892
         5.0       0.91      0.90      0.91       892
         6.0       0.96      0.96      0.96       892
         7.0       0.94      0.91      0.92       892
         8.0       0.89      0.90      0.90       892
         9.0       0.86      0.88      0.87       892

    accuracy                           0.92      8920
   macro avg       0.92      0.92      0.92      8920
weighted avg       0.92      0.92      0.92      8920




Logistic Model only

In [8]:
# Baseline without the RBM: a fresh copy of the logistic estimator, with its
# own value of C, trained directly on the raw inputs.
logistic_clf = clone(logistic).set_params(C=100.0)
logistic_clf.fit(X_train, Y_train)

# Note: this reuses (and overwrites) the Y_pred name from the RBM evaluation.
Y_pred = logistic_clf.predict(X_test)
print(
    "Logistic regression using raw pixel features:\n%s\n"
    % metrics.classification_report(y_true=Y_test, y_pred=Y_pred)
)

In [None]:
# NOTE(review): disabled augmentation helper, kept for reference only; nothing
# in this notebook calls it. It reshapes every row to 8x8, whereas this notebook
# sets n_variables = 121, so the reshape would have to be adapted (presumably
# to 11x11 -- confirm) before it could be applied to X_train.
# def nudge_dataset(X, Y):
#     """
#     This produces a dataset 5 times bigger than the original one,
#     by moving the 8x8 images in X around by 1px to left, right, down, up
#     """
#     direction_vectors = [
#         [[0, 1, 0], [0, 0, 0], [0, 0, 0]],
#         [[0, 0, 0], [1, 0, 0], [0, 0, 0]],
#         [[0, 0, 0], [0, 0, 1], [0, 0, 0]],
#         [[0, 0, 0], [0, 0, 0], [0, 1, 0]],
#     ]

#     def shift(x, w):
#         return convolve(x.reshape((8, 8)), mode="constant", weights=w).ravel()

#     X = np.concatenate(
#         [X] + [np.apply_along_axis(shift, 1, X, vector) for vector in direction_vectors]
#     )
#     Y = np.concatenate([Y for _ in range(5)], axis=0)
#     return X, Y
