# HIDDEN MARKOV NEURAL NETWORK: MNIST example

## Import libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.model_selection import train_test_split
from sklearn import preprocessing


import pickle

import gzip

import numpy as np
import random

import BayesianNetwork

## Data processing

In [2]:
# Reproducibility: Python's `random`, PyTorch and NumPy each keep their own
# generator state, so all three are seeded with the same value.
seed_number = 123

for _seed_rng in (random.seed, torch.manual_seed, np.random.seed):
    _seed_rng(seed_number)
del _seed_rng  # do not leave the loop helper in the notebook namespace

The files "train-labels-idx1-ubyte.gz" and "train-images-idx3-ubyte.gz" must be placed in the notebook's working directory. They can be downloaded from:

- http://yann.lecun.com/exdb/mnist/

In [3]:
def load_mnist(path=''):
    """Load the MNIST training images and labels from gzipped IDX files.

    Parameters
    ----------
    path : str or os.PathLike, optional
        Directory containing "train-labels-idx1-ubyte.gz" and
        "train-images-idx3-ubyte.gz". The default, an empty string, resolves
        both files relative to the current working directory.

    Returns
    -------
    images : numpy.ndarray
        Array of shape (n_samples, 784) and dtype float64, one flattened
        image per row, holding the raw byte values.
    labels : numpy.ndarray
        Array of shape (n_samples,) and dtype uint8.

    Raises
    ------
    OSError
        If either file cannot be opened or is not valid gzip data.
    ValueError
        If the image payload cannot be reshaped to (n_samples, 784).
    """
    import os

    labels_path = os.path.join(path, 'train-labels-idx1-ubyte.gz')
    images_path = os.path.join(path, 'train-images-idx3-ubyte.gz')

    with gzip.open(labels_path, 'rb') as lbpath:
        # Skip the 8-byte header that precedes the label bytes.
        lbpath.read(8)
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8)

    with gzip.open(images_path, 'rb') as imgpath:
        # Skip the 16-byte header that precedes the pixel bytes.
        imgpath.read(16)
        pixels = np.frombuffer(imgpath.read(), dtype=np.uint8)
        # One row per label; astype() also yields a writable float64 copy.
        images = pixels.reshape(len(labels), 784).astype(np.float64)

    return images, labels

def mnist_preprocessing(x, y, sample_N = 600000, test_ratio = 0.25):
    """Rescale, save, resample and split an image/label data set.

    The steps are, in order:

    1. cast `x` to float32 and divide it by 126, cast `y` to int32;
    2. write both full arrays to disk with `np.save` under the names
       "mnist_preprocessed_data" and "mnist_preprocessed_target";
    3. draw `sample_N` row indices with `np.random.choice` and keep those rows;
    4. split the resampled rows with `train_test_split`, holding out a
       `test_ratio` fraction.

    Returns the tuple (train inputs, held-out inputs, train targets,
    held-out targets).
    """
    scaled = np.float32(x) / 126.
    np.save("mnist_preprocessed_data", scaled)
    targets = np.int32(y)
    np.save("mnist_preprocessed_target", targets)

    # NOTE(review): np.random.choice samples WITH replacement by default, so
    # whenever sample_N exceeds the number of rows (as the default 600000 does
    # for a 60000-image set) the same image is drawn several times and can end
    # up on both sides of the split below. Confirm this is intended.
    picked = np.random.choice(scaled.shape[0], sample_N)
    scaled, targets = scaled[picked], targets[picked]

    train_rows, held_out_rows = train_test_split(np.arange(sample_N), test_size = test_ratio)

    return (scaled[train_rows], scaled[held_out_rows],
            targets[train_rows], targets[held_out_rows])

In [4]:
# Read the raw arrays, then rescale / resample / split them.
x, y = load_mnist()
tr_x, va_x, tr_y, va_y = mnist_preprocessing(x, y)

# Training set: the first 50000 rows of the first partition.
x_tr, y_tr = tr_x[:50000], tr_y[:50000]

# Validation set: rows 50000-59999 of the second (held-out) partition.
x_val, y_val = va_x[50000:60000], va_y[50000:60000]

## Set the hyper parameters for the training

In [5]:
# ---------------------------------------------------------------------------
# Data schedule
# ---------------------------------------------------------------------------
sample_size    = 10000   # sample size
minibatch_size = 128     # minibatch size
epocs          = 10      # number of epochs (optimal value: 600)
# `sliding` is used to retrain on part of the previous data; when
# sliding == sample_size we move on to a completely new set of data.
sliding        = 10000
T              = 5       # number of sequential trainings to run

# ---------------------------------------------------------------------------
# Network structure
# ---------------------------------------------------------------------------
L            = 4                                  # depth
architecture = np.array([784, 400, 400, 10])      # units per layer

# ---------------------------------------------------------------------------
# HMNN kernel parameters
# ---------------------------------------------------------------------------
alpha_k = 0.75
sigma_k = np.exp(0)
c       = np.exp(7)
pi      = 0.5

# ---------------------------------------------------------------------------
# Variational DropConnect and optimisation
# ---------------------------------------------------------------------------
p    = 0.8     # mixture weight for the variational DropConnect
lr_c = 1e-3    # learning rate
mc_c = 1       # size of the Monte Carlo sample

## Training procedure

In [6]:
# Loss: cross-entropy summed (not averaged) over the examples it is given.
loss_function = nn.CrossEntropyLoss(reduction = 'sum')

# Build the model from the hyper-parameters defined above.
HMMNET = BayesianNetwork.torchHHMnet(
    architecture,
    alpha_k, sigma_k, c, pi, p,
    loss_function,
    sample_size, minibatch_size, epocs, T, sliding,
    workers = 4,
)

# Train on the training split; the validation split is passed alongside it.
HMMNET.forward_pass(x_tr, y_tr, x_val, y_val, lr_c, mc_c)

Time  1
Epoch  1
Prior score  6102.19002499633  and Data score  36.924489422636384
Performance on the validation set  0.1875
Epoch  2
Prior score  5902.7162931253115  and Data score  36.81765671854652
Performance on the validation set  0.208
Epoch  3
Prior score  5821.309416513264  and Data score  36.17304855213777
Performance on the validation set  0.2083
Epoch  4
Prior score  5799.651061661626  and Data score  37.1636412828807
Performance on the validation set  0.2082
Epoch  5
Prior score  5782.221038800075  and Data score  36.072149800308416
Performance on the validation set  0.2772
Epoch  6
Prior score  5772.538798961223  and Data score  30.840180931557164
Performance on the validation set  0.3407
Epoch  7
Prior score  5774.630683208378  and Data score  26.55068336531712
Performance on the validation set  0.368
Epoch  8
Prior score  5772.740452740232  and Data score  22.94798457987191
Performance on the validation set  0.4237
Epoch  9
Prior score  5768.365613557698  and Data score 

The above simulation took around 20 minutes on a machine with a 1.80 GHz Intel i7-8565U CPU and 16 GB of RAM.

## Test 

In [7]:
# Test set: the first 50000 rows of the held-out partition.
x_te, y_te = va_x[:50000], va_y[:50000]

Evaluate on the test set using the network from the last time step, $t=5$.

In [11]:
# Evaluate the network stored at index 5 of HMMNET.model_list on the test set.
# NOTE(review): 5 coincides with T set in the hyper-parameter cell; if T is
# changed this index presumably has to change with it -- confirm against
# BayesianNetwork.
# This is inference only, so gradient tracking is switched off to avoid
# building an autograd graph for the whole test set.
with torch.no_grad():
    output         = HMMNET.model_list[5].performance( torch.tensor( x_te, dtype = torch.float64 ) )
    output_softmax = F.softmax(output, dim=1)

# The classes are the digits 0..9, so the column index of the largest
# probability is itself the predicted label.
y_predicted = output_softmax.detach().numpy().argmax(axis=1)

# Fraction of correctly classified test examples (vectorised).
te_performance = np.mean(y_te == y_predicted)
print("Performance on the test set ", te_performance)

Performance on the test set  0.83394
