In [None]:
!pip install larq

Collecting larq
[?25l  Downloading https://files.pythonhosted.org/packages/77/10/e3888b42d76f720de84cd27bb07cd0d3c52ffe7f075dc99e7a97853fb168/larq-0.10.0-py3-none-any.whl (60kB)
[K     |█████▍                          | 10kB 12.3MB/s eta 0:00:01[K     |██████████▉                     | 20kB 1.7MB/s eta 0:00:01[K     |████████████████▎               | 30kB 2.3MB/s eta 0:00:01[K     |█████████████████████▊          | 40kB 2.6MB/s eta 0:00:01[K     |███████████████████████████▏    | 51kB 2.0MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 1.8MB/s 
Collecting terminaltables>=3.1.0
  Downloading https://files.pythonhosted.org/packages/9b/c4/4a21174f32f8a7e1104798c445dacdc1d4df86f2f26722767034e4de4bff/terminaltables-3.1.0.tar.gz
Building wheels for collected packages: terminaltables
  Building wheel for terminaltables (setup.py) ... [?25l[?25hdone
  Created wheel for terminaltables: filename=terminaltables-3.1.0-cp36-none-any.whl size=15356 sha256=f808b5ef84fd2f1

In [None]:
import tensorflow as tf
import larq as lq

import keras.backend as K
from keras.datasets import mnist
from keras.utils import np_utils
from keras.callbacks import LearningRateScheduler

In [None]:
batch_size = 100
epochs = 20

# network
num_units = 4096
hidden_layers = 3
use_bias = False

# learning rate schedule
lr_start = 1e-3
lr_end = 1e-4
lr_decay = (lr_end / lr_start)**(1. / epochs)

# BN
e = 1e-6
momentum = 0.9

# dropout
drop_hidden = 0.5

In [17]:
# Import MNIST
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten images
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)

# Normalize pixel values 
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, 10) * 2 - 1 # -1 or 1 for hinge loss
Y_test = np_utils.to_categorical(y_test, 10) * 2 - 1

60000 train samples
10000 test samples


In [18]:
# All quantized layers except the first will use the same options
kwargs = dict(input_quantizer="ste_sign",
              kernel_quantizer="ste_sign",
              kernel_constraint="weight_clip",
              kernel_initializer='glorot_uniform')

model = tf.keras.models.Sequential()

# In the first layer we only quantize the weights and not the input
model.add(lq.layers.QuantDense(num_units, 
                               kernel_quantizer="ste_sign",
                               kernel_constraint="weight_clip",
                               use_bias=use_bias,
                               input_shape=(784,), 
                               name='dense{}'.format(1)))
model.add(tf.keras.layers.BatchNormalization(epsilon=e, momentum=momentum, name='bn{}'.format(1)))
model.add(tf.keras.layers.Dropout(drop_hidden, name='drop{}'.format(1)))

for i in range(1, hidden_layers):
  model.add(lq.layers.QuantDense(num_units, use_bias=True, name='dense{}'.format(i+1), **kwargs))
  model.add(tf.keras.layers.BatchNormalization(epsilon=e, momentum=momentum, name='bn{}'.format(i+1)))
  model.add(tf.keras.layers.Dropout(drop_hidden, name='drop{}'.format(i+1)))

# L2 SVM Output layer
model.add(tf.keras.layers.Dense(10, kernel_regularizer=tf.keras.regularizers.l2()))
model.add(tf.keras.layers.Activation('linear'))


lq.models.summary(model)

+sequential_7 stats---------------------------------------------------------------------------+
| Layer         Input prec.     Outputs   # 1-bit  # 32-bit   Memory  1-bit MACs  32-bit MACs |
|                     (bit)                   x 1       x 1     (kB)                          |
+---------------------------------------------------------------------------------------------+
| dense1                  -  (-1, 4096)   3211264         0   392.00           0      3211264 |
| bn1                     -  (-1, 4096)         0      8192    32.00           0            0 |
| drop1                   -  (-1, 4096)         0         0        0           ?            ? |
| dense2                  1  (-1, 4096)  16777216      4096  2064.00    16777216            0 |
| bn2                     -  (-1, 4096)         0      8192    32.00           0            0 |
| drop2                   -  (-1, 4096)         0         0        0           ?            ? |
| dense3                  1  (-1, 4096) 

In [19]:
model.compile(tf.keras.optimizers.Adam(lr=0.01, decay=0.0001),
              loss='squared_hinge',
              metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs)

test_loss, test_acc = model.evaluate(X_test, Y_test)

print(f"Test accuracy {test_acc * 100:.2f} %")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test accuracy 97.10 %
