In [1]:
# If you want to execute the notebook on Colab, uncomment the following line.
#! git clone https://github.com/TimotheeMathieu/MOM_NN

import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam,RMSprop, Nadam

from tensorflow.keras.utils import to_categorical
# Load the MNIST train and test splits; each split is an (images, labels) pair.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [2]:
# Show the shape of the training images before flattening.
print(X_train.shape)
# Flatten every image into a single row of 784 pixel values.
n_pixels = 784
X_train = X_train.reshape((len(X_train), n_pixels))
X_test = X_test.reshape((len(X_test), n_pixels))

(60000, 28, 28)


In [3]:
# Add 50 outliers at randomly chosen training rows.
# A dedicated, seeded generator makes the contaminated subset identical on every
# "Restart & Run All", and makes re-running this cell idempotent (the same rows
# are zeroed again instead of 50 additional ones being corrupted).
N_OUTLIERS = 50
OUTLIER_SEED = 0
outlier_rng = np.random.default_rng(OUTLIER_SEED)
# replace=False guarantees N_OUTLIERS distinct row indices.
outlier_idx = outlier_rng.choice(len(X_train), size=N_OUTLIERS, replace=False)
X_train[outlier_idx, :] = 0  # corresponds to an all black image

In [4]:
# One-hot encode the integer labels of both splits (10 classes).
y_train, y_test = (
    to_categorical(labels, num_classes=10) for labels in (y_train, y_test)
)

In [5]:
# Display the shapes of the flattened training inputs and the one-hot labels.
print(X_train.shape, y_train.shape)

(60000, 784) (60000, 10)


In [6]:
# A small fully connected classifier is used to keep things simple;
# a convnet would have been a better fit for image data.
n_features = X_train.shape[1]
input_layer = Input(shape=(n_features,))
hidden = Dense(32, activation="relu")(input_layer)
class_probs = Dense(10, activation="softmax")(hidden)
model = Model(inputs=input_layer, outputs=class_probs)
# Print the layer-by-layer summary of the network.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense (Dense)                (None, 32)                25120     
_________________________________________________________________
dense_1 (Dense)              (None, 10)                330       
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Configure training: Adam optimizer, categorical cross-entropy on the one-hot
# labels, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [8]:
from sklearn.metrics import roc_auc_score

In [9]:
# Baseline: ordinary training on the contaminated training set.
# The call is the last expression of the cell, so its return value is displayed.
N_EPOCHS = 10
model.fit(x=X_train, y=y_train, epochs=N_EPOCHS)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f597ea48c40>

In [10]:
# Test accuracy of the baseline network: fraction of test samples whose
# highest-scoring predicted class equals the true class.
true_label = np.argmax(y_test, axis=1)
pred = np.argmax(model.predict(X_test), axis=1)
print((pred == true_label).mean())

0.9075


# Use MOM (median-of-means)

In [11]:
from keras_mom import MOM_model
from tensorflow.keras.losses import CategoricalCrossentropy, Reduction
# Initialize the MOM wrapper with the pre-trained model from the cells above.
# Reduction.NONE disables the loss's own reduction over the batch.
# NOTE(review): `model` itself is handed to MOM_model; if the wrapper trains it
# in place, the baseline weights are overwritten — confirm against keras_mom.
per_sample_loss = CategoricalCrossentropy(reduction=Reduction.NONE)
model2 = MOM_model(
    model,
    per_sample_loss,
    K=71,
    batch_size=64,
    max_iter=100,
    verbose=1,
)

In [12]:
model2.fit(X_train, y_train)
# The code is not really optimized, but it runs. Because each epoch only uses
# sample_size/K points, it needs more iterations to converge.

Epoch 2/2
Epoch 3/3
Epoch 4/4
Epoch 5/5
Epoch 6/6
Epoch 7/7
Epoch 8/8
Epoch 9/9
Epoch 10/10
Epoch 11/11
Epoch 12/12
Epoch 13/13
Epoch 14/14
Epoch 15/15
Epoch 16/16
Epoch 17/17
Epoch 18/18
Epoch 19/19
Epoch 20/20
Epoch 21/21
Epoch 22/22
Epoch 23/23
Epoch 24/24
Epoch 25/25
Epoch 26/26
Epoch 27/27
Epoch 28/28
Epoch 29/29
Epoch 30/30
Epoch 31/31
Epoch 32/32
Epoch 33/33
Epoch 34/34
Epoch 35/35
Epoch 36/36
Epoch 37/37
Epoch 38/38
Epoch 39/39
Epoch 40/40
Epoch 41/41
Epoch 42/42
Epoch 43/43
Epoch 44/44
Epoch 45/45
Epoch 46/46
Epoch 47/47
Epoch 48/48
Epoch 49/49
Epoch 50/50
Epoch 51/51
Epoch 52/52
Epoch 53/53
Epoch 54/54
Epoch 55/55
Epoch 56/56
Epoch 57/57
Epoch 58/58
Epoch 59/59
Epoch 60/60
Epoch 61/61
Epoch 62/62
Epoch 63/63
Epoch 64/64
Epoch 65/65
Epoch 66/66
Epoch 67/67
Epoch 68/68
Epoch 69/69
Epoch 70/70
Epoch 71/71
Epoch 72/72
Epoch 73/73
Epoch 74/74
Epoch 75/75
Epoch 76/76
Epoch 77/77
Epoch 78/78
Epoch 79/79
Epoch 80/80
Epoch 81/81
Epoch 82/82
Epoch 83/83
Epoch 84/84


Epoch 85/85
Epoch 86/86
Epoch 87/87
Epoch 88/88
Epoch 89/89
Epoch 90/90
Epoch 91/91
Epoch 92/92
Epoch 93/93
Epoch 94/94
Epoch 95/95
Epoch 96/96
Epoch 97/97
Epoch 98/98
Epoch 99/99
Epoch 100/100




MOM_model(K=71, batch_size=64,
          loss=<tensorflow.python.keras.losses.CategoricalCrossentropy object at 0x7f597c338bb0>,
          model=<tensorflow.python.keras.engine.functional.Functional object at 0x7f5a3ca129d0>,
          verbose=1)

In [15]:
# Test accuracy after MOM training: fraction of test samples whose
# highest-scoring predicted class equals the true class.
true_label = np.argmax(y_test, axis=1)
pred = np.argmax(model2.predict(X_test), axis=1)
print((pred == true_label).mean())

0.9161


In [16]:
# NOTE(review): looks like leftover scratch unrelated to the experiment above —
# candidate for removal.
np.arange(0, 10, 3)

array([0, 3, 6, 9])