In [None]:
import tensorflow as tf
from tensorflow.keras.models import clone_model
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import numpy as np

# MNIST dataset, returned as (inputs, labels) pairs for the train and test splits.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide the inputs by 255 so the network sees floating-point values.
x_train = x_train / 255.0
x_test = x_test / 255.0

# Baseline classifier: flatten each 28x28 input, one 512-unit ReLU hidden layer
# with 20% dropout, and a 10-way softmax output.
model1 = tf.keras.models.Sequential()
model1.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model1.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model1.add(tf.keras.layers.Dropout(0.2))
model1.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

# Shared early-stopping callback: watches the training loss with a patience of
# 5 epochs. It is passed to every fit() call in the trial cells.
earlystp = EarlyStopping(
    monitor="loss",
    mode='auto',
    patience=5,
    verbose=1,
)

In [None]:
# Trial 1: batch size 32 (model1) vs batch size 256 (model2), both with the
# default 'sgd' optimizer.
epochs = 50

# Small-batch run on model1.
model1.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)
model1.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=epochs,
    callbacks=[earlystp],
)

# Large-batch run on a clone of model1 (same architecture, separate model object).
model2 = clone_model(model1)
model2.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)
model2.fit(
    x_train,
    y_train,
    batch_size=256,
    epochs=epochs,
    callbacks=[earlystp],
)


In [None]:
# Trial 2: SGD with learning rate 0.1 (model1) vs 1e-3 (model2), batch size 64 for both.
epochs = 50

# NOTE(review): model1 is not re-created in this cell, and compile() does not
# reset weights, so it continues from whatever weights it already holds, while
# the clone below gets newly created weights. Confirm this is the intended
# comparison.

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and is no longer accepted by recent Keras optimizers.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
model1.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model1.fit(x_train, y_train, epochs=epochs, batch_size=64, callbacks=[earlystp])

# Same architecture, trained with the smaller learning rate. A separate
# optimizer instance is created so no optimizer state is shared between models.
model2 = clone_model(model1)
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)
model2.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model2.fit(x_train, y_train, epochs=epochs, batch_size=64, callbacks=[earlystp])

In [None]:
# Trial 3: Adam (model1) vs SGD (model2), both at learning rate 0.01 and batch size 64.
epochs = 50

# NOTE(review): model1 is not re-created in this cell, and compile() does not
# reset weights, so it continues from whatever weights it already holds, while
# the clone below gets newly created weights. Confirm this is the intended
# comparison.

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and is no longer accepted by recent Keras optimizers.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model1.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model1.fit(x_train, y_train, epochs=epochs, batch_size=64, callbacks=[earlystp])

# Same architecture, trained with plain SGD at the same learning rate.
model2 = clone_model(model1)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
model2.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model2.fit(x_train, y_train, epochs=epochs, batch_size=64, callbacks=[earlystp])

In [None]:
import tqdm

# Linear interpolation between the two trained models:
#     w(alpha) = (1 - alpha) * w_model1 + alpha * w_model2
# alpha = 0 reproduces model1 and alpha = 1 reproduces model2; values outside
# [0, 1] extrapolate along the same line.
weights1 = model1.get_weights()
# The second endpoint must come from model2. Interpolating model1 with itself
# yields identical weights for every alpha, which makes the sweep constant.
weights2 = model2.get_weights()

# Scratch model that receives the interpolated weights. It is only evaluated,
# never trained, so the optimizer passed to compile() is not stepped here.
pmodel = clone_model(model1)
pmodel.compile(optimizer=model1.optimizer, loss=model1.loss, metrics=['accuracy'])


# One row per alpha: [alpha, train_loss, train_acc, test_loss, test_acc]
results = []

# alpha runs from -1.00 to 2.00 inclusive in steps of 0.01 (301 points).
for i in tqdm.tqdm(range(-100, 201)):
    alpha = i*0.01
    # get_weights() returns one array per weight tensor in matching order for
    # both models (same architecture), so the lists can be blended pairwise.
    weights3 = [ (1-alpha)*t1 + alpha*t2 for t1, t2 in zip(weights1, weights2) ]
    pmodel.set_weights(weights3)
    train_loss, train_acc = pmodel.evaluate(x_train, y_train, verbose=0, batch_size=256)
    test_loss, test_acc = pmodel.evaluate(x_test, y_test, verbose=0, batch_size=256)
    results.append([alpha, train_loss, train_acc, test_loss, test_acc])

# Convert to a 2-D array so the plotting cell can slice columns.
results = np.asarray(results)

In [None]:
import matplotlib.pyplot as plt
# Columns of `results`: alpha, train loss, train accuracy, test loss, test accuracy.
alphas = results[:, 0]

fig, loss_ax = plt.subplots()

# Left axis (blue): cross-entropy loss; solid = training, dotted = testing.
loss_ax.plot(alphas, results[:, 1], 'b')
loss_ax.plot(alphas, results[:, 3], 'b:')
loss_ax.set_xlabel('alpha')
loss_label = loss_ax.set_ylabel('cross entropy loss')
loss_label.set_color("blue")
loss_ax.legend(['training', 'testing'], loc='upper left')

# Right axis (red): accuracy on a twin y-axis sharing the same alpha axis;
# solid = training, dotted = testing.
acc_ax = loss_ax.twinx()
acc_ax.plot(alphas, results[:, 2], 'r')
acc_ax.plot(alphas, results[:, 4], 'r:')
acc_label = acc_ax.set_ylabel('accuracy')
acc_label.set_color("red")

plt.show()