In [None]:
# Change directory to VSCode workspace root so that relative path loads work correctly. Turn this addition off with the DataScience.changeDirOnImportExport setting
# ms-python.python added
import os
# Best-effort switch to the ../src folder so that relative paths and the local
# `network` / `utils` imports resolve. Only filesystem errors are tolerated;
# anything else (e.g. KeyboardInterrupt) is no longer swallowed.
try:
	os.chdir(os.path.join(os.getcwd(), '../src'))
	print(os.getcwd())
except OSError as err:
	# Keep the notebook running, but say why the directory was not changed.
	print('Could not change directory to ../src:', err)


In [None]:
import os
import shutil
import sys
import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from keras.utils import to_categorical
from network import basenet, get_multitask_network, get_numberonly_network, \
    get_multitask_network_gradflip
from utils import colors as all_colors_rgb, color_MNIST, compile_model, evaluate_results

%load_ext autoreload
%autoreload 2

In [None]:
# `keras` has to be imported explicitly: `from keras.utils import ...` in the
# imports cell does not bind the name `keras`, so the set_session call below
# would otherwise raise a NameError on a fresh kernel.
import keras
from tensorflow.python.client import device_lib

# Create a TF session with allow_growth enabled on the GPU options and
# register it as the session used by the Keras backend.
gpu_options = tf.GPUOptions(allow_growth=True)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
keras.backend.tensorflow_backend.set_session(sess)

# List the devices TensorFlow can see.
print(device_lib.list_local_devices())


In [None]:
# --- Tunable settings for this notebook ---
INITIAL_LR = 1e-4                    # passed to compile_model as initial_lr
EPOCHS = 30                          # passed to every model.fit call
PATIENCE = EPOCHS  # no early stopping here
BIAS = 0.5                           # `bias` argument of color_MNIST for the training set
GRADFLIP_LAMBDA = .2                 # passed to get_multitask_network_gradflip
DATA_FOLDER = '../data'              # where sample jpgs are written
LOSS = 'categorical_crossentropy'
N = 200                              # number of sample images written per split

# choose some colors: one color name per class index 0..9
colors = dict(enumerate([
    'dark red',
    'navy',
    'gold',
    'aqua',
    'indigo',
    'deep pink',
    'chocolate',
    'honeydew',
    'dark violet',
    'beige',
]))
# color name -> class index
colors_inv = dict(zip(colors.values(), colors.keys()))
# class index -> entry of all_colors_rgb for that color name
colors_rgb = {index: all_colors_rgb[colors[index]] for index in colors}
classes = list(colors)

## Color-biased MNIST

In [None]:
# DATA
# Load MNIST and one-hot encode the number labels.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
y_train_onehot, y_test_onehot = to_categorical(y_train), to_categorical(y_test)

# Color the images. The training set and the reference test set use bias=BIAS,
# the actual test set uses bias=0.
# NOTE(review): the exact meaning of `bias` is defined in utils.color_MNIST -- confirm there.
x_train_color, y_train_color = color_MNIST(
    zip(x_train, y_train), colors, colors_rgb, bias=BIAS)
x_test_color, y_test_color = color_MNIST(
    zip(x_test, y_test), colors, colors_rgb, bias=0)
x_test_color_ref, y_test_color_ref = color_MNIST(
    zip(x_test, y_test), colors, colors_rgb, bias=BIAS)

# Translate the color labels to class indices via colors_inv, then one-hot encode.
y_train_color_onehot = to_categorical(
    [colors_inv[color_name] for color_name in y_train_color])
# Targets for the two-headed model, keyed by 'color' and 'number'.
y_train_multi = dict(color=y_train_color_onehot, number=y_train_onehot)

In [None]:
# write away some results to jpg for inspection
# Start from an empty DATA_FOLDER. The existence check keeps the first run
# (when the folder has not been created yet) from failing in rmtree.
if os.path.isdir(DATA_FOLDER):
    shutil.rmtree(DATA_FOLDER)

# NOTE(review): cv2.imwrite interprets 3-channel arrays as BGR; if color_MNIST
# produces RGB images the saved jpgs have red and blue swapped -- confirm.
for split, images, labels in (('train', x_train_color, y_train),
                              ('test', x_test_color, y_test)):
    # one sub-folder per class label
    for label in classes:
        os.makedirs(os.path.join(DATA_FOLDER, split, str(label)), exist_ok=True)

    # write the first N images of this split into the folder of their number label
    for i in range(N):
        path = os.path.join(DATA_FOLDER, split, str(labels[i]), str(i)+'.jpg')
        ret = cv2.imwrite(path, images[i])
        assert ret, 'cv2.imwrite failed for ' + path


 ## train: number only, confirm bias

In [None]:
# Backbone for 28x28 images with 3 channels.
simplenet = basenet(
    input_shape=(28, 28, 3),
    n_conv=3,
    init_filter_size=20,
    dropout_rate=0,
)
# Model built by get_numberonly_network on top of that backbone.
model_number = get_numberonly_network(backbone=simplenet)
# compile_model returns the model together with the callbacks used for training.
model_number, callbacks = compile_model(
    model_number, 'mnist_number',
    loss=LOSS, initial_lr=INITIAL_LR, patience=PATIENCE,
)
# Fit on the colored training images against the one-hot number labels.
model_number.fit(
    x_train_color, y_train_onehot,
    validation_split=.2,
    epochs=EPOCHS,
    callbacks=callbacks,
)



In [None]:
# Without bias
# Predicted class = index of the highest score per test image (test set colored with bias=0).
y_pred = model_number.predict(x_test_color).argmax(axis=1)
evaluate_results(y_test, y_pred, classes)


 Our model has become worse at predicting the number.


### train: Gradient Reversal
 In https://github.com/feidfoe/learning-not-to-learn/blob/master/trainer.py we see the authors train with a minimax game and gradient reversal.

 In essence, this means the color head still tries to extract color info from the shared embedding, but during backprop we flip the gradient between the start of the color head and the embedding layer, so the shared weights are pushed away from encoding color information.

 Let's try the gradient reversal.

In [None]:
# Build a new backbone with the same settings as the one used for the number-only model.
simplenet = basenet(
    input_shape=(28, 28, 3),
    n_conv=3,
    init_filter_size=20,
    dropout_rate=0,
)

In [None]:
# Two-output model (outputs 'number' and 'color') built on the shared backbone
# by get_multitask_network_gradflip, configured with GRADFLIP_LAMBDA.
model_gradflip = get_multitask_network_gradflip(simplenet, gradflip_lambda=GRADFLIP_LAMBDA)

# LOSS comes from the configuration cell; it is deliberately not re-declared
# here so that a change made there is not silently overridden.
# Both outputs contribute equally to the total loss.
loss_weights = {
    "number": 1,
    "color": 1
}

model_gradflip, callbacks = compile_model(model_gradflip, 'mnist_number_color', LOSS,
                                          loss_weights=loss_weights, initial_lr=INITIAL_LR, patience=PATIENCE)
# y_train_multi holds the one-hot targets keyed by 'color' and 'number'.
model_gradflip.fit(x_train_color, y_train_multi, validation_split=.2,
                   epochs=EPOCHS, callbacks=callbacks)

 Now, let's predict on the inconsistently colored test set.

In [None]:
# no consistent coloring in the testset, contrary to the trainingset
# Pair each prediction array with the name of the output that produced it, so
# the result does not depend on the positional order of the model outputs.
# The names 'number' and 'color' are the ones used for the training targets
# and the loss weights.
predictions = dict(zip(model_gradflip.output_names,
                       model_gradflip.predict(x_test_color)))
y_pred_color = predictions['color']
y_pred_number = np.argmax(predictions['number'], axis=1)
evaluate_results(y_test, y_pred_number, classes)


## Conclusion

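With the following settings (note that EPOCHS, BIAS and GRADFLIP_LAMBDA differ from the values in the configuration cell above):  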
INITIAL_LR = 1e-4  
EPOCHS = 50  
PATIENCE = EPOCHS  
BIAS = 0.9  
GRADFLIP_LAMBDA = .1  

Our testset accuracy in predicting the number went up from 84.33 to 90.4 percent, which is pretty decent. 

 # Notes:
 * Bias cannot be set to 1. If it is 1, color and number information align exactly and the two heads fight over keeping/destroying that information, with no room for nuance. A noisy bias ensures that there is indeed color info in the embedding that is **separable** from the number info.
 * Use a more powerful color branch (more dense layers) than the number branch, so that simply hiding the color info in the embedding isn't enough.