# Pix2Code Improvement
Please read the README for more details on the solutions and models. This notebook only provides code for using them.

To run the notebook, first unpack datasets/web/all_data.zip. The final bypass model is also included as a zip archive, model_instances/model_instances.zip, for direct use.

# Parameters

In [None]:
# Master switch for the model cells below: when True each model is trained and its weights are
# saved; when False the previously saved weights (and, for the code models, the pickled error
# tables) are loaded instead.
train_models=True

# Imports

In [None]:
from pathlib import Path
import sys
import datetime
import numpy as np


import tensorflow as tf
import tensorflow_addons as tfa

import shutil
import math
import cv2
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import RMSprop

import os
import glob

# Logging setup: configure the root logger once, then keep a handle to it for the other cells.
import logging

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s:%(message)s',
    datefmt='%I:%M:%S',
)
logger = logging.getLogger()  # called without a name, so this is the root logger
logger.info("Log started")

In [None]:
# Folder layout, relative to the directory the notebook is started from.
main_folder = Path('')
pix2code_folder = main_folder/'pix2code_improvement'
compiler_folder = main_folder/'compiler'
original_data_folder = main_folder / 'datasets'
model_instance_folder = main_folder / 'model_instances'

training_data_folder_path = 'datasets/web/training_data'
validation_data_folder_path ='datasets/web/validation_data'
test_data_folder_path ='datasets/web/test_data'

# glob returns matches in arbitrary, filesystem-dependent order, so sort them to keep the sample
# order reproducible between runs and machines.
# NOTE: these lists only contain files that exist when this cell runs; re-run the cell if the
# .npz files are (re)generated afterwards.
train_paths = sorted(glob.glob(os.path.join(training_data_folder_path, "*_256.npz")))
valid_paths = sorted(glob.glob(os.path.join(validation_data_folder_path, "*_256.npz")))
test_paths = sorted(glob.glob(os.path.join(test_data_folder_path, "*_256.npz")))
# Augmentation passed to the training datasets of the CNN cells (flips and a small width shift).
image_transformer = ImageDataGenerator(horizontal_flip=True, width_shift_range=5, fill_mode='nearest', vertical_flip=True)


# Make the project modules that live in pix2code_improvement/ importable.
sys.path.insert(1,str(pix2code_folder))
from constants import (START_WORD, END_WORD, PLACEHOLDER, ORIGINAL_DATA_FOLDER_NAME, TRAINING_DATA_FOLDER_NAME, VALIDATION_DATA_FOLDER_NAME, 
                       TEST_DATA_FOLDER_NAME, CONTEXT_LENGTH, IMAGE_SIZE, DOMAIN_LANGUAGES)
from eval_functions import eval_cnn_model, eval_code_error
from vocabulary import load_voc_from_file
from generators import dataset_generator_reader, features_only


voc = load_voc_from_file('datasets/web/vocabulary.vocab')
max_epochs=400       # upper bound passed to fit(); the training cells also use early stopping
image_size = 256
batch_size =32       # used by the training cells to derive steps per epoch
code_max_length=100  # passed as fixed_output_length / max_code_length by the code-model cells

# Not referenced by the cells visible in this notebook section.
validation_ratio =0.1
test_ratio = 0.1

# Preprocessing
Preprocesses the data and produces .npz files containing the data for the samples in the train, validation and test folders. This requires that the dataset folder already contains a 'web/all_data' folder with the original image and code samples.
    

In [None]:
from preprocessing import preprocess_data
preprocess_data(original_data_folder, domain_languages=['web'])

# The sample path lists were first globbed in the setup cell, i.e. before this preprocessing
# step had a chance to (re)create the .npz files. Refresh them here so that a fresh
# "Restart & Run All" does not train and evaluate on empty or stale lists.
# (Assumes preprocess_data writes the "*_256.npz" files into these folders - TODO confirm.)
train_paths = sorted(glob.glob(os.path.join(training_data_folder_path, "*_256.npz")))
valid_paths = sorted(glob.glob(os.path.join(validation_data_folder_path, "*_256.npz")))
test_paths = sorted(glob.glob(os.path.join(test_data_folder_path, "*_256.npz")))
logger.info("Samples after preprocessing - train: %d, valid: %d, test: %d",
            len(train_paths), len(valid_paths), len(test_paths))

## Show transformations
This part concerns data augmentation: it lets the user see which image transformations are appropriate to apply to the images when training the CNN/RNN networks.

In [None]:
# Visual check of the augmentation: each row shows one training image next to two random
# transformations of it. ImageDataGenerator is already imported in the imports cell.
from utils import preprocess_image
only_include_words = ['btn-green','btn-orange','btn-red']  # not used by this cell
image_size = 256
# Load the paths and generators
training_data_folder_path = 'datasets/web/training_data'
# Sorted so the same images are shown on every run (glob order is arbitrary).
image_train_paths = sorted(glob.glob(os.path.join(training_data_folder_path, "*.png")))
img_to_show = 5
img_gen = ImageDataGenerator(horizontal_flip=True, width_shift_range=5, fill_mode='nearest', vertical_flip=True)

# Never index past the images that actually exist.
n_rows_to_show = min(img_to_show, len(image_train_paths))
if n_rows_to_show == 0:
    logger.warning("No .png images found in %s; nothing to show", training_data_folder_path)
else:
    # squeeze=False keeps `ax` two-dimensional even when only one row is drawn.
    fig, ax = plt.subplots(n_rows_to_show, 3, figsize=(15, 5*n_rows_to_show), squeeze=False)
    for i in range(n_rows_to_show):
        img = preprocess_image(image_train_paths[i], IMAGE_SIZE)
        img_trans1 = img_gen.random_transform(img)
        img_trans2 = img_gen.random_transform(img)
        for col, (title, picture) in enumerate([("Original", img), ("Trans1", img_trans1), ("Trans2", img_trans2)]):
            ax[i, col].imshow(picture)
            _ = ax[i, col].set_title(title)

# Web CNN Model evaluation

## Original model

In [None]:
from models.pix2code_original_cnn_model import Pix2CodeOriginalCnnModel

# Words for which the datasets include object counts and which the model is constructed with.
words_to_include = voc.object_words
model_save_path = 'model_instances/pix2code_original_cnn_model.h5'

# Only the training stream gets the image augmentation; validation data is read as-is.
train_dataset = dataset_generator_reader(
    train_paths, voc,
    include_image_object_count_words=words_to_include,
    image_transformer=image_transformer,
)
valid_dataset = dataset_generator_reader(
    valid_paths, voc,
    include_image_object_count_words=words_to_include,
)

model_instance = Pix2CodeOriginalCnnModel(words_to_include)
# Build the model by running it once on a single batch of features.
pred = model_instance.predict(train_dataset.map(features_only).take(1))
model_instance.compile()

if not train_models:
    logger.info("Loading existing model")
    model_instance.load_weights(str(model_save_path))
else:
    # Prepare training
    logger.info("Training model")
    training_steps = (len(train_paths) // batch_size) * 8
    val_steps = len(valid_paths) // batch_size
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
        model_save_path, save_weights_only=True, save_best_only=True)
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(
        restore_best_weights=True, patience=10)
    hist = model_instance.fit(
        train_dataset,
        epochs=max_epochs,
        steps_per_epoch=training_steps,
        validation_data=valid_dataset,
        validation_steps=val_steps,
        callbacks=[checkpoint_cb, early_stopping_cb],
    )

# Eval: one error table per data split, displayed in train / validation / test order.
train_errors, train_y, train_predictions = eval_cnn_model(
    model_instance, train_paths, voc, words_to_include, 'train_accuracy')
display(train_errors)
validation_errors, validation_y, validation_predictions = eval_cnn_model(
    model_instance, valid_paths, voc, words_to_include, 'validation_accuracy')
display(validation_errors)
test_errors, test_y, test_predictions = eval_cnn_model(
    model_instance, test_paths, voc, words_to_include, 'test_accuracy')
display(test_errors)


In [None]:
# Check which validation files were predicted incorrectly.
# A row counts as wrong when any rounded prediction differs from its label. The mask is built
# once with the vectorised DataFrame.any instead of twice with a row-wise apply(any, ...).
# (Assumes validation_y and validation_predictions are identically labelled DataFrames.)
mismatch_rows = (validation_y != validation_predictions.round()).any(axis=1)
y_bad = validation_y[mismatch_rows]
pred_bad = validation_predictions[mismatch_rows]
print("Nr of incorrectly predicted files: {}".format(y_bad.shape[0]))
display(y_bad)
display(pred_bad)

## Shallow CNN model

In [None]:
from models.shallow_cnn_model import ShallowCnnModel

# Words for which the datasets include object counts and which the model is constructed with.
words_to_include = list(voc.object_words)
model_save_path = 'model_instances/shallow_cnn_model.h5'

# Only the training stream gets the image augmentation; validation data is read as-is.
train_dataset = dataset_generator_reader(train_paths, voc, include_image_object_count_words=words_to_include, 
                                         image_transformer=image_transformer)
valid_dataset = dataset_generator_reader(valid_paths, voc, include_image_object_count_words=words_to_include)

model_instance = ShallowCnnModel(words_to_include,dense_layer_size=512, dropout_ratio=0.1)
# Build the model by running it once on a single batch of features.
pred = model_instance.predict(train_dataset.map(features_only).take(1))
# `learning_rate` is the supported keyword; the `lr` alias is deprecated in tf.keras optimizers.
model_instance.compile(optimizer=RMSprop(learning_rate=0.0001, clipvalue=1.0), loss='mse')

# Train
if train_models:
    logger.info("Training model")
    training_steps = int(len(train_paths)/batch_size)*8
    val_steps = int(len(valid_paths)/batch_size)
    # Keep only the best weights on disk and stop after 10 epochs without improvement.
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(model_save_path, save_best_only=True, save_weights_only=True)
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
    hist = model_instance.fit(train_dataset, steps_per_epoch=training_steps,
                              validation_data=valid_dataset, validation_steps=val_steps,
                              epochs=max_epochs, callbacks=[checkpoint_cb, early_stopping_cb])
else:
    logger.info("Loading existing model")
    model_instance.load_weights(str(model_save_path))

# Eval: one error table per data split.
train_errors, train_y, train_predictions = eval_cnn_model(model_instance, train_paths, voc, words_to_include,'train_accuracy')
display(train_errors)
validation_errors, validation_y, validation_predictions = eval_cnn_model(model_instance, valid_paths, voc, words_to_include,'validation_accuracy')
display(validation_errors)
test_errors, test_y, test_predictions = eval_cnn_model(model_instance, test_paths, voc, words_to_include,'test_accuracy')
display(test_errors)

In [None]:
# Check which validation files were predicted incorrectly.
# A row counts as wrong when any rounded prediction differs from its label. The mask is built
# once with the vectorised DataFrame.any instead of twice with a row-wise apply(any, ...).
# (Assumes validation_y and validation_predictions are identically labelled DataFrames.)
mismatch_rows = (validation_y != validation_predictions.round()).any(axis=1)
y_bad = validation_y[mismatch_rows]
pred_bad = validation_predictions[mismatch_rows]
print("Nr of incorrectly predicted files: {}".format(y_bad.shape[0]))
display(y_bad)
display(pred_bad)

### CNN layer output inspection (object detection and positioning)

In [None]:
# Rebuild the shallow CNN with image_out=True and load the trained weights into it.
# NOTE: relies on words_to_include, train_dataset and model_save_path still holding the values
# set by the shallow CNN cell above - run this cell directly after that one.
model_instance = ShallowCnnModel(words_to_include,dense_layer_size=512, dropout_ratio=0.1, image_out=True)
# Build the model
pred = model_instance.predict(train_dataset.map(features_only).take(1))
# `learning_rate` is the supported keyword; the `lr` alias is deprecated in tf.keras optimizers.
model_instance.compile(optimizer=RMSprop(learning_rate=0.00001, clipvalue=1.0), loss='mse')
model_instance.load_weights(str(model_save_path))

file_id = 'ADE9B442-C9D4-4583-BF96-374D792ACF19'
matching_files = list(Path('datasets/web').rglob(file_id+"_256.npz"))
if not matching_files:
    # Fail with a readable message instead of an IndexError on the empty list.
    raise FileNotFoundError("No '{}_256.npz' found below datasets/web".format(file_id))
npz_file = str(matching_files[0])

# allow_pickle=True lets numpy unpickle arbitrary objects, so only load .npz files produced locally.
# The context manager closes the archive once the image array has been read.
with np.load(npz_file, allow_pickle=True) as temp:
    img_data = temp['img_data']
# Add a leading batch axis of size 1 before predicting.
pred_all = model_instance.predict({'img_data': np.expand_dims(img_data, axis=0)})
pred_all = {key:val[0] for key, val in pred_all.items()}
# One panel for the input image plus one per word; assumes two outputs per word - TODO confirm.
n_plots = int(len(pred_all)/2)+1
n_rows = math.ceil(n_plots/3)
fig, ax = plt.subplots(n_rows,3,figsize=(15, 5*n_rows), squeeze=False)

ax[0,0].imshow(cv2.cvtColor(img_data, cv2.COLOR_BGR2RGB),)
# `ax` is always 2-D (squeeze=False), so the panels are drawn for a single row as well;
# the former `if n_rows > 1` guard silently skipped all of them in that case.
for i, word in enumerate(voc.object_words, start=1):
    row, col = divmod(i, 3)
    ax[row, col].imshow(pred_all['img_out_{}'.format(word)].squeeze(-1))
    ax[row, col].set_title("{}:{}".format(word,pred_all[word+"_count"][0]))
#plt.savefig('image_out_example.png')

# Web code model evaluation

## Original model - but trained until the validation error rises

In [None]:

from models.pix2code_original_model import Pix2codeOriginalModel

model_save_path =  'model_instances/pix2code_original_model.h5'

# Datasets with single-word context and code, fixed to CONTEXT_LENGTH.
train_dataset = dataset_generator_reader(train_paths, voc, 
                                         include_context_mode='single_word', include_code_mode='single_word',
                                        fixed_output_length=CONTEXT_LENGTH)
valid_dataset = dataset_generator_reader(valid_paths, voc, 
                                         include_context_mode='single_word', include_code_mode='single_word',
                                        fixed_output_length=CONTEXT_LENGTH)
# Build the model by running it once on a single batch of features.
model_instance = Pix2codeOriginalModel(voc.size)
pred = model_instance.predict(train_dataset.map(features_only).take(1))
# `learning_rate` is the supported keyword; the `lr` alias is deprecated in tf.keras optimizers.
model_instance.compile(optimizer=RMSprop(learning_rate=0.0001, clipvalue=1.0))

if train_models:
    logger.info("training_model")
    # Steps per epoch come from the instance counts stored next to the data, not from the file count.
    with open(training_data_folder_path + "/nr_of_instances.txt", 'r') as f:
        nr_of_training_instances = int(f.read())
    training_steps = int(nr_of_training_instances / batch_size)

    with open(validation_data_folder_path + "/nr_of_instances.txt", 'r') as f:
        nr_of_eval_instances = int(f.read())
    val_steps = int(nr_of_eval_instances / batch_size)

    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(model_save_path, save_best_only=True, save_weights_only=True)
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
    hist = model_instance.fit(train_dataset, steps_per_epoch=training_steps,
                             validation_data=valid_dataset, validation_steps=val_steps,
                             epochs=max_epochs, callbacks=[checkpoint_cb, early_stopping_cb])
    # Evaluate every split and cache the tables next to the weights ([:-3] strips ".h5").
    train_df = eval_code_error(model_instance,train_paths,voc)
    train_df.to_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = eval_code_error(model_instance,valid_paths,voc)
    valid_df.to_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = eval_code_error(model_instance,test_paths,voc)
    test_df.to_pickle(model_save_path[:-3]+"_test_error.pickle")
else:
    if os.path.exists(model_save_path):
        logger.info("Loading weights")
        model_instance.load_weights(str(model_save_path))
    else:
        # Make the skipped load visible instead of continuing silently.
        logger.warning("No saved weights at %s; model_instance keeps its initial weights", model_save_path)

    # read_pickle can execute code embedded in the file - only load pickles written by this notebook.
    train_df = pd.read_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = pd.read_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = pd.read_pickle(model_save_path[:-3]+"_test_error.pickle")

# Log the mean of each error column per split.
for df, name in zip([train_df, valid_df,test_df],['train','valid','test']):    
    logger.info("{0} error: {1:.4f}, correct: {2:.4}, length: {3:.4}, active: {4:.4}, but: {5:.4}".format(
        name, df.error.mean(), df.correctly_predicted.mean(),  df.same_length.mean(), df.active_button_correct.mean(), 
        df.button_color_correct.mean()))

## Image model (Non-rnn model)

In [None]:

from models.shallow_image_model import ShallowImageModel
model_save_path = 'model_instances/shallow_image_model.h5'
# Datasets carry the object-count words and the full code, fixed to code_max_length.
# (Per the original note: x is the image, y holds one "<word>_count" per word plus the
# one-hot encoded "code" - TODO confirm against dataset_generator_reader.)
train_dataset = dataset_generator_reader(train_paths, voc, include_image_object_count_words=voc.object_words,
                                         include_code_mode='full_code', fixed_output_length=code_max_length)
valid_dataset = dataset_generator_reader(valid_paths, voc, include_image_object_count_words=voc.object_words,
                                         include_code_mode='full_code', fixed_output_length=code_max_length)

model_instance = ShallowImageModel(voc.words,image_count_words=voc.object_words, dense_layer_size=512)
# Build the model
pred = model_instance.predict(train_dataset.map(features_only).take(1))
model_instance.compile()


if train_models:
    training_steps = int(len(train_paths)/batch_size)*5
    val_steps = int(len(valid_paths)/batch_size)

    # Do the transfer learning: load matching layers (by name) from the shallow CNN weights.
    transfer_learning_model_save_path = 'model_instances/shallow_cnn_model.h5'
    model_instance.load_weights(transfer_learning_model_save_path,by_name=True)

    transferred_layer_names = ['cnn_unit','counter_unit']
    # Phase 1 trains with the transferred layers locked; phase 2 unlocks them and continues
    # with a smaller learning rate and a heavier 'code' loss weight.
    # Columns: (transferred layers trainable, 'code' loss weight, learning rate, early-stopping patience)
    training_phases = [
        (False, 1.0, 0.0001, 5),
        (True, 10.0, 0.00001, 10),
    ]
    for layers_trainable, code_loss_weight, phase_learning_rate, early_stopping_patience in training_phases:
        for layer_name in transferred_layer_names:
            layer = model_instance.get_layer(layer_name)
            layer.trainable = layers_trainable
        # One mse loss per count output (weights summing to 1) plus cross-entropy on the code.
        loss = {word+"_count": 'mse' for word in model_instance.image_count_words}
        loss.update({'code':'categorical_crossentropy'})
        loss_weights = {word+"_count": 1/len( model_instance.image_count_words) for word in model_instance.image_count_words}
        loss_weights.update({'code':code_loss_weight})
        # `learning_rate` is the supported keyword; the `lr` alias is deprecated in tf.keras optimizers.
        model_instance.compile(loss=loss, loss_weights=loss_weights,
                               optimizer=RMSprop(learning_rate=phase_learning_rate, clipvalue=1.0))

        # Fresh callbacks per phase, exactly as in the former copy-pasted blocks.
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(model_save_path, save_best_only=True, save_weights_only=True)
        early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=early_stopping_patience, restore_best_weights=True)

        hist = model_instance.fit(train_dataset, steps_per_epoch=training_steps,
                                  validation_data=valid_dataset, validation_steps=val_steps,
                                  epochs=max_epochs, callbacks=[checkpoint_cb, early_stopping_cb])

    # Evaluate every split and cache the tables next to the weights ([:-3] strips ".h5").
    train_df = eval_code_error(model_instance,train_paths,voc)
    train_df.to_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = eval_code_error(model_instance,valid_paths,voc)
    valid_df.to_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = eval_code_error(model_instance,test_paths,voc)
    test_df.to_pickle(model_save_path[:-3]+"_test_error.pickle")
else:
    if os.path.exists(model_save_path):
        logger.info("Loading existing model")
        model_instance.load_weights(model_save_path)
    else:
        # Make the skipped load visible instead of continuing silently.
        logger.warning("No saved weights at %s; model_instance keeps its initial weights", model_save_path)
    # read_pickle can execute code embedded in the file - only load pickles written by this notebook.
    train_df = pd.read_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = pd.read_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = pd.read_pickle(model_save_path[:-3]+"_test_error.pickle")
# Log the mean of each error column per split.
for df, name in zip([train_df, valid_df,test_df],['train','valid','test']):    
    logger.info("{0} error: {1:.4f}, correct: {2:.4}, length: {3:.4}, active: {4:.4}, but: {5:.4}".format(
        name, df.error.mean(), df.correctly_predicted.mean(),  df.same_length.mean(), df.active_button_correct.mean(), 
        df.button_color_correct.mean()))

## RNN-image-model-repeat

In [None]:
from models.rnn_image_model_repeat import RnnImageModelRepeat
model_save_path = 'model_instances/rnn_image_model_repeat.h5'
# Datasets carry the object-count words plus full-code context and targets, fixed to code_max_length.
train_dataset = dataset_generator_reader(train_paths, voc, include_image_object_count_words=voc.object_words,
                                         include_code_mode='full_code',include_context_mode='full_code', 
                                         fixed_output_length=code_max_length)
valid_dataset = dataset_generator_reader(valid_paths, voc, include_image_object_count_words=voc.object_words,
                                         include_code_mode='full_code', include_context_mode='full_code', 
                                         fixed_output_length=code_max_length)


model_instance = RnnImageModelRepeat(voc.words,image_count_words=voc.object_words, max_code_length=code_max_length,
                                     dense_layer_size=512)
# Build the model
pred = model_instance.predict(train_dataset.map(features_only).take(1))
model_instance.compile()

if train_models:
    training_steps = int(len(train_paths)/batch_size)*5
    val_steps = int(len(valid_paths)/batch_size)

    # Do the transfer learning: load matching layers (by name) from the shallow image model weights.
    transfer_learning_model_save_path = 'model_instances/shallow_image_model.h5'
    model_instance.load_weights(transfer_learning_model_save_path,by_name=True)

    transferred_layer_names = ['cnn_unit','counter_unit','ordering_1','ordering_2','ordering_3']
    # Phase 1 trains with the transferred layers locked (and 5x the steps per epoch); phase 2
    # unlocks them and continues with a smaller learning rate and a heavier 'code' loss weight.
    # Columns: (transferred layers trainable, 'code' loss weight, learning rate,
    #           early-stopping patience, steps-per-epoch multiplier)
    training_phases = [
        (False, 1.0, 0.0001, 5, 5),
        (True, 10.0, 0.00001, 10, 1),
    ]
    for (layers_trainable, code_loss_weight, phase_learning_rate,
         early_stopping_patience, steps_multiplier) in training_phases:
        for layer_name in transferred_layer_names:
            layer = model_instance.get_layer(layer_name)
            layer.trainable = layers_trainable
        # One mse loss per count output (weights summing to 1) plus cross-entropy on the code.
        loss = {word+"_count": 'mse' for word in model_instance.image_count_words}
        loss.update({'code':'categorical_crossentropy'})
        loss_weights = {word+"_count": 1/len( model_instance.image_count_words) for word in model_instance.image_count_words}
        loss_weights.update({'code':code_loss_weight})
        # `learning_rate` is the supported keyword; the `lr` alias is deprecated in tf.keras optimizers.
        model_instance.compile(loss=loss, loss_weights=loss_weights,
                               optimizer=RMSprop(learning_rate=phase_learning_rate, clipvalue=1.0))

        # Fresh callbacks per phase, exactly as in the former copy-pasted blocks.
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(model_save_path, save_best_only=True, save_weights_only=True)
        early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=early_stopping_patience, restore_best_weights=True)

        hist = model_instance.fit(train_dataset, steps_per_epoch=training_steps*steps_multiplier,
                                  validation_data=valid_dataset, validation_steps=val_steps,
                                  epochs=max_epochs, callbacks=[checkpoint_cb, early_stopping_cb])

    # Evaluate every split and cache the tables next to the weights ([:-3] strips ".h5").
    train_df = eval_code_error(model_instance,train_paths,voc)
    train_df.to_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = eval_code_error(model_instance,valid_paths,voc)
    valid_df.to_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = eval_code_error(model_instance,test_paths,voc)
    test_df.to_pickle(model_save_path[:-3]+"_test_error.pickle")
else:
    if os.path.exists(model_save_path):
        logger.info("Loading existing model")
        model_instance.load_weights(model_save_path)
    else:
        # Make the skipped load visible instead of continuing silently.
        logger.warning("No saved weights at %s; model_instance keeps its initial weights", model_save_path)

    # read_pickle can execute code embedded in the file - only load pickles written by this notebook.
    train_df = pd.read_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = pd.read_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = pd.read_pickle(model_save_path[:-3]+"_test_error.pickle")
# Log the mean of each error column per split.
for df, name in zip([train_df, valid_df,test_df],['train','valid','test']):    
    logger.info("{0} error: {1:.4f}, correct: {2:.4}, length: {3:.4}, active: {4:.4}, but: {5:.4}".format(
        name, df.error.mean(), df.correctly_predicted.mean(),  df.same_length.mean(), df.active_button_correct.mean(), 
        df.button_color_correct.mean()))

## RNN-image-model-memory

In [None]:
from models.rnn_image_model_memory import RnnImageModelMemory
model_save_path = 'model_instances/rnn_image_model_memory.h5'
# Datasets carry the object-count words plus full-code context and targets, fixed to code_max_length.
train_dataset = dataset_generator_reader(train_paths, voc, include_image_object_count_words=voc.object_words,
                                         include_code_mode='full_code',include_context_mode='full_code', 
                                         fixed_output_length=code_max_length)
valid_dataset = dataset_generator_reader(valid_paths, voc, include_image_object_count_words=voc.object_words,
                                         include_code_mode='full_code', include_context_mode='full_code', 
                                         fixed_output_length=code_max_length)

model_instance = RnnImageModelMemory(voc.words,image_count_words=voc.object_words, max_code_length=code_max_length,
                                     dense_layer_size=512)
# Build the model
pred = model_instance.predict(train_dataset.map(features_only).take(1))
model_instance.compile()

if train_models:
    training_steps = int(len(train_paths)/batch_size)*5
    val_steps = int(len(valid_paths)/batch_size)

    # Do the transfer learning: load matching layers (by name) from the shallow image model weights.
    transfer_learning_model_save_path = 'model_instances/shallow_image_model.h5'
    model_instance.load_weights(transfer_learning_model_save_path,by_name=True)

    transferred_layer_names = ['cnn_unit','counter_unit','ordering_1','ordering_2','ordering_3']
    # Phase 1 trains with the transferred layers locked (and 5x the steps per epoch); phase 2
    # unlocks them and continues with a smaller learning rate and a heavier 'code' loss weight.
    # Columns: (transferred layers trainable, 'code' loss weight, learning rate,
    #           early-stopping patience, steps-per-epoch multiplier)
    training_phases = [
        (False, 1.0, 0.0001, 5, 5),
        (True, 10.0, 0.00001, 10, 1),
    ]
    for (layers_trainable, code_loss_weight, phase_learning_rate,
         early_stopping_patience, steps_multiplier) in training_phases:
        for layer_name in transferred_layer_names:
            layer = model_instance.get_layer(layer_name)
            layer.trainable = layers_trainable
        # One mse loss per count output (weights summing to 1) plus cross-entropy on the code.
        loss = {word+"_count": 'mse' for word in model_instance.image_count_words}
        loss.update({'code':'categorical_crossentropy'})
        loss_weights = {word+"_count": 1/len( model_instance.image_count_words) for word in model_instance.image_count_words}
        loss_weights.update({'code':code_loss_weight})
        # `learning_rate` is the supported keyword; the `lr` alias is deprecated in tf.keras optimizers.
        model_instance.compile(loss=loss, loss_weights=loss_weights,
                               optimizer=RMSprop(learning_rate=phase_learning_rate, clipvalue=1.0))

        # Fresh callbacks per phase, exactly as in the former copy-pasted blocks.
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(model_save_path, save_best_only=True, save_weights_only=True)
        early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=early_stopping_patience, restore_best_weights=True)

        hist = model_instance.fit(train_dataset, steps_per_epoch=training_steps*steps_multiplier,
                                  validation_data=valid_dataset, validation_steps=val_steps,
                                  epochs=max_epochs, callbacks=[checkpoint_cb, early_stopping_cb])

    # Evaluate every split and cache the tables next to the weights ([:-3] strips ".h5").
    train_df = eval_code_error(model_instance,train_paths,voc)
    train_df.to_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = eval_code_error(model_instance,valid_paths,voc)
    valid_df.to_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = eval_code_error(model_instance,test_paths,voc)
    test_df.to_pickle(model_save_path[:-3]+"_test_error.pickle")
else:
    if os.path.exists(model_save_path):
        logger.info("Loading existing model")
        model_instance.load_weights(model_save_path)
    else:
        # Make the skipped load visible instead of continuing silently.
        logger.warning("No saved weights at %s; model_instance keeps its initial weights", model_save_path)

    # read_pickle can execute code embedded in the file - only load pickles written by this notebook.
    train_df = pd.read_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = pd.read_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = pd.read_pickle(model_save_path[:-3]+"_test_error.pickle")
# Log the mean of each error column per split.
for df, name in zip([train_df, valid_df,test_df],['train','valid','test']):    
    logger.info("{0} error: {1:.4f}, correct: {2:.4}, length: {3:.4}, active: {4:.4}, but: {5:.4}".format(
        name, df.error.mean(), df.correctly_predicted.mean(),  df.same_length.mean(), df.active_button_correct.mean(), 
        df.button_color_correct.mean()))


## RNN TFA architecture

In [None]:

from models.rnn_image_model_tfa import RnnImageModelTfa
model_save_path = 'model_instances/rnn_image_model_tfa.h5'

# Datasets carry the object-count words plus full-code context and targets, fixed to code_max_length.
train_dataset = dataset_generator_reader(train_paths, voc, include_image_object_count_words=voc.object_words,
                                         include_code_mode='full_code',include_context_mode='full_code', 
                                         fixed_output_length=code_max_length)
valid_dataset = dataset_generator_reader(valid_paths, voc, include_image_object_count_words=voc.object_words,
                                         include_code_mode='full_code', include_context_mode='full_code', 
                                         fixed_output_length=code_max_length)

model_instance = RnnImageModelTfa(voc.words,image_count_words=voc.object_words, max_code_length=code_max_length,
                                     dense_layer_size=512)
# Build the model
pred = model_instance.predict(train_dataset.map(features_only).take(1))
# One mse loss per count output plus cross-entropy on the code (no loss weights at this point).
loss = {word+"_count": 'mse' for word in model_instance.image_count_words}
loss.update({'code':'categorical_crossentropy'})
# `learning_rate` is the supported keyword; the `lr` alias is deprecated in tf.keras optimizers.
model_instance.compile(loss=loss,optimizer=RMSprop(learning_rate=0.0001, clipvalue=1.0))
if train_models:
    training_steps = int(len(train_paths)/batch_size)*5
    val_steps = int(len(valid_paths)/batch_size)

    # Do the transfer learning: load matching layers (by name) from the shallow image model weights.
    transfer_learning_model_save_path = 'model_instances/shallow_image_model.h5'
    model_instance.load_weights(transfer_learning_model_save_path,by_name=True)

    transferred_layer_names = ['cnn_unit','counter_unit','ordering_1','ordering_2','ordering_3']
    # Phase 1 trains with the transferred layers locked (and 5x the steps per epoch); phase 2
    # unlocks them and lowers the learning rate; phase 3 repeats the phase-2 settings with a
    # freshly compiled optimizer and fresh callbacks, as the original cell did.
    # Columns: (transferred layers trainable, 'code' loss weight, learning rate,
    #           early-stopping patience, steps-per-epoch multiplier)
    training_phases = [
        (False, 100.0, 0.0001, 5, 5),
        (True, 10.0, 0.00001, 10, 1),
        (True, 10.0, 0.00001, 10, 1),
    ]
    for (layers_trainable, code_loss_weight, phase_learning_rate,
         early_stopping_patience, steps_multiplier) in training_phases:
        for layer_name in transferred_layer_names:
            layer = model_instance.get_layer(layer_name)
            layer.trainable = layers_trainable
        # Count losses share a total weight of 1; the 'code' loss weight varies per phase.
        loss = {word+"_count": 'mse' for word in model_instance.image_count_words}
        loss.update({'code':'categorical_crossentropy'})
        loss_weights = {word+"_count": 1/len( model_instance.image_count_words) for word in model_instance.image_count_words}
        loss_weights.update({'code':code_loss_weight})
        model_instance.compile(loss=loss, loss_weights=loss_weights,
                               optimizer=RMSprop(learning_rate=phase_learning_rate, clipvalue=1.0))

        # Fresh callbacks per phase, exactly as in the former copy-pasted blocks.
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(model_save_path, save_best_only=True, save_weights_only=True)
        early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=early_stopping_patience, restore_best_weights=True)

        hist = model_instance.fit(train_dataset, steps_per_epoch=training_steps*steps_multiplier,
                                  validation_data=valid_dataset, validation_steps=val_steps,
                                  epochs=max_epochs, callbacks=[checkpoint_cb, early_stopping_cb])

    # Evaluate every split and cache the tables next to the weights ([:-3] strips ".h5").
    train_df = eval_code_error(model_instance,train_paths,voc)
    train_df.to_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = eval_code_error(model_instance,valid_paths,voc)
    valid_df.to_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = eval_code_error(model_instance,test_paths,voc)
    test_df.to_pickle(model_save_path[:-3]+"_test_error.pickle")
else:
    if os.path.exists(model_save_path):
        logger.info("Loading existing model")
        model_instance.load_weights(model_save_path)
    else:
        # Make the skipped load visible instead of continuing silently.
        logger.warning("No saved weights at %s; model_instance keeps its initial weights", model_save_path)

    # read_pickle can execute code embedded in the file - only load pickles written by this notebook.
    train_df = pd.read_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = pd.read_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = pd.read_pickle(model_save_path[:-3]+"_test_error.pickle")
# Log the mean of each error column per split.
for df, name in zip([train_df, valid_df,test_df],['train','valid','test']):    
    logger.info("{0} error: {1:.4f}, correct: {2:.4}, length: {3:.4}, active: {4:.4}, but: {5:.4}".format(
        name, df.error.mean(), df.correctly_predicted.mean(),  df.same_length.mean(), df.active_button_correct.mean(), 
        df.button_color_correct.mean()))

## RNN TFA - Repeat architecture

In [None]:


from models.rnn_image_model_tfa_repeat import RnnImageModelTfaRepeat

# Train (or reload) the "RNN TFA - Repeat" model and log its code-error metrics per split.
model_save_path = 'model_instances/rnn_image_model_tfa_repeat.h5'

# Train and validation readers are configured identically; only the file list differs.
dataset_options = dict(include_image_object_count_words=voc.object_words,
                       include_code_mode='full_code', include_context_mode='full_code',
                       fixed_output_length=code_max_length)
train_dataset = dataset_generator_reader(train_paths, voc, **dataset_options)
valid_dataset = dataset_generator_reader(valid_paths, voc, **dataset_options)

model_instance = RnnImageModelTfaRepeat(voc.words, image_count_words=voc.object_words,
                                        max_code_length=code_max_length,
                                        dense_layer_size=512, dropout_ratio=0.25)
# Build the model by pushing a single batch of features through it.
pred = model_instance.predict(train_dataset.map(features_only).take(1))

# One 'mse' loss per "<word>_count" output plus categorical cross-entropy on the 'code' output.
# The same dict is reused by every compile() below.
loss = {word+"_count": 'mse' for word in model_instance.image_count_words}
loss['code'] = 'categorical_crossentropy'
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
model_instance.compile(loss=loss, optimizer=RMSprop(learning_rate=0.0001, clipvalue=1.0))

if train_models:
    training_steps = int(len(train_paths)/batch_size)*5
    val_steps = int(len(valid_paths)/batch_size)

    # Two consecutive training stages on the same model:
    # (weight of the 'code' loss, learning rate, early-stopping patience).
    training_stages = [(1.0, 0.0001, 5), (10.0, 0.00001, 20)]
    for code_loss_weight, stage_learning_rate, early_stopping_patience in training_stages:
        # All count outputs together weigh 1; only the 'code' weight changes between stages.
        loss_weights = {word+"_count": 1/len(model_instance.image_count_words)
                        for word in model_instance.image_count_words}
        loss_weights['code'] = code_loss_weight
        model_instance.compile(loss=loss, loss_weights=loss_weights,
                               optimizer=RMSprop(learning_rate=stage_learning_rate, clipvalue=1.0))

        # Fresh callbacks for every stage (both stages write to the same checkpoint file).
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(model_save_path, save_best_only=True,
                                                           save_weights_only=True)
        early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=early_stopping_patience,
                                                             restore_best_weights=True)

        hist = model_instance.fit(train_dataset, steps_per_epoch=training_steps,
                                  validation_data=valid_dataset, validation_steps=val_steps,
                                  epochs=max_epochs, callbacks=[checkpoint_cb, early_stopping_cb])

    # Score every split with the trained model and cache the result tables next to the weights
    # (model_save_path[:-3] strips the ".h5" suffix).
    train_df = eval_code_error(model_instance, train_paths, voc)
    train_df.to_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = eval_code_error(model_instance, valid_paths, voc)
    valid_df.to_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = eval_code_error(model_instance, test_paths, voc)
    test_df.to_pickle(model_save_path[:-3]+"_test_error.pickle")
else:
    # No training requested: reuse the stored weights (if any) and the cached error tables.
    if os.path.exists(model_save_path):
        logger.info("Loading existing model")
        model_instance.load_weights(model_save_path)

    train_df = pd.read_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = pd.read_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = pd.read_pickle(model_save_path[:-3]+"_test_error.pickle")

# Log the mean of every metric column for each split.
for df, name in zip([train_df, valid_df, test_df], ['train', 'valid', 'test']):
    logger.info("{0} error: {1:.4f}, correct: {2:.4}, length: {3:.4}, active: {4:.4}, but: {5:.4}".format(
        name, df.error.mean(), df.correctly_predicted.mean(), df.same_length.mean(),
        df.active_button_correct.mean(), df.button_color_correct.mean()))

## RNN TFA - Repeat (bypass) architecture

In [None]:

from models.rnn_image_model_tfa_repeat_bypass import RnnImageModelTfaRepeatBypass

# Train (or reload) the "RNN TFA - Repeat (bypass)" model and log its code-error metrics per split.
model_save_path = 'model_instances/rnn_image_model_tfa_repeat_bypass.h5'

# Original note: need generators with x as img, y as
# {word +_count:count for each word + code:50 first onehotencoded}.
# Train and validation readers are configured identically; only the file list differs.
dataset_options = dict(include_image_object_count_words=voc.object_words,
                       include_code_mode='full_code', include_context_mode='full_code',
                       fixed_output_length=code_max_length, batch_size=32)
train_dataset = dataset_generator_reader(train_paths, voc, **dataset_options)
valid_dataset = dataset_generator_reader(valid_paths, voc, **dataset_options)

model_instance = RnnImageModelTfaRepeatBypass(voc.words, image_count_words=voc.object_words,
                                              max_code_length=code_max_length,
                                              dense_layer_size=512, dropout_ratio=0.25,
                                              order_layer_output_size=512, cell_type='lstm',
                                              pool_using_strides=False)
# Build the model by pushing a single batch of features through it.
pred = model_instance.predict(train_dataset.map(features_only).take(1))

# One 'mse' loss per "<word>_count" output plus categorical cross-entropy on the 'code' output.
# The same dict is reused by every compile() below.
loss = {word+"_count": 'mse' for word in model_instance.image_count_words}
loss['code'] = 'categorical_crossentropy'
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
model_instance.compile(loss=loss, optimizer=RMSprop(learning_rate=0.0001, clipvalue=1.0))

if train_models:
    training_steps = int(len(train_paths)/batch_size)*5
    val_steps = int(len(valid_paths)/batch_size)

    # Do the transfer learning: start from stored weights, matching layers by name.
    # NOTE(review): this path is identical to model_save_path, i.e. the model is initialised from
    # its own earlier checkpoint (which must already exist) - confirm that this is intended and
    # that another model's weights were not meant here.
    transfer_learning_model_save_path = 'model_instances/rnn_image_model_tfa_repeat_bypass.h5'
    model_instance.load_weights(transfer_learning_model_save_path, by_name=True, skip_mismatch=True)

    # Stage 1: only the 'ordering_final' layer is trained, everything else is frozen.
    for layer in model_instance.layers:
        train_this_layer = layer.name == 'ordering_final'
        layer.trainable = train_this_layer
        if train_this_layer:
            logger.info("setting {} trainable".format(layer.name))
        else:
            logger.info("setting {} not trainable".format(layer.name))

    # All count outputs together weigh 1, the 'code' output weighs 1.
    loss_weights = {word+"_count": 1/len(model_instance.image_count_words)
                    for word in model_instance.image_count_words}
    loss_weights['code'] = 1.0
    model_instance.compile(loss=loss, loss_weights=loss_weights,
                           optimizer=RMSprop(learning_rate=0.0001, clipvalue=1.0))

    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(model_save_path, save_best_only=True,
                                                       save_weights_only=True)
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

    # This stage runs twice as many steps per epoch as the later ones.
    hist = model_instance.fit(train_dataset, steps_per_epoch=training_steps*2,
                              validation_data=valid_dataset, validation_steps=val_steps,
                              epochs=max_epochs, callbacks=[checkpoint_cb, early_stopping_cb])

    # Stage 2: unfreeze every layer and put ten times more weight on the 'code' loss.
    for layer in model_instance.layers:
        layer.trainable = True
    loss_weights = {word+"_count": 1/len(model_instance.image_count_words)
                    for word in model_instance.image_count_words}
    loss_weights['code'] = 10.0
    model_instance.compile(loss=loss, loss_weights=loss_weights,
                           optimizer=RMSprop(learning_rate=0.0001, clipvalue=1.0))

    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(model_save_path, save_best_only=True,
                                                       save_weights_only=True)
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

    hist = model_instance.fit(train_dataset, steps_per_epoch=training_steps,
                              validation_data=valid_dataset, validation_steps=val_steps,
                              epochs=max_epochs, callbacks=[checkpoint_cb, early_stopping_cb])

    # Stage 3: same losses and weights, ten times smaller learning rate, longer patience.
    # The checkpoint callback of stage 2 is deliberately reused here (not re-created).
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True)
    model_instance.compile(loss=loss, loss_weights=loss_weights,
                           optimizer=RMSprop(learning_rate=0.00001, clipvalue=1.0))
    hist = model_instance.fit(train_dataset, steps_per_epoch=training_steps,
                              validation_data=valid_dataset, validation_steps=val_steps,
                              epochs=max_epochs, callbacks=[checkpoint_cb, early_stopping_cb])

    # Score every split with the trained model and cache the result tables next to the weights
    # (model_save_path[:-3] strips the ".h5" suffix).
    train_df = eval_code_error(model_instance, train_paths, voc)
    train_df.to_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = eval_code_error(model_instance, valid_paths, voc)
    valid_df.to_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = eval_code_error(model_instance, test_paths, voc)
    test_df.to_pickle(model_save_path[:-3]+"_test_error.pickle")
else:
    # No training requested: reuse the stored weights (if any) and the cached error tables.
    if os.path.exists(model_save_path):
        logger.info("Loading existing model")
        model_instance.load_weights(model_save_path)

    train_df = pd.read_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = pd.read_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = pd.read_pickle(model_save_path[:-3]+"_test_error.pickle")

# Log the mean of every metric column for each split.
for df, name in zip([train_df, valid_df, test_df], ['train', 'valid', 'test']):
    logger.info("{0} error: {1:.4f}, correct: {2:.4}, length: {3:.4}, active: {4:.4}, but: {5:.4}".format(
        name, df.error.mean(), df.correctly_predicted.mean(), df.same_length.mean(),
        df.active_button_correct.mean(), df.button_color_correct.mean()))

## RNN TFA - Repeat (bypass) architecture with strides

In [None]:
from models.rnn_image_model_tfa_repeat_bypass import RnnImageModelTfaRepeatBypass

# Train (or reload) the bypass model built with pool_using_strides=True and log its
# code-error metrics per split.
model_save_path = 'model_instances/rnn_image_model_tfa_repeat_bypass_strides.h5'

# Original note: need generators with x as img, y as
# {word +_count:count for each word + code:50 first onehotencoded}.
# Train and validation readers are configured identically; only the file list differs.
dataset_options = dict(include_image_object_count_words=voc.object_words,
                       include_code_mode='full_code', include_context_mode='full_code',
                       fixed_output_length=code_max_length, batch_size=32)
train_dataset = dataset_generator_reader(train_paths, voc, **dataset_options)
valid_dataset = dataset_generator_reader(valid_paths, voc, **dataset_options)

model_instance = RnnImageModelTfaRepeatBypass(voc.words, image_count_words=voc.object_words,
                                              max_code_length=code_max_length,
                                              dense_layer_size=512, dropout_ratio=0.25,
                                              order_layer_output_size=512, pool_using_strides=True)
# Build the model by pushing a single batch of features through it.
pred = model_instance.predict(train_dataset.map(features_only).take(1))

# One 'mse' loss per "<word>_count" output plus categorical cross-entropy on the 'code' output.
# This dict is reused by the compile() calls of stages 1 and 2 below.
loss = {word+"_count": 'mse' for word in model_instance.image_count_words}
loss['code'] = 'categorical_crossentropy'
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
model_instance.compile(loss=loss, optimizer=RMSprop(learning_rate=0.0001, clipvalue=1.0))

if train_models:
    training_steps = int(len(train_paths)/batch_size)*5
    val_steps = int(len(valid_paths)/batch_size)

    # Do the transfer learning: start from the weights of the non-strided bypass model,
    # matching layers by name and skipping those whose shapes do not fit.
    transfer_learning_model_save_path = 'model_instances/rnn_image_model_tfa_repeat_bypass.h5'
    model_instance.load_weights(transfer_learning_model_save_path, by_name=True, skip_mismatch=True)

    # Stage 1: layers whose name contains "_pool" are frozen, all others are trained.
    # NOTE(review): confirm that freezing the "_pool" layers (rather than training only them)
    # is the intended direction for this transfer-learning step.
    for layer in model_instance.layers:
        train_this_layer = "_pool" not in layer.name
        layer.trainable = train_this_layer
        if train_this_layer:
            logger.info("setting {} trainable".format(layer.name))
        else:
            logger.info("setting {} not trainable".format(layer.name))

    # All count outputs together weigh 1, the 'code' output weighs 1.
    loss_weights = {word+"_count": 1/len(model_instance.image_count_words)
                    for word in model_instance.image_count_words}
    loss_weights['code'] = 1.0
    model_instance.compile(loss=loss, loss_weights=loss_weights,
                           optimizer=RMSprop(learning_rate=0.0001, clipvalue=1.0))

    early_stopping_patience = 5
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(model_save_path, save_best_only=True,
                                                       save_weights_only=True)
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=early_stopping_patience,
                                                         restore_best_weights=True)

    # This stage runs twice as many steps per epoch as the later ones.
    hist = model_instance.fit(train_dataset, steps_per_epoch=training_steps*2,
                              validation_data=valid_dataset, validation_steps=val_steps,
                              epochs=max_epochs, callbacks=[checkpoint_cb, early_stopping_cb])

    # Stage 2: unfreeze every layer and put ten times more weight on the 'code' loss.
    for layer in model_instance.layers:
        layer.trainable = True
    loss_weights = {word+"_count": 1/len(model_instance.image_count_words)
                    for word in model_instance.image_count_words}
    loss_weights['code'] = 10.0
    model_instance.compile(loss=loss, loss_weights=loss_weights,
                           optimizer=RMSprop(learning_rate=0.0001, clipvalue=1.0))

    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(model_save_path, save_best_only=True,
                                                       save_weights_only=True)
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

    hist = model_instance.fit(train_dataset, steps_per_epoch=training_steps,
                              validation_data=valid_dataset, validation_steps=val_steps,
                              epochs=max_epochs, callbacks=[checkpoint_cb, early_stopping_cb])

    # Keep a copy of the stage-2 weights before the final fine-tuning stage.
    # NOTE(review): the suffix is ".h", not ".h5" - Keras picks the on-disk format from the
    # suffix, so this is not written as HDF5 like the other weight files; confirm it is intended.
    model_instance.save_weights('model_instances/rnn_image_model_tfa_repeat_bypass_strides_memless_backup.h')

    # Stage 3: count outputs switch from 'mse' to 'mae', ten times smaller learning rate,
    # longer patience. The loss weights and the checkpoint callback of stage 2 are reused.
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True)
    loss = {word+"_count": 'mae' for word in model_instance.image_count_words}
    loss['code'] = 'categorical_crossentropy'
    model_instance.compile(loss=loss, loss_weights=loss_weights,
                           optimizer=RMSprop(learning_rate=0.00001, clipvalue=1.0))
    hist = model_instance.fit(train_dataset, steps_per_epoch=training_steps,
                              validation_data=valid_dataset, validation_steps=val_steps,
                              epochs=max_epochs, callbacks=[checkpoint_cb, early_stopping_cb])

    # Score every split with the trained model and cache the result tables next to the weights
    # (model_save_path[:-3] strips the ".h5" suffix).
    train_df = eval_code_error(model_instance, train_paths, voc)
    train_df.to_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = eval_code_error(model_instance, valid_paths, voc)
    valid_df.to_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = eval_code_error(model_instance, test_paths, voc)
    test_df.to_pickle(model_save_path[:-3]+"_test_error.pickle")
else:
    # No training requested: reuse the stored weights (if any) and the cached error tables.
    if os.path.exists(model_save_path):
        logger.info("Loading existing model")
        model_instance.load_weights(model_save_path)

    train_df = pd.read_pickle(model_save_path[:-3]+"_train_error.pickle")
    valid_df = pd.read_pickle(model_save_path[:-3]+"_valid_error.pickle")
    test_df = pd.read_pickle(model_save_path[:-3]+"_test_error.pickle")

# Log the mean of every metric column for each split.
for df, name in zip([train_df, valid_df, test_df], ['train', 'valid', 'test']):
    logger.info("{0} error: {1:.4f}, correct: {2:.4}, length: {3:.4}, active: {4:.4}, but: {5:.4}".format(
        name, df.error.mean(), df.correctly_predicted.mean(), df.same_length.mean(),
        df.active_button_correct.mean(), df.button_color_correct.mean()))

# Plot image output
Example for the TFA repeat (bypass) model with strides.

In [None]:
from models.rnn_image_model_tfa_repeat_bypass import RnnImageModelTfaRepeatBypass

# Rebuild the strided bypass model with image_out=True and load the trained weights into it,
# so that its outputs can be plotted.
model_save_path = 'model_instances/rnn_image_model_tfa_repeat_bypass_strides.h5'

# Original note: need generators with x as img, y as
# {word +_count:count for each word + code:50 first onehotencoded}.
# Train and validation readers are configured identically; only the file list differs.
dataset_options = dict(include_image_object_count_words=voc.object_words,
                       include_code_mode='full_code', include_context_mode='full_code',
                       fixed_output_length=code_max_length, batch_size=32)
train_dataset = dataset_generator_reader(train_paths, voc, **dataset_options)
valid_dataset = dataset_generator_reader(valid_paths, voc, **dataset_options)

model_instance = RnnImageModelTfaRepeatBypass(voc.words, image_count_words=voc.object_words,
                                              max_code_length=code_max_length,
                                              dense_layer_size=512, dropout_ratio=0.25,
                                              order_layer_output_size=512, pool_using_strides=True,
                                              image_out=True)
# Run a single batch of features through the model before the weights are loaded.
pred = model_instance.predict(train_dataset.map(features_only).take(1))

# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
model_instance.compile(loss='mse', optimizer=RMSprop(learning_rate=0.0001, clipvalue=1.0))
if os.path.exists(model_save_path):
    logger.info("Loading existing model")
    model_instance.load_weights(model_save_path)
else:
    # Without stored weights the model stays randomly initialised; make that visible.
    logger.warning("No weights found at {}; the model is left untrained".format(model_save_path))

In [None]:
from utils import preprocess_code2context

# Predict the code for one training sample and plot the model's image outputs per object word.
file_id = '1F4D3508-2479-4D8A-B5F0-92CF690BD1AE'
# Raises IndexError when no file with this id exists below the training folder.
path = str(list(Path('datasets/web/training_data').rglob(file_id+"_256.npz"))[0])
print(path)
# Read the two arrays that are needed and close the archive again instead of leaking the file
# handle. allow_pickle=True can execute arbitrary code, so only load trusted dataset files.
with np.load(path, allow_pickle=True) as archive:
    temp = {'img_data': archive['img_data'], 'code': archive['code']}

# NOTE(review): 100 and 0 are passed as hard-coded positional arguments - confirm that they match
# the settings the model was trained with (e.g. code_max_length).
context = preprocess_code2context(str(temp['code']), 'full_code', voc, 100, 0)
pred = model_instance.predict({'img_data': np.expand_dims(temp['img_data'], 0),
                               'context': np.expand_dims(context, 0)})
probas = pred['code'][0]
# Most likely word per position, cut at the first <eos> and with padding removed.
predicted_words = [voc.token2word_dict[val] for val in np.argmax(probas, axis=1)]
prediction = " ".join(predicted_words).split(" <eos>")[0].replace(" <pad>", "")
# Print the reference code and the prediction with bracket words rendered as "[" / "]".
print(str(temp['code']).replace('close_square_bracket', "]").replace('square_bracket', "["))
print(prediction.replace('close_square_bracket', "]").replace('square_bracket', "["))

# One panel for the input image plus one per object word, three panels per row.
n_cols = 3
n_plots = int(len(voc.object_words))+1
n_rows = math.ceil(n_plots/n_cols)
fig, ax = plt.subplots(n_rows, n_cols, figsize=(15, 5*n_rows), squeeze=False)

ax[0, 0].imshow(cv2.cvtColor(temp['img_data'], cv2.COLOR_BGR2RGB))
for i, word in enumerate(voc.object_words, start=1):
    row, col = divmod(i, n_cols)
    pred_img = pred["img_out_"+word][0, :, :, 0]
    ax[row, col].imshow(pred_img)
    ax[row, col].set_title("{}:{}".format(word, pred[word+"_count"]))

# Hide the panels of the last row that are not used.
for i in range(n_plots, n_rows*n_cols):
    row, col = divmod(i, n_cols)
    ax[row, col].axis('off')

# Create tables

In [None]:
# Print one markdown table per model: mean of every metric column for each dataset split,
# read from the cached "<model>_<split>_error.pickle" files.
metric_columns = ['error','correctly_predicted','same_length','active_button_correct','button_color_correct']
ds_names = ['train','valid','test']
for model_name in ['pix2code_original_model','shallow_image_model','rnn_image_model_repeat','rnn_image_model_memory','rnn_image_model_tfa','rnn_image_model_tfa_repeat','rnn_image_model_tfa_repeat_bypass']:
    # One Series of column means per split, named after the split.
    all_res_ls = [
        pd.read_pickle('model_instances/{}_{}_error.pickle'.format(model_name,ds_name))[metric_columns]
        .mean()
        .rename(ds_name)
        for ds_name in ds_names
    ]

    # Splits become the rows, metrics the columns.
    all_res_df = pd.concat(all_res_ls, axis=1).T
    all_res_df.index.name = 'dataset'
    print(model_name)
    print(all_res_df.to_markdown())

In [None]:
# Build a single markdown table with the 'error' and 'correctly_predicted' means of every
# model and dataset split, read from the cached "<model>_<split>_error.pickle" files.
metric_columns = ['error','correctly_predicted','same_length','active_button_correct','button_color_correct']
ds_names = ['train','valid','test']
df_list = []
for model_name in ['pix2code_original_model','shallow_image_model','rnn_image_model_repeat','rnn_image_model_memory','rnn_image_model_tfa','rnn_image_model_tfa_repeat','rnn_image_model_tfa_repeat_bypass']:
    # One Series of column means per split, named after the split.
    all_res_ls = [
        pd.read_pickle('model_instances/{}_{}_error.pickle'.format(model_name,ds_name))[metric_columns]
        .mean()
        .rename(ds_name)
        for ds_name in ds_names
    ]

    # Splits become the rows, metrics the columns.
    all_res_df = pd.concat(all_res_ls, axis=1).T
    all_res_df.index.name = 'dataset'
    # Prefix the index with the model name and keep only the two headline metrics.
    keyed_res_df = pd.concat([all_res_df], keys=[model_name], names=['model_name'])
    df_list.append(keyed_res_df[['error','correctly_predicted']])

# 'dataset' becomes a regular column; the model name stays as the index.
df = pd.concat(df_list).reset_index('dataset')
# Empty column, to be filled in by hand in the rendered table.
df['Notes/advantages/disadvantages'] = None
print(df.to_markdown())