From 6f421bd97d34ab6d97f4ef024cabd5ffb4f32e53 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 16:31:22 -0700 Subject: [PATCH 01/28] removed data augmentations from c2 configs --- .../c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml | 4 ++-- .../c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml | 4 ++-- .../vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml | 4 ++-- .../c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml index 0bce435..86a7779 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml @@ -5,9 +5,9 @@ training: problem: batch_size: 48 # Appy all preprocessing/data augmentations. - image_preprocessing: all + image_preprocessing: normalize # none | random_affine | random_horizontal_flip | normalize | all - question_preprocessing: all + question_preprocessing: lowercase,remove_punctuation,tokenize # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all streams: # Problem is returning tokenized questions. diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml index d9020d2..7f13e15 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml @@ -5,9 +5,9 @@ training: problem: batch_size: 32 # Appy all preprocessing/data augmentations. - image_preprocessing: all + image_preprocessing: normalize # none | random_affine | random_horizontal_flip | normalize | all - question_preprocessing: all + question_preprocessing: lowercase,remove_punctuation,tokenize # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all streams: # Problem is returning tokenized questions. diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml index 4991b84..0680cac 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml @@ -4,9 +4,9 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml training: problem: # Appy all preprocessing/data augmentations. - image_preprocessing: all + image_preprocessing: normalize # none | random_affine | random_horizontal_flip | normalize | all - question_preprocessing: all + question_preprocessing: lowercase,remove_punctuation,tokenize # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all streams: # Problem is returning tokenized questions. 
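Note on the switches being pinned down in these hunks: image_preprocessing and question_preprocessing accept the comma-separated options listed in the inline comments. As a rough illustration only, such a specification can be resolved into a torchvision transform chain along the following lines; the helper name and normalization constants are assumptions for this sketch, not code from this repository.

    from torchvision import transforms

    def build_image_transforms(spec):
        # Map the option names from the config comment to torchvision transforms.
        available = {
            "random_affine": transforms.RandomAffine(degrees=15),
            "random_horizontal_flip": transforms.RandomHorizontalFlip(),
            "normalize": transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225]),
        }
        if spec == "none":
            names = []
        elif spec == "all":
            names = list(available)
        else:
            names = spec.split(",")
        # ToTensor comes first, since Normalize operates on tensors.
        return transforms.Compose([transforms.ToTensor()] +
                                  [available[n] for n in names])

    # e.g. build_image_transforms("normalize") mirrors the setting chosen above.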
diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml index c97870b..cc9a025 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml @@ -4,9 +4,9 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml training: problem: # Appy all preprocessing/data augmentations. - image_preprocessing: all + image_preprocessing: normalize # none | random_affine | random_horizontal_flip | normalize | all - question_preprocessing: all + question_preprocessing: lowercase,remove_punctuation,tokenize # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all streams: # Problem is returning tokenized questions. From 8e7d08b6c859addc01a9fb11a34125cec30718b1 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 16:56:52 -0700 Subject: [PATCH 02/28] added using config name as pipeline name in trainer --- .../c2_class_lstm_resnet152_ewm_cat_is.yml | 1 - .../c2_class_lstm_resnet152_rn_cat_is.yml | 1 - .../c2_class_lstm_vgg16_rn.yml | 1 - .../c2_class_lstm_vgg16_rn_cat_is.yml | 1 - ...c2_classification_all_rnn_vgg16_concat.yml | 1 - .../c2_classification_all_rnn_vgg16_ewm.yml | 1 - ..._classification_all_rnn_vgg16_ewm_size.yml | 1 - .../c2_classification_all_rnn_vgg16_mcb.yml | 1 - .../c2_word_answer_onehot_bow.yml | 1 - ptp/workers/processor.py | 14 +++--- ptp/workers/trainer.py | 47 +++++++------------ 11 files changed, 25 insertions(+), 45 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml index 86a7779..8558fd7 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml @@ -24,7 +24,6 @@ validation: pipeline: - name: c2_class_lstm_resnet152_ewm_cat_is global_publisher: priority: 0 diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml index 7f13e15..1a1a05c 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml @@ -24,7 +24,6 @@ validation: pipeline: - name: c2_class_lstm_resnet152_rn_cat_is global_publisher: priority: 0 diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml index 0680cac..14e4de2 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml @@ -22,7 +22,6 @@ validation: pipeline: - name: c2_class_lstm_vgg16_rn global_publisher: priority: 0 diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml index cc9a025..22c25e4 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml @@ -22,7 +22,6 @@ validation: pipeline: - name: c2_class_lstm_vgg16_rn_cat_is global_publisher: priority: 0 diff --git 
a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml index d3aa792..51fba8d 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml @@ -2,7 +2,6 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml pipeline: - name: vqa_med_c2_classification_all_rnn_vgg_concat global_publisher: priority: 0 diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm.yml index 84c8bf8..5447526 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm.yml @@ -2,7 +2,6 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml pipeline: - name: c2_classification_all_rnn_vgg16_ewm global_publisher: priority: 0 diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml index 7db3a3c..d34899b 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml @@ -2,7 +2,6 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml pipeline: - name: c2_classification_all_rnn_vgg16_ewm_size global_publisher: priority: 0 diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml index cabc1dc..d28a24f 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml @@ -2,7 +2,6 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml pipeline: - name: c2_classification_all_rnn_vgg16_mcb global_publisher: priority: 0 diff --git a/configs/vqa_med_2019/c2_classification/c2_word_answer_onehot_bow.yml b/configs/vqa_med_2019/c2_classification/c2_word_answer_onehot_bow.yml index 73dcce7..2d28708 100644 --- a/configs/vqa_med_2019/c2_classification/c2_word_answer_onehot_bow.yml +++ b/configs/vqa_med_2019/c2_classification/c2_word_answer_onehot_bow.yml @@ -14,7 +14,6 @@ validation: batch_size: 128 pipeline: - name: c2_word_answer_onehot_bow # Answer encoding. answer_tokenizer: diff --git a/ptp/workers/processor.py b/ptp/workers/processor.py index bfc29dc..b5afa68 100644 --- a/ptp/workers/processor.py +++ b/ptp/workers/processor.py @@ -17,7 +17,7 @@ __author__ = "Tomasz Kornuta, Vincent Marois, Younes Bouhadjar" -import os +from os import path,makedirs import torch from time import sleep from datetime import datetime @@ -93,12 +93,12 @@ def setup_global_experiment(self): exit(-2) # Check if file with model exists. - if not os.path.isfile(chkpt_file): + if not path.isfile(chkpt_file): print('Checkpoint file {} does not exist'.format(chkpt_file)) exit(-3) # Extract path. - self.abs_path, _ = os.path.split(os.path.dirname(os.path.expanduser(chkpt_file))) + self.abs_path, _ = path.split(path.dirname(path.expanduser(chkpt_file))) print(self.abs_path) # Check if config file was indicated by the user. 
@@ -106,10 +106,10 @@ def setup_global_experiment(self): # Split and make them absolute. root_configs = self.app_state.args.config.replace(" ", "").split(',') # If there are - expand them to absolute paths. - abs_root_configs = [os.path.expanduser(config) for config in root_configs] + abs_root_configs = [path.expanduser(config) for config in root_configs] else: # Use the "default one". - abs_root_configs = [os.path.join(self.abs_path, 'training_configuration.yml')] + abs_root_configs = [path.join(self.abs_path, 'training_configuration.yml')] # Get the list of configurations which need to be loaded. configs_to_load = config_parsing.recurrent_config_parse(abs_root_configs, [], self.app_state.absolute_config_path) @@ -160,7 +160,7 @@ def setup_individual_experiment(self): self.app_state.log_dir = self.abs_path + '/' + time_str + '/' # Lowercase dir. self.app_state.log_dir = self.app_state.log_dir.lower() - os.makedirs(self.app_state.log_dir, exist_ok=False) + makedirs(self.app_state.log_dir, exist_ok=False) except FileExistsError: sleep(1) else: @@ -254,7 +254,7 @@ def setup_individual_experiment(self): pipeline_name = "" # Try to load the model. if pipeline_name != "": - if os.path.isfile(pipeline_name): + if path.isfile(pipeline_name): # Load parameters from checkpoint. self.pipeline.load(pipeline_name) else: diff --git a/ptp/workers/trainer.py b/ptp/workers/trainer.py index c3a583c..9a93591 100644 --- a/ptp/workers/trainer.py +++ b/ptp/workers/trainer.py @@ -17,7 +17,7 @@ __author__ = "Vincent Marois, Tomasz Kornuta" -import os +from os import path,makedirs import yaml import torch from time import sleep @@ -84,38 +84,25 @@ def setup_experiment(self): - Calls base class setup_experiment to parse the command line arguments, - - Loads the config file(s): + - Loads the config file(s) - >>> configs_to_load = self.recurrent_config_parse(flags.config, []) + - Set up the log directory path - - Set up the log directory path: + - Add a ``FileHandler`` to the logger - >>> os.makedirs(self.app_state.log_dir, exist_ok=False) - - - Add a ``FileHandler`` to the logger: - - >>> self.add_file_handler_to_logger(self.log_file) - - - Set random seeds: - - >>> self.set_random_seeds(self.config['training'], 'training') + - Set random seeds - Creates the pipeline consisting of many components - Creates training problem manager - - Handles curriculum learning if indicated: + - Handles curriculum learning if indicated - >>> if 'curriculum_learning' in self.config['training']: - >>> ... + - Creates validation problem manager - - Creates training problem manager - - - Set optimizer: + - Set optimizer - >>> self.optimizer = getattr(torch.optim, optimizer_name) - - - Performs testing of compatibility of both training and validation pipelines. + - Performs testing of compatibility of both training and validation problems and created pipeline. """ # Call base method to parse all command line arguments and add default sections. @@ -134,7 +121,7 @@ def setup_experiment(self): # Split and make them absolute. root_configs = self.app_state.args.config.replace(" ", "").split(',') # If there are - expand them to absolute paths. - abs_root_configs = [os.path.expanduser(config) for config in root_configs] + abs_root_configs = [path.expanduser(config) for config in root_configs] # Get the list of configurations which need to be loaded. 
configs_to_load = config_parse.recurrent_config_parse(abs_root_configs, [], self.app_state.absolute_config_path) @@ -168,8 +155,10 @@ def setup_experiment(self): try: pipeline_name = self.config['pipeline']['name'] except KeyError: - print("Error: Couldn't retrieve the pipeline 'name' from the loaded configuration") - exit(-1) + # Use the name of the first configuration file from the command line. + pipeline_name = path.basename(root_configs[0]) + # Set the pipeline name, so that the processor can use it afterwards. + self.config['pipeline'].add_config_params({'name': pipeline_name}) # Prepare the output path for logging while True: # Dirty fix: if log_dir already exists, wait for 1 second and try again @@ -177,10 +166,10 @@ def setup_experiment(self): time_str = '{0:%Y%m%d_%H%M%S}'.format(datetime.now()) if self.app_state.args.savetag != '': time_str = time_str + "_" + self.app_state.args.savetag - self.app_state.log_dir = os.path.expanduser(self.app_state.args.expdir) + '/' + training_problem_type + '/' + pipeline_name + '/' + time_str + '/' + self.app_state.log_dir = path.expanduser(self.app_state.args.expdir) + '/' + training_problem_type + '/' + pipeline_name + '/' + time_str + '/' # Lowercase dir. self.app_state.log_dir = self.app_state.log_dir.lower() - os.makedirs(self.app_state.log_dir, exist_ok=False) + makedirs(self.app_state.log_dir, exist_ok=False) except FileExistsError: sleep(1) else: @@ -199,7 +188,7 @@ def setup_experiment(self): # Models dir. self.checkpoint_dir = self.app_state.log_dir + 'checkpoints/' - os.makedirs(self.checkpoint_dir, exist_ok=False) + makedirs(self.checkpoint_dir, exist_ok=False) # Set random seeds in the training section. self.set_random_seeds('training', self.config['training']) @@ -283,7 +272,7 @@ def setup_experiment(self): pipeline_name = "" # Try to load the model. if pipeline_name != "": - if os.path.isfile(pipeline_name): + if path.isfile(pipeline_name): # Load parameters from checkpoint. self.pipeline.load(pipeline_name) else: From 94f1e63ea20ff155d5d44208ed848d4e16e7c288 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 17:34:40 -0700 Subject: [PATCH 03/28] resnet50 feature_maps mode and c2 configs using resnet50 with EWM/RN --- .../c2_class_lstm_resnet50_ewm_cat_is.yml | 144 ++++++++++++++++++ .../c2_class_lstm_resnet50_rn_cat_is.yml | 141 +++++++++++++++++ ptp/components/models/torch_vision_wrapper.py | 21 ++- 3 files changed, 302 insertions(+), 4 deletions(-) create mode 100644 configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml create mode 100644 configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_rn_cat_is.yml diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml new file mode 100644 index 0000000..4834d40 --- /dev/null +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml @@ -0,0 +1,144 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml + +training: + problem: + batch_size: 48 + # Apply all preprocessing/data augmentations. + image_preprocessing: normalize + # none | random_affine | random_horizontal_flip | normalize | all + question_preprocessing: lowercase,remove_punctuation,tokenize + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + streams: + # Problem is returning tokenized questions. 
+ questions: tokenized_questions + +validation: + problem: + batch_size: 48 + question_preprocessing: lowercase,remove_punctuation,tokenize + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + streams: + # Problem is returning tokenized questions. + questions: tokenized_questions + + +pipeline: + + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [question_encoder_output_size, image_encoder_output_size, element_wise_activation_size,image_size_encoder_input_size, image_size_encoder_output_size] + values: [100, 100, 100, 2, 10] + + ################# PIPE 0: question ################# + + # Model 1: Embeddings + question_embeddings: + priority: 1.2 + type: SentenceEmbeddings + embeddings_size: 100 + pretrained_embeddings_file: glove.6B.100d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + priority: 1.3 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + use_logsoftmax: False + initial_state: Trainable + dropout_rate: 0.1 + hidden_size: 50 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + ################# PIPE 2: image ################# + # Image encoder. + image_encoder: + priority: 3.1 + type: TorchVisionWrapper + model_type: resnet50 + streams: + inputs: images + outputs: image_activations + globals: + output_size: image_encoder_output_size + + ################# PIPE 3: image-question fusion ################# + # Element wise multiplication + FF. + question_image_fusion: + priority: 4.1 + type: ElementWiseMultiplication + dropout_rate: 0.5 + streams: + image_encodings: image_activations + question_encodings: question_activations + outputs: element_wise_activations + globals: + image_encoding_size: image_encoder_output_size + question_encoding_size: question_encoder_output_size + output_size: element_wise_activation_size + + question_image_ffn: + priority: 4.2 + type: FeedForwardNetwork + hidden_sizes: [100] + dropout_rate: 0.5 + streams: + inputs: element_wise_activations + predictions: question_image_activations + globals: + input_size: element_wise_activation_size + prediction_size: element_wise_activation_size + + ################# PIPE 5: image-question-image size fusion + classification ################# + # Model - image size FFN. + image_size_encoder: + priority: 5.1 + type: FeedForwardNetwork + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + # 4th subpipeline: concatenation + FF. 
+ concat: + priority: 5.2 + type: Concatenation + input_streams: [question_image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,10]] + output_dims: [-1,110] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + classifier: + priority: 5.3 + type: FeedForwardNetwork + hidden_sizes: [100] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c2 + + + #: pipeline diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_rn_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_rn_cat_is.yml new file mode 100644 index 0000000..51dd275 --- /dev/null +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_rn_cat_is.yml @@ -0,0 +1,141 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml + +training: + problem: + batch_size: 32 + # Apply all preprocessing/data augmentations. + image_preprocessing: normalize + # none | random_affine | random_horizontal_flip | normalize | all + question_preprocessing: lowercase,remove_punctuation,tokenize + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + streams: + # Problem is returning tokenized questions. + questions: tokenized_questions + +validation: + problem: + batch_size: 32 + question_preprocessing: lowercase,remove_punctuation,tokenize + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + streams: + # Problem is returning tokenized questions. + questions: tokenized_questions + + +pipeline: + + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [question_encoder_output_size,rn_activation_size,image_size_encoder_input_size, image_size_encoder_output_size] + values: [100, 100, 2, 10] + + ################# PIPE 0: question ################# + + # Model 1: Embeddings + question_embeddings: + priority: 1.2 + type: SentenceEmbeddings + embeddings_size: 100 + pretrained_embeddings_file: glove.6B.100d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + priority: 1.3 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + use_logsoftmax: False + initial_state: Trainable + dropout_rate: 0.1 + hidden_size: 50 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + ################# PIPE 2: image ################# + # Image encoder. + image_encoder: + priority: 3.1 + type: TorchVisionWrapper + model_type: resnet50 + return_feature_maps: True + streams: + inputs: images + outputs: feature_maps + + ################# PIPE 3: Fusion: Relational Network ################# + # Object-object relations. 
+ question_image_fusion: + priority: 4.1 + type: RelationalNetwork + dropout_rate: 0.5 + g_theta_sizes: [512, 256] + streams: + question_encodings: question_activations + outputs: fused_image_question_activations + globals: + question_encoding_size: question_encoder_output_size + output_size: fused_image_question_activation_size + + question_image_ffn: + priority: 4.2 + type: FeedForwardNetwork + hidden_sizes: [128,100] + dropout_rate: 0.5 + streams: + inputs: fused_image_question_activations + predictions: rn_activation + globals: + input_size: fused_image_question_activation_size + prediction_size: rn_activation_size + + + ################# PIPE 5: image-question-image size fusion + classification ################# + # Model - image size FFN. + image_size_encoder: + priority: 5.1 + type: FeedForwardNetwork + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + # 6th subpipeline: concatenation + FF. + concat: + priority: 5.2 + type: Concatenation + input_streams: [rn_activation,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,10]] + output_dims: [-1,110] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + classifier: + priority: 5.3 + type: FeedForwardNetwork + hidden_sizes: [100] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c2 + + #: pipeline diff --git a/ptp/components/models/torch_vision_wrapper.py b/ptp/components/models/torch_vision_wrapper.py index 3419bfb..92e4cf9 100644 --- a/ptp/components/models/torch_vision_wrapper.py +++ b/ptp/components/models/torch_vision_wrapper.py @@ -119,11 +119,24 @@ def __init__(self, name, config): self.model = models.resnet50(pretrained=pretrained) if self.return_feature_maps: - raise ConfigurationError("'resnet50' doesn't support 'return_feature_maps' mode (yet)") + # Get all modules excluding the last two (avgpool and fc) + modules=list(self.model.children())[:-2] + self.model=torch.nn.Sequential(*modules) - # Use the whole model, but cut/reshape only the last layer. - self.output_size = self.globals["output_size"] - self.model.fc = torch.nn.Linear(2048, self.output_size) + # Height of the returned features tensor (SET) + self.feature_maps_height = 7 + self.globals["feature_maps_height"] = self.feature_maps_height + # Width of the returned features tensor (SET) + self.feature_maps_width = 7 + self.globals["feature_maps_width"] = self.feature_maps_width + # Depth of the returned features tensor (SET) + self.feature_maps_depth = 2048 + self.globals["feature_maps_depth"] = self.feature_maps_depth + + else: + # Use the whole model, but cut/reshape only the last layer. + self.output_size = self.globals["output_size"] + self.model.fc = torch.nn.Linear(2048, self.output_size) def input_data_definitions(self): From 5940c24da769fcee6f758e7210804434f35450f4 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 19:40:20 -0700 Subject: [PATCH 04/28] c123_binary_lstm_vgg16_cat_ffn_loss.yml --- .../components/models/sentence_embeddings.yml | 2 +- .../c2_class_lstm_resnet50_ewm_cat_is.yml | 5 +- .../default_c2_classification.yml | 2 +- ...> c123_binary_lstm_vgg16_cat_ffn_loss.yml} | 13 +- ..._binary_lstm_resnet152_is_cat_ffn_loss.yml | 291 ++++++++++++++++++ 5 files changed, 296 insertions(+), 17 deletions(-) rename configs/vqa_med_2019/vf/{c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml => c123_binary_lstm_vgg16_cat_ffn_loss.yml} (95%) create mode 100644 configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml diff --git a/configs/default/components/models/sentence_embeddings.yml b/configs/default/components/models/sentence_embeddings.yml index 0056849..5feccd7 100644 --- a/configs/default/components/models/sentence_embeddings.yml +++ b/configs/default/components/models/sentence_embeddings.yml @@ -13,7 +13,7 @@ source_vocabulary_files: '' # Additional tokens that will be added to vocabulary (LOADED) # This list can be extended, but <PAD> and <UNK> are special tokens. # <PAD> is ALWAYS used for padding shorter sequences. -additional_tokens: ',' +additional_tokens: '' # Enable <EOS> (end of sequence) token. eos_token: False diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml index 4834d40..eab27f4 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml @@ -5,10 +5,7 @@ training: problem: batch_size: 48 # Apply all preprocessing/data augmentations. - image_preprocessing: normalize - # none | random_affine | random_horizontal_flip | normalize | all question_preprocessing: lowercase,remove_punctuation,tokenize - # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all streams: # Problem is returning tokenized questions. questions: tokenized_questions validation: problem: batch_size: 48 + # Apply all preprocessing/data augmentations. question_preprocessing: lowercase,remove_punctuation,tokenize streams: # Problem is returning tokenized questions. 
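The feature-map mode added to the wrapper above simply truncates resnet50 after its last convolutional block, which is where the 7x7x2048 values published to globals come from. A standalone sketch of the same truncation, assuming the standard 224x224 input resolution:

    import torch
    from torchvision import models

    # Keep everything up to and including layer4; drop avgpool and fc.
    resnet = models.resnet50(pretrained=False)
    backbone = torch.nn.Sequential(*list(resnet.children())[:-2])

    with torch.no_grad():
        maps = backbone(torch.zeros(1, 3, 224, 224))
    print(maps.shape)  # torch.Size([1, 2048, 7, 7]): depth 2048, height 7, width 7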
questions: tokenized_questions diff --git a/configs/vqa_med_2019/c2_classification/default_c2_classification.yml b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml index 68f5880..b4b08d0 100644 --- a/configs/vqa_med_2019/c2_classification/default_c2_classification.yml +++ b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml @@ -82,6 +82,6 @@ pipeline: viewer: type: StreamViewer priority: 100.4 - input_streams: questions,category_names,answers,predicted_answers + input_streams: questions,tokenized_questions,category_names,answers,predicted_answers #: pipeline diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml similarity index 95% rename from configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml rename to configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml index 94af6aa..bdc0488 100644 --- a/configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml @@ -5,9 +5,9 @@ default_configs: vqa_med_2019/default_vqa_med_2019.yml training: problem: categories: C1,C2,C3 - export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3.weights.csv sampler: - weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + weights: ~/data/vqa-med/answers.c1_c2_c3.weights.csv # Validation parameters: validation: @@ -16,7 +16,6 @@ validation: pipeline: - name: c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss ################# PIPE 0: SHARED ################# @@ -33,14 +32,6 @@ pipeline: type: BatchSizeStatistics priority: 0.1 - # Questions encoding. - pipe1_question_tokenizer: - priority: 0.2 - type: SentenceTokenizer - streams: - inputs: questions - outputs: tokenized_questions ################# PIPE 0: CATEGORY ################# # Model 1: question embeddings diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml new file mode 100644 index 0000000..d2fb6d4 --- /dev/null +++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml @@ -0,0 +1,291 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1,C2,C3 + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Apply all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions + sampler: + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + +# Validation parameters: +validation: + problem: + categories: C1,C2,C3 + # Apply all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions + + +pipeline: + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_no_binary_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2}] + + # Statistics. 
+ batch_size: + type: BatchSizeStatistics + priority: 0.1 + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + vocabulary_size: vocabulary_size_all_c123 + word_mappings: word_mappings_all_c123 + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. + image_encoder: + type: TorchVisionWrapper + priority: 2.1 + streams: + inputs: images + outputs: image_activations + globals: + output_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 + C2 + C3 questions ################# + + # Answer encoding for PIPE 5. + pipe5_c123_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_c123_answers_ids + globals: + vocabulary_size: vocabulary_size_c123 + word_mappings: word_mappings_all_c123 + + # Sample masking based on categories. + pipe5_c123_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_c2_c3_no_binary_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c123_by_question_categories_indices # NOT USED + masks: pipe5_c123_masks + + # Model 4: FFN C1 answering + pipe5_c123_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_c123_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c123 + + pipe5_c123_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_c123_predictions + masks: pipe5_c123_masks + targets: pipe5_c123_answers_ids + loss: pipe5_c123_loss + + pipe5_c123_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_c123_masks + predictions: pipe5_c123_predictions + targets: pipe5_c123_answers_ids + globals: + word_mappings: word_mappings_all_c123 + statistics: + precision: pipe5_c123_precision + recall: pipe5_c123_recall + f1score: pipe5_c123_f1score + + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. 
+ viewer: + type: StreamViewer + priority: 9.3 + input_streams: tokenized_questions,answers, category_names,predicted_question_categories_names, pipe5_c123_masks,pipe5_c123_answers_without_yn_ids,pipe5_c123_predictions + + +#: pipeline From 2371628fb6ecd81e4ba634c1d030d7ab4e5c5eed Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 19:46:20 -0700 Subject: [PATCH 05/28] c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml --- .../vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml index d2fb6d4..6e2c828 100644 --- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml @@ -172,6 +172,7 @@ pipeline: # Image encoder. image_encoder: type: TorchVisionWrapper + model_type: resnet152 priority: 2.1 streams: inputs: images outputs: image_activations globals: output_size: image_encoder_output_size From d5a11d0ea04a66999e397dcd4c62285db830284e Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 19:51:06 -0700 Subject: [PATCH 06/28] c123_no_binary_lstm_resnet50_is_cat_ffn_loss --- ...o_binary_lstm_resnet50_is_cat_ffn_loss.yml | 292 ++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml new file mode 100644 index 0000000..e95750a --- /dev/null +++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml @@ -0,0 +1,292 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1,C2,C3 + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Apply all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions + sampler: + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + +# Validation parameters: +validation: + problem: + categories: C1,C2,C3 + # Apply all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions + + +pipeline: + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_no_binary_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2}] + + # Statistics. 
+ batch_size: + type: BatchSizeStatistics + priority: 0.1 + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + vocabulary_size: vocabulary_size_all_c123 + word_mappings: word_mappings_all_c123 + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. + image_encoder: + type: TorchVisionWrapper + model_type: resnet50 + priority: 2.1 + streams: + inputs: images + outputs: image_activations + globals: + output_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 + C2 + C3 questions ################# + + # Answer encoding for PIPE 5. + pipe5_c123_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_c123_answers_ids + globals: + vocabulary_size: vocabulary_size_c123 + word_mappings: word_mappings_all_c123 + + # Sample masking based on categories. + pipe5_c123_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_c2_c3_no_binary_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c123_by_question_categories_indices # NOT USED + masks: pipe5_c123_masks + + # Model 4: FFN C1 answering + pipe5_c123_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_c123_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c123 + + pipe5_c123_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_c123_predictions + masks: pipe5_c123_masks + targets: pipe5_c123_answers_ids + loss: pipe5_c123_loss + + pipe5_c123_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_c123_masks + predictions: pipe5_c123_predictions + targets: pipe5_c123_answers_ids + globals: + word_mappings: word_mappings_all_c123 + statistics: + precision: pipe5_c123_precision + recall: pipe5_c123_recall + f1score: pipe5_c123_f1score + + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. 
+ viewer: + type: StreamViewer + priority: 9.3 + input_streams: tokenized_questions,answers, category_names,predicted_question_categories_names, pipe5_c123_masks,pipe5_c123_answers_without_yn_ids,pipe5_c123_predictions + + +#: pipeline From b7b619a88167e0b8525b4da620e1386dcc629baa Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 20:23:11 -0700 Subject: [PATCH 07/28] cleanup of c123_no_binary cat pipelines with different image encoders --- .../c123_binary_lstm_vgg16_cat_ffn_loss.yml | 93 ++++++++++++------- ..._binary_lstm_resnet152_is_cat_ffn_loss.yml | 20 +++- ...o_binary_lstm_resnet50_is_cat_ffn_loss.yml | 20 +++- 3 files changed, 94 insertions(+), 39 deletions(-) diff --git a/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml index bdc0488..3d5b25a 100644 --- a/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml @@ -5,14 +5,22 @@ default_configs: vqa_med_2019/default_vqa_med_2019.yml training: problem: categories: C1,C2,C3 - export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3.weights.csv + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Apply all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions sampler: - weights: ~/data/vqa-med/answers.c1_c2_c3.weights.csv + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv # Validation parameters: validation: problem: categories: C1,C2,C3 + # Apply all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions pipeline: @@ -24,8 +32,8 @@ pipeline: type: GlobalVariablePublisher priority: 0 # Add input_size to globals. - keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_binary_yn_word_to_ix] - values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2, "BINARY": 3}] + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_no_binary_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2}] # Statistics. batch_size: type: BatchSizeStatistics priority: 0.1 @@ -114,8 +122,6 @@ pipeline: statistics: accuracy: categorization_accuracy - - ################# PIPE 1: SHARED QUESTION ENCODER ################# # Model 1: question embeddings @@ -158,14 +164,15 @@ pipeline: inputs: answers outputs: all_answers_ids globals: - vocabulary_size: vocabulary_size_all_c1_c2_c3_binary - word_mappings: word_mappings_all_c1_c2_c3_binary + vocabulary_size: vocabulary_size_all_c123 + word_mappings: word_mappings_all_c123 ################# PIPE 2: SHARED IMAGE ENCODER ################# # Image encoder. image_encoder: type: TorchVisionWrapper + model_type: vgg16 priority: 2.1 streams: inputs: images @@ -202,10 +209,10 @@ pipeline: output_size: concatenated_activations_size - ################# PIPE 5: C1 + C2 + C2 + Binary Y/N question ################# + ################# PIPE 5: C1 + C2 + C3 questions ################# # Answer encoding for PIPE 5. 
- pipe5_all_answer_indexer: + pipe5_c123_answer_indexer: type: LabelIndexer priority: 5.1 data_folder: ~/data/vqa-med @@ -214,72 +221,88 @@ pipeline: export_word_mappings_to_globals: True streams: inputs: answers - outputs: pipe5_all_answers_ids + outputs: pipe5_c123_answers_ids globals: - vocabulary_size: vocabulary_size_c1_c2_c3_binary - word_mappings: word_mappings_all_c1_c2_c3_binary + vocabulary_size: vocabulary_size_c123 + word_mappings: word_mappings_all_c123 # Sample masking based on categories. - pipe5_all_string_to_mask: + pipe5_c123_string_to_mask: priority: 5.2 type: StringToMask globals: - word_mappings: category_c1_c2_c3_binary_yn_word_to_ix + word_mappings: category_c1_c2_c3_no_binary_word_to_ix streams: strings: pipe0_predicted_question_categories_names - string_indices: predicted_c1_c2_c3_binary_by_question_categories_indices # NOT USED - masks: pipe5_all_masks + string_indices: predicted_c123_by_question_categories_indices # NOT USED + masks: pipe5_c123_masks # Model 4: FFN C1 answering - pipe5_all_ffn: + pipe5_c123_ffn: priority: 5.3 type: FeedForwardNetwork hidden: [50] dropout_rate: 0.5 streams: inputs: concatenated_activations - predictions: pipe5_all_predictions + predictions: pipe5_c123_predictions globals: input_size: concatenated_activations_size - prediction_size: vocabulary_size_c1_c2_c3_binary + prediction_size: vocabulary_size_c123 - pipe5_all_nllloss: + pipe5_c123_nllloss: type: NLLLoss priority: 5.4 targets_dim: 1 use_masking: True streams: - predictions: pipe5_all_predictions - masks: pipe5_all_masks - targets: pipe5_all_answers_ids - loss: pipe5_all_loss + predictions: pipe5_c123_predictions + masks: pipe5_c123_masks + targets: pipe5_c123_answers_ids + loss: pipe5_c123_loss - pipe5_all_precision_recall: + pipe5_c123_precision_recall: type: PrecisionRecallStatistics priority: 5.5 use_word_mappings: True use_masking: True - #show_class_scores: True + show_class_scores: True #show_confusion_matrix: True streams: - masks: pipe5_all_masks - predictions: pipe5_all_predictions - targets: pipe5_all_answers_ids + masks: pipe5_c123_masks + predictions: pipe5_c123_predictions + targets: pipe5_c123_answers_ids globals: - word_mappings: word_mappings_all_c1_c2_c3_binary + word_mappings: word_mappings_all_c123 statistics: - precision: pipe5_all_precision - recall: pipe5_all_recall - f1score: pipe5_all_f1score + precision: pipe5_c123_precision + recall: pipe5_c123_recall + f1score: pipe5_c123_f1score + # C123 Predictions decoder. + pipe5_prediction_decoder: + type: WordDecoder + priority: 5.6 + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe5_c123_predictions + outputs: pipe5_c123_predicted_answers + globals: + word_mappings: word_mappings_all_c123 ################# PIPE 9: MERGE ANSWERS ################# + # Viewers. 
viewer: type: StreamViewer priority: 9.3 - input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_all_masks,pipe5_all_answers_without_yn_ids,pipe5_all_predictions + input_streams: + tokenized_questions, category_names, + pipe0_predicted_question_categories_names, + pipe5_c123_masks, + answers, pipe5_c123_predicted_answers #: pipeline diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml index 6e2c828..1de281c 100644 --- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml @@ -266,7 +266,7 @@ pipeline: priority: 5.5 use_word_mappings: True use_masking: True - #show_class_scores: True + show_class_scores: True #show_confusion_matrix: True streams: masks: pipe5_c123_masks @@ -279,14 +279,30 @@ pipeline: recall: pipe5_c123_recall f1score: pipe5_c123_f1score + # C123 Predictions decoder. + pipe5_prediction_decoder: + type: WordDecoder + priority: 5.6 + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe5_c123_predictions + outputs: pipe5_c123_predicted_answers + globals: + word_mappings: word_mappings_all_c123 ################# PIPE 9: MERGE ANSWERS ################# + # Viewers. viewer: type: StreamViewer priority: 9.3 - input_streams: tokenized_questions,answers, category_names,predicted_question_categories_names, pipe5_c123_masks,pipe5_c123_answers_without_yn_ids,pipe5_c123_predictions + input_streams: + tokenized_questions, category_names, + pipe0_predicted_question_categories_names, + pipe5_c123_masks, + answers, pipe5_c123_predicted_answers #: pipeline diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml index e95750a..287b2a8 100644 --- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml @@ -266,7 +266,7 @@ pipeline: priority: 5.5 use_word_mappings: True use_masking: True - #show_class_scores: True + show_class_scores: True #show_confusion_matrix: True streams: masks: pipe5_c123_masks @@ -279,14 +279,30 @@ pipeline: recall: pipe5_c123_recall f1score: pipe5_c123_f1score + # C123 Predictions decoder. + pipe5_prediction_decoder: + type: WordDecoder + priority: 5.6 + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe5_c123_predictions + outputs: pipe5_c123_predicted_answers + globals: + word_mappings: word_mappings_all_c123 ################# PIPE 9: MERGE ANSWERS ################# + # Viewers. 
viewer:
 type: StreamViewer
 priority: 9.3
- input_streams: tokenized_questions,answers, category_names,predicted_question_categories_names, pipe5_c123_masks,pipe5_c123_answers_without_yn_ids,pipe5_c123_predictions
+ input_streams:
+ tokenized_questions, category_names,
+ pipe0_predicted_question_categories_names,
+ pipe5_c123_masks,
+ answers, pipe5_c123_predicted_answers
#: pipeline
From ef65ef62390eb73e41ab3c08c3ba172708699259 Mon Sep 17 00:00:00 2001
From: tkornut
Date: Mon, 29 Apr 2019 21:35:48 -0700
Subject: [PATCH 08/28] c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml + cleanups of v2 c123 pipelines
---
 .../c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml | 1 +
 .../c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml | 1 +
 .../c2_classification_all_rnn_vgg16_ewm_size.yml | 1 +
 .../vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml | 1 +
 .../vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml | 1 +
 5 files changed, 5 insertions(+)
diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml
index 8558fd7..b27aea1 100644
--- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml
+++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml
@@ -95,6 +95,7 @@ pipeline:
 type: FeedForwardNetwork
 hidden_sizes: [100]
 dropout_rate: 0.5
+ use_logsoftmax: False
 streams:
 inputs: element_wise_activations
 predictions: question_image_activations
diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml
index eab27f4..2db4248 100644
--- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml
+++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml
@@ -92,6 +92,7 @@ pipeline:
 type: FeedForwardNetwork
 hidden_sizes: [100]
 dropout_rate: 0.5
+ use_logsoftmax: False
 streams:
 inputs: element_wise_activations
 predictions: question_image_activations
diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml
index d34899b..1a1f774 100644
--- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml
+++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml
@@ -79,6 +79,7 @@ pipeline:
 type: FeedForwardNetwork
 hidden_sizes: [100]
 dropout_rate: 0.5
+ use_logsoftmax: False
 streams:
 inputs: element_wise_activations
 predictions: question_image_activations
diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml
index 1de281c..9b86692 100644
--- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml
+++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml
@@ -186,6 +186,7 @@ pipeline:
 image_size_encoder:
 type: FeedForwardNetwork
 priority: 3.1
+ use_logsoftmax: False
 streams:
 inputs: image_sizes
 predictions: image_size_activations
diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml
index e95750a..287b2a8 100644
--- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml
+++
b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml
@@ -186,6 +186,7 @@ pipeline:
 image_size_encoder:
 type: FeedForwardNetwork
 priority: 3.1
+ use_logsoftmax: False
 streams:
 inputs: image_sizes
 predictions: image_size_activations
From 43f83bc2361f697bb90c3db9832e3bbc23258dae Mon Sep 17 00:00:00 2001
From: tkornut
Date: Mon, 29 Apr 2019 21:40:12 -0700
Subject: [PATCH 09/28] c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
---
 ...nary_lstm_resnet50_ewm_is_cat_ffn_loss.yml | 336 ++++++++++++++++++
 1 file changed, 336 insertions(+)
 create mode 100644 configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
new file mode 100644
index 0000000..fa58b97
--- /dev/null
+++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
@@ -0,0 +1,336 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+ problem:
+ categories: C1,C2,C3
+ export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+ # Apply all preprocessing/data augmentations.
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+ sampler:
+ weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+
+# Validation parameters:
+validation:
+ problem:
+ categories: C1,C2,C3
+ # Apply all preprocessing/data augmentations.
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+
+
+pipeline:
+
+ ################# PIPE 0: SHARED #################
+
+ # Add global variables.
+ global_publisher:
+ type: GlobalVariablePublisher
+ priority: 0
+ # Add input_size to globals.
+ keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, element_wise_activation_size, category_c1_c2_c3_no_binary_word_to_ix]
+ values: [100, 2, 10, 100, 100, {"C1": 0, "C2": 1, "C3": 2}]
+
+ # Statistics.
+ batch_size: + type: BatchSizeStatistics + priority: 0.1 + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_question_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_encoder_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. 
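+ # Illustrative note (assumed LabelIndexer semantics; the CSV rows are hypothetical):
+ # if answers.c1_c2_c3_binary_yn.word.mappings.csv contains rows such as "yes,0",
+ # "no,1", "axial,2", the indexer turns each string in the "answers" stream into its
+ # integer id, and the flag below additionally publishes the mapping and its size
+ # under the global names listed in the "globals" section.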
+ export_word_mappings_to_globals: True
+ streams:
+ inputs: answers
+ outputs: all_answers_ids
+ globals:
+ vocabulary_size: vocabulary_size_all_c123
+ word_mappings: word_mappings_all_c123
+
+ ################# PIPE 2: SHARED IMAGE ENCODER #################
+
+ # Image encoder.
+ image_encoder:
+ type: TorchVisionWrapper
+ model: vgg16
+ priority: 2.1
+ streams:
+ inputs: images
+ outputs: image_activations
+ globals:
+ output_size: image_encoder_output_size
+
+ ################# PIPE 3: SHARED IMAGE SIZE ENCODER #################
+
+ # Model - image size classifier.
+ image_size_encoder:
+ type: FeedForwardNetwork
+ priority: 3.1
+ use_logsoftmax: False
+ streams:
+ inputs: image_sizes
+ predictions: image_size_activations
+ globals:
+ input_size: image_size_encoder_input_size
+ prediction_size: image_size_encoder_output_size
+
+ ################# PIPE 4: image-question fusion #################
+ # Element wise multiplication + FF.
+ question_image_fusion:
+ priority: 4.1
+ type: ElementWiseMultiplication
+ dropout_rate: 0.5
+ streams:
+ image_encodings: image_activations
+ question_encodings: question_activations
+ outputs: element_wise_activations
+ globals:
+ image_encoding_size: image_encoder_output_size
+ question_encoding_size: question_encoder_output_size
+ output_size: element_wise_activation_size
+
+ question_image_ffn:
+ priority: 4.2
+ type: FeedForwardNetwork
+ hidden_sizes: [100]
+ dropout_rate: 0.5
+ use_logsoftmax: False
+ streams:
+ inputs: element_wise_activations
+ predictions: question_image_activations
+ globals:
+ input_size: element_wise_activation_size
+ prediction_size: element_wise_activation_size
+
+ ################# PIPE 5: image-question-image size fusion #################
+
+ # 5th subpipeline: concatenation
+ concat:
+ priority: 5.1
+ type: Concatenation
+ input_streams: [question_image_activations,image_size_activations]
+ # Concatenation
+ dim: 1 # default
+ input_dims: [[-1,100],[-1,10]]
+ output_dims: [-1,110]
+ streams:
+ outputs: concatenated_activations
+ globals:
+ output_size: concatenated_activations_size
+
+ ################# PIPE 6: C1 + C2 + C3 questions #################
+
+ # Answer encoding for PIPE 6.
+ pipe6_c123_answer_indexer:
+ type: LabelIndexer
+ priority: 6.1
+ data_folder: ~/data/vqa-med
+ word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
+ # Export mappings and size to globals.
+ export_word_mappings_to_globals: True
+ streams:
+ inputs: answers
+ outputs: pipe6_c123_answers_ids
+ globals:
+ vocabulary_size: vocabulary_size_c123
+ word_mappings: word_mappings_all_c123
+
+ # Sample masking based on categories.
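+ # Illustrative note (assumed StringToMask semantics): with the published mapping
+ # {"C1": 0, "C2": 1, "C3": 2}, a batch of predicted category names
+ # ["C1", "BINARY", "C3"] would yield masks [1, 0, 1]; samples whose predicted
+ # category is not in the mapping are masked out, so the NLLLoss and
+ # PrecisionRecallStatistics components below (use_masking: True) are computed
+ # only over C1/C2/C3 samples.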
+ pipe6_c123_string_to_mask:
+ priority: 6.2
+ type: StringToMask
+ globals:
+ word_mappings: category_c1_c2_c3_no_binary_word_to_ix
+ streams:
+ strings: pipe0_predicted_question_categories_names
+ string_indices: predicted_c123_by_question_categories_indices # NOT USED
+ masks: pipe6_c123_masks
+
+ # Model 4: FFN C123 answering
+ pipe6_c123_answer_classifier:
+ priority: 6.3
+ type: FeedForwardNetwork
+ hidden: [100]
+ dropout_rate: 0.5
+ streams:
+ inputs: concatenated_activations
+ predictions: pipe6_c123_predictions
+ globals:
+ input_size: concatenated_activations_size
+ prediction_size: vocabulary_size_c123
+
+ pipe6_c123_nllloss:
+ type: NLLLoss
+ priority: 6.4
+ targets_dim: 1
+ use_masking: True
+ streams:
+ predictions: pipe6_c123_predictions
+ masks: pipe6_c123_masks
+ targets: pipe6_c123_answers_ids
+ loss: pipe6_c123_loss
+
+ pipe6_c123_precision_recall:
+ type: PrecisionRecallStatistics
+ priority: 6.5
+ use_word_mappings: True
+ use_masking: True
+ show_class_scores: True
+ #show_confusion_matrix: True
+ streams:
+ masks: pipe6_c123_masks
+ predictions: pipe6_c123_predictions
+ targets: pipe6_c123_answers_ids
+ globals:
+ word_mappings: word_mappings_all_c123
+ statistics:
+ precision: pipe6_c123_precision
+ recall: pipe6_c123_recall
+ f1score: pipe6_c123_f1score
+
+ # C123 Predictions decoder.
+ pipe6_c123_prediction_decoder:
+ type: WordDecoder
+ priority: 6.6
+ # Use the same word mappings as label indexer.
+ import_word_mappings_from_globals: True
+ streams:
+ inputs: pipe6_c123_predictions
+ outputs: pipe6_c123_predicted_answers
+ globals:
+ word_mappings: word_mappings_all_c123
+
+ ################# PIPE 9: MERGE ANSWERS #################
+
+ # Viewers.
+ viewer:
+ type: StreamViewer
+ priority: 9.3
+ input_streams:
+ tokenized_questions, category_names,
+ pipe0_predicted_question_categories_names,
+ pipe6_c123_masks,
+ answers, pipe6_c123_predicted_answers
+
+
+#: pipeline
From 3098a07f0ca3c7370a987a16dfa0f7a83eecc5a3 Mon Sep 17 00:00:00 2001
From: tkornut
Date: Mon, 29 Apr 2019 21:49:01 -0700
Subject: [PATCH 10/28] cleanup: priorities first
---
 ...nary_lstm_resnet50_ewm_is_cat_ffn_loss.yml | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
index fa58b97..dcc89a3 100644
--- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
+++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
@@ -29,23 +29,23 @@ pipeline:
 # Add global variables.
 global_publisher:
- type: GlobalVariablePublisher
- priority: 0
+ priority: 0
+ type: GlobalVariablePublisher
 # Add input_size to globals.
 keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, element_wise_activation_size, category_c1_c2_c3_no_binary_word_to_ix]
 values: [100, 2, 10, 100, 100, {"C1": 0, "C2": 1, "C3": 2}]
 # Statistics.
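 # Note: the reordering in this patch is purely cosmetic; every component block now
 # lists "priority" before "type", i.e. a sketch of the resulting convention:
 #   batch_size:
 #     priority: 0.1
 #     type: BatchSizeStatistics
 # Behaviour is unchanged, since the priority values themselves are untouched.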
 batch_size:
- type: BatchSizeStatistics
- priority: 0.1
+ priority: 0.1
+ type: BatchSizeStatistics
 ################# PIPE 0: CATEGORY #################
 # Model 1: question embeddings
 pipe0_question_embeddings:
- type: SentenceEmbeddings
- priority: 0.3
+ priority: 0.3
+ type: SentenceEmbeddings
 # LOAD AND FREEZE #
 load:
 file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
 model: question_embeddings
@@ -114,8 +114,8 @@ pipeline:
 word_mappings: category_word_mappings
 pipe0_category_accuracy:
- type: AccuracyStatistics
- priority: 0.7
+ priority: 0.7
+ type: AccuracyStatistics
 streams:
 targets: category_ids
 predictions: pipe0_predicted_question_categories_preds
@@ -126,8 +126,8 @@ pipeline:
 # Model 1: question embeddings
 pipe1_question_embeddings:
- type: SentenceEmbeddings
- priority: 1.1
+ priority: 1.1
+ type: SentenceEmbeddings
 embeddings_size: 50
 pretrained_embeddings_file: glove.6B.50d.txt
 data_folder: ~/data/vqa-med
@@ -154,8 +154,8 @@ pipeline:
 # Answer encoding
 pipe1_all_answer_indexer:
- type: LabelIndexer
- priority: 1.3
+ priority: 1.3
+ type: LabelIndexer
 data_folder: ~/data/vqa-med
 word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
 # Export mappings and size to globals.
@@ -171,9 +171,9 @@ pipeline:
 # Image encoder.
 image_encoder:
+ priority: 2.1
 type: TorchVisionWrapper
 model: vgg16
- priority: 2.1
 streams:
 inputs: images
 outputs: image_activations
@@ -184,8 +184,8 @@ pipeline:
 # Model - image size classifier.
 image_size_encoder:
- type: FeedForwardNetwork
- priority: 3.1
+ priority: 3.1
+ type: FeedForwardNetwork
 use_logsoftmax: False
 streams:
 inputs: image_sizes
 predictions: image_size_activations
@@ -242,8 +242,8 @@ pipeline:
 # Answer encoding for PIPE 6.
 pipe6_c123_answer_indexer:
- type: LabelIndexer
- priority: 6.1
+ priority: 6.1
+ type: LabelIndexer
 data_folder: ~/data/vqa-med
 word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
 # Export mappings and size to globals.
@@ -280,8 +280,8 @@ pipeline:
 prediction_size: vocabulary_size_c123
 pipe6_c123_nllloss:
- type: NLLLoss
- priority: 6.4
+ priority: 6.4
+ type: NLLLoss
 targets_dim: 1
 use_masking: True
 streams:
@@ -291,8 +291,8 @@ pipeline:
 loss: pipe6_c123_loss
 pipe6_c123_precision_recall:
- type: PrecisionRecallStatistics
- priority: 6.5
+ priority: 6.5
+ type: PrecisionRecallStatistics
 use_word_mappings: True
 use_masking: True
 show_class_scores: True
@@ -310,8 +310,8 @@ pipeline:
 # C123 Predictions decoder.
 pipe6_c123_prediction_decoder:
- type: WordDecoder
- priority: 6.6
+ priority: 6.6
+ type: WordDecoder
 # Use the same word mappings as label indexer.
 import_word_mappings_from_globals: True
 streams:
@@ -324,8 +324,8 @@ pipeline:
 # Viewers.
 viewer:
- type: StreamViewer
- priority: 9.3
+ priority: 9.3
+ type: StreamViewer
 input_streams:
 tokenized_questions, category_names,
 pipe0_predicted_question_categories_names,
 pipe6_c123_masks,
 answers, pipe6_c123_predicted_answers
From 0aaa1a527676bc6ab562f117dde2c7b8dda8f577 Mon Sep 17 00:00:00 2001
From: tkornut
Date: Mon, 29 Apr 2019 22:23:05 -0700
Subject: [PATCH 11/28] trainer fix: using name of config file when pipeline name not present
---
 ptp/workers/trainer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/ptp/workers/trainer.py b/ptp/workers/trainer.py
index 9a93591..1182574 100644
--- a/ptp/workers/trainer.py
+++ b/ptp/workers/trainer.py
@@ -156,7 +156,9 @@ def setup_experiment(self):
 pipeline_name = self.config['pipeline']['name']
 except KeyError:
 # Using name of the first configuration file from command line.
- pipeline_name = path.basename(root_configs[0])
+ basename = path.basename(root_configs[0])
+ # Take config filename without extension.
+ pipeline_name = path.splitext(basename)[0]
 # Set pipeline name, so processor can use it afterwards.
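+ # Illustrative example (Python standard library behaviour; the file name is
+ # just an example from this repo):
+ #   path.basename("configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml")
+ #     -> "c2_class_lstm_vgg16_rn.yml"
+ #   path.splitext("c2_class_lstm_vgg16_rn.yml")[0]
+ #     -> "c2_class_lstm_vgg16_rn"
+ # i.e. the pipeline is named after the first config file, minus directory and extension.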
self.config['pipeline'].add_config_params({'name': pipeline_name}) From 6ad9c28ae2975d3536dd88f6f3e8c6dd62d191b0 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 23:32:07 -0700 Subject: [PATCH 12/28] cleanup and rename of simple vf configs --- ...net152_is_cat_ffn_c123_no_binary_loss.yml} | 20 +- ..._ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml} | 28 +- ...t50_ewm_is_cat_ffn_c123_no_binary_loss.yml | 334 ++++++++++++++++++ ...snet50_is_cat_ffn_c123_no_binary_loss.yml} | 20 +- ..._vgg16_is_cat_ffn_c123_binary_yn_loss.yml} | 21 +- 5 files changed, 377 insertions(+), 46 deletions(-) rename configs/vqa_med_2019/vf/{c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml => lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml} (94%) rename configs/vqa_med_2019/vf/{c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml => lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml} (94%) create mode 100644 configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml rename configs/vqa_med_2019/vf/{c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml => lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml} (94%) rename configs/vqa_med_2019/vf/{c123_binary_lstm_vgg16_cat_ffn_loss.yml => lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml} (94%) diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml similarity index 94% rename from configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml rename to configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml index 9b86692..b52cf92 100644 --- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml @@ -32,7 +32,7 @@ pipeline: type: GlobalVariablePublisher priority: 0 # Add input_size to globals. - keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_no_binary_word_to_ix] + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c123_without_yn_word_to_ix] values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2}] # Statistics. @@ -74,7 +74,6 @@ pipeline: prediction_mode: Last initial_state: Trainable use_logsoftmax: False - dropout_rate: 0.5 streams: inputs: pipe0_embedded_questions predictions: pipe0_questions_activations @@ -144,7 +143,6 @@ pipeline: prediction_mode: Last initial_state: Trainable use_logsoftmax: False - dropout_rate: 0.5 streams: inputs: embedded_questions predictions: questions_activations @@ -164,8 +162,8 @@ pipeline: inputs: answers outputs: all_answers_ids globals: - vocabulary_size: vocabulary_size_all_c123 - word_mappings: word_mappings_all_c123 + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn ################# PIPE 2: SHARED IMAGE ENCODER ################# @@ -224,15 +222,15 @@ pipeline: inputs: answers outputs: pipe5_c123_answers_ids globals: - vocabulary_size: vocabulary_size_c123 - word_mappings: word_mappings_all_c123 + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn # Sample masking based on categories. 
pipe5_c123_string_to_mask: priority: 5.2 type: StringToMask globals: - word_mappings: category_c1_c2_c3_no_binary_word_to_ix + word_mappings: category_c123_without_yn_word_to_ix streams: strings: pipe0_predicted_question_categories_names string_indices: predicted_c123_by_question_categories_indices # NOT USED @@ -249,7 +247,7 @@ pipeline: predictions: pipe5_c123_predictions globals: input_size: concatenated_activations_size - prediction_size: vocabulary_size_c123 + prediction_size: vocabulary_size_c123_without_yn pipe5_c123_nllloss: type: NLLLoss @@ -274,7 +272,7 @@ pipeline: predictions: pipe5_c123_predictions targets: pipe5_c123_answers_ids globals: - word_mappings: word_mappings_all_c123 + word_mappings: word_mappings_c123_without_yn statistics: precision: pipe5_c123_precision recall: pipe5_c123_recall @@ -290,7 +288,7 @@ pipeline: inputs: pipe5_c123_predictions outputs: pipe5_c123_predicted_answers globals: - word_mappings: word_mappings_all_c123 + word_mappings: word_mappings_c123_without_yn ################# PIPE 9: MERGE ANSWERS ################# diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml similarity index 94% rename from configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml rename to configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml index dcc89a3..b0db77b 100644 --- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml @@ -32,7 +32,7 @@ pipeline: priority: 0 type: GlobalVariablePublisher # Add input_size to globals. - keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, element_wise_activation_size, category_c1_c2_c3_no_binary_word_to_ix] + keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, element_wise_activation_size, category_c123_without_yn_word_to_ix] values: [100, 2, 10, 100, 100, {"C1": 0, "C2": 1, "C3": 2}] # Statistics. @@ -74,7 +74,6 @@ pipeline: prediction_mode: Last initial_state: Trainable use_logsoftmax: False - dropout_rate: 0.5 streams: inputs: pipe0_embedded_questions predictions: pipe0_question_activations @@ -144,7 +143,6 @@ pipeline: prediction_mode: Last initial_state: Trainable use_logsoftmax: False - dropout_rate: 0.5 streams: inputs: embedded_questions predictions: question_activations @@ -164,8 +162,8 @@ pipeline: inputs: answers outputs: all_answers_ids globals: - vocabulary_size: vocabulary_size_all_c123 - word_mappings: word_mappings_all_c123 + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn ################# PIPE 2: SHARED IMAGE ENCODER ################# @@ -173,7 +171,7 @@ pipeline: image_encoder: priority: 2.1 type: TorchVisionWrapper - model: vgg16 + model: resnet50 streams: inputs: images outputs: image_activations @@ -252,15 +250,15 @@ pipeline: inputs: answers outputs: pipe6_c123_answers_ids globals: - vocabulary_size: vocabulary_size_c123 - word_mappings: word_mappings_all_c123 + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn # Sample masking based on categories. 
pipe6_c123_string_to_mask:
 priority: 6.2
 type: StringToMask
 globals:
- word_mappings: category_c1_c2_c3_no_binary_word_to_ix
+ word_mappings: category_c123_without_yn_word_to_ix
 streams:
 strings: pipe0_predicted_question_categories_names
 string_indices: predicted_c123_by_question_categories_indices # NOT USED
@@ -277,7 +275,7 @@ pipeline:
 predictions: pipe6_c123_predictions
 globals:
 input_size: concatenated_activations_size
- prediction_size: vocabulary_size_c123
+ prediction_size: vocabulary_size_c123_without_yn
 pipe6_c123_nllloss:
 priority: 6.4
@@ -302,7 +300,7 @@ pipeline:
 predictions: pipe6_c123_predictions
 targets: pipe6_c123_answers_ids
 globals:
- word_mappings: word_mappings_all_c123
+ word_mappings: word_mappings_c123_without_yn
 statistics:
 precision: pipe6_c123_precision
 recall: pipe6_c123_recall
@@ -318,7 +316,13 @@ pipeline:
 inputs: pipe6_c123_predictions
 outputs: pipe6_c123_predicted_answers
 globals:
- word_mappings: word_mappings_all_c123
+ word_mappings: word_mappings_c123_without_yn
+
+
+
+
+
+
 ################# PIPE 9: MERGE ANSWERS #################
diff --git a/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml
new file mode 100644
index 0000000..cba072b
--- /dev/null
+++ b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml
@@ -0,0 +1,334 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+ problem:
+ categories: C1,C2,C3
+ export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+ # Apply all preprocessing/data augmentations.
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+ sampler:
+ weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+
+# Validation parameters:
+validation:
+ problem:
+ categories: C1,C2,C3
+ # Apply all preprocessing/data augmentations.
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+
+
+pipeline:
+
+ ################# PIPE 0: SHARED #################
+
+ # Add global variables.
+ global_publisher:
+ priority: 0
+ type: GlobalVariablePublisher
+ # Add input_size to globals.
+ keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, element_wise_activation_size, category_c123_without_yn_word_to_ix]
+ values: [100, 2, 10, 100, 100, {"C1": 0, "C2": 1, "C3": 2}]
+
+ # Statistics.
+ batch_size: + priority: 0.1 + type: BatchSizeStatistics + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + priority: 0.3 + type: SentenceEmbeddings + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_question_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_encoder_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + priority: 0.7 + type: AccuracyStatistics + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + priority: 1.1 + type: SentenceEmbeddings + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + # Answer encoding + pipe1_all_answer_indexer: + priority: 1.3 + type: LabelIndexer + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. 
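+ # Illustrative note: the vocabulary size exported here (and again by the pipe6
+ # indexer below) is what later sizes the answer classifier's output layer, via
+ # "prediction_size: vocabulary_size_c123_without_yn" in pipe6_c123_answer_classifier.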
+ export_word_mappings_to_globals: True
+ streams:
+ inputs: answers
+ outputs: all_answers_ids
+ globals:
+ vocabulary_size: vocabulary_size_c123_without_yn
+ word_mappings: word_mappings_c123_without_yn
+
+ ################# PIPE 2: SHARED IMAGE ENCODER #################
+
+ # Image encoder.
+ image_encoder:
+ priority: 2.1
+ type: TorchVisionWrapper
+ model: resnet50
+ streams:
+ inputs: images
+ outputs: image_activations
+ globals:
+ output_size: image_encoder_output_size
+
+ ################# PIPE 3: SHARED IMAGE SIZE ENCODER #################
+
+ # Model - image size classifier.
+ image_size_encoder:
+ priority: 3.1
+ type: FeedForwardNetwork
+ use_logsoftmax: False
+ streams:
+ inputs: image_sizes
+ predictions: image_size_activations
+ globals:
+ input_size: image_size_encoder_input_size
+ prediction_size: image_size_encoder_output_size
+
+ ################# PIPE 4: image-question fusion #################
+ # Element wise multiplication + FF.
+ question_image_fusion:
+ priority: 4.1
+ type: ElementWiseMultiplication
+ dropout_rate: 0.5
+ streams:
+ image_encodings: image_activations
+ question_encodings: question_activations
+ outputs: element_wise_activations
+ globals:
+ image_encoding_size: image_encoder_output_size
+ question_encoding_size: question_encoder_output_size
+ output_size: element_wise_activation_size
+
+ question_image_ffn:
+ priority: 4.2
+ type: FeedForwardNetwork
+ hidden_sizes: [100]
+ dropout_rate: 0.5
+ use_logsoftmax: False
+ streams:
+ inputs: element_wise_activations
+ predictions: question_image_activations
+ globals:
+ input_size: element_wise_activation_size
+ prediction_size: element_wise_activation_size
+
+ ################# PIPE 5: image-question-image size fusion #################
+
+ # 5th subpipeline: concatenation
+ concat:
+ priority: 5.1
+ type: Concatenation
+ input_streams: [question_image_activations,image_size_activations]
+ # Concatenation
+ dim: 1 # default
+ input_dims: [[-1,100],[-1,10]]
+ output_dims: [-1,110]
+ streams:
+ outputs: concatenated_activations
+ globals:
+ output_size: concatenated_activations_size
+
+ ################# PIPE 6: C1 + C2 + C3 questions #################
+
+ # Answer encoding for PIPE 6.
+ pipe6_c123_answer_indexer:
+ priority: 6.1
+ type: LabelIndexer
+ data_folder: ~/data/vqa-med
+ word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv
+ # Export mappings and size to globals.
+ export_word_mappings_to_globals: True
+ streams:
+ inputs: answers
+ outputs: pipe6_c123_answers_ids
+ globals:
+ vocabulary_size: vocabulary_size_c123_without_yn
+ word_mappings: word_mappings_c123_without_yn
+
+ # Sample masking based on categories.
+ pipe6_c123_string_to_mask:
+ priority: 6.2
+ type: StringToMask
+ globals:
+ word_mappings: category_c123_without_yn_word_to_ix
+ streams:
+ strings: pipe0_predicted_question_categories_names
+ string_indices: predicted_c123_by_question_categories_indices # NOT USED
+ masks: pipe6_c123_masks
+
+ # Model 4: FFN C123 answering
+ pipe6_c123_answer_classifier:
+ priority: 6.3
+ type: FeedForwardNetwork
+ hidden: [100]
+ dropout_rate: 0.5
+ streams:
+ inputs: concatenated_activations
+ predictions: pipe6_c123_predictions
+ globals:
+ input_size: concatenated_activations_size
+ prediction_size: vocabulary_size_c123_without_yn
+
+ pipe6_c123_nllloss:
+ priority: 6.4
+ type: NLLLoss
+ targets_dim: 1
+ use_masking: True
+ streams:
+ predictions: pipe6_c123_predictions
+ masks: pipe6_c123_masks
+ targets: pipe6_c123_answers_ids
+ loss: pipe6_c123_loss
+
+ pipe6_c123_precision_recall:
+ priority: 6.5
+ type: PrecisionRecallStatistics
+ use_word_mappings: True
+ use_masking: True
+ show_class_scores: True
+ #show_confusion_matrix: True
+ streams:
+ masks: pipe6_c123_masks
+ predictions: pipe6_c123_predictions
+ targets: pipe6_c123_answers_ids
+ globals:
+ word_mappings: word_mappings_c123_without_yn
+ statistics:
+ precision: pipe6_c123_precision
+ recall: pipe6_c123_recall
+ f1score: pipe6_c123_f1score
+
+ # C123 Predictions decoder.
+ pipe6_c123_prediction_decoder:
+ priority: 6.6
+ type: WordDecoder
+ # Use the same word mappings as label indexer.
+ import_word_mappings_from_globals: True
+ streams:
+ inputs: pipe6_c123_predictions
+ outputs: pipe6_c123_predicted_answers
+ globals:
+ word_mappings: word_mappings_c123_without_yn
+
+ ################# PIPE 9: MERGE ANSWERS #################
+
+ # Viewers.
+ viewer:
+ priority: 9.3
+ type: StreamViewer
+ input_streams:
+ tokenized_questions, category_names,
+ pipe0_predicted_question_categories_names,
+ pipe6_c123_masks,
+ answers, pipe6_c123_predicted_answers
+
+
+#: pipeline
diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml
similarity index 94%
rename from configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml
rename to configs/vqa_med_2019/vf/lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml
index 3c92309..88a3bd7 100644
--- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml
+++ b/configs/vqa_med_2019/vf/lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml
@@ -32,7 +32,7 @@ pipeline:
 type: GlobalVariablePublisher
 priority: 0
 # Add input_size to globals.
- keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_no_binary_word_to_ix]
+ keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c123_without_yn_word_to_ix]
 values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2}]
 # Statistics.
@@ -74,7 +74,6 @@ pipeline: prediction_mode: Last initial_state: Trainable use_logsoftmax: False - dropout_rate: 0.5 streams: inputs: pipe0_embedded_questions predictions: pipe0_questions_activations @@ -144,7 +143,6 @@ pipeline: prediction_mode: Last initial_state: Trainable use_logsoftmax: False - dropout_rate: 0.5 streams: inputs: embedded_questions predictions: questions_activations @@ -164,8 +162,8 @@ pipeline: inputs: answers outputs: all_answers_ids globals: - vocabulary_size: vocabulary_size_all_c123 - word_mappings: word_mappings_all_c123 + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn ################# PIPE 2: SHARED IMAGE ENCODER ################# @@ -224,15 +222,15 @@ pipeline: inputs: answers outputs: pipe5_c123_answers_ids globals: - vocabulary_size: vocabulary_size_c123 - word_mappings: word_mappings_all_c123 + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn # Sample masking based on categories. pipe5_c123_string_to_mask: priority: 5.2 type: StringToMask globals: - word_mappings: category_c1_c2_c3_no_binary_word_to_ix + word_mappings: category_c123_without_yn_word_to_ix streams: strings: pipe0_predicted_question_categories_names string_indices: predicted_c123_by_question_categories_indices # NOT USED @@ -249,7 +247,7 @@ pipeline: predictions: pipe5_c123_predictions globals: input_size: concatenated_activations_size - prediction_size: vocabulary_size_c123 + prediction_size: vocabulary_size_c123_without_yn pipe5_c123_nllloss: type: NLLLoss @@ -274,7 +272,7 @@ pipeline: predictions: pipe5_c123_predictions targets: pipe5_c123_answers_ids globals: - word_mappings: word_mappings_all_c123 + word_mappings: word_mappings_c123_without_yn statistics: precision: pipe5_c123_precision recall: pipe5_c123_recall @@ -290,7 +288,7 @@ pipeline: inputs: pipe5_c123_predictions outputs: pipe5_c123_predicted_answers globals: - word_mappings: word_mappings_all_c123 + word_mappings: word_mappings_c123_without_yn ################# PIPE 9: MERGE ANSWERS ################# diff --git a/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml similarity index 94% rename from configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml rename to configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml index 3d5b25a..a4d45b8 100644 --- a/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml @@ -32,8 +32,8 @@ pipeline: type: GlobalVariablePublisher priority: 0 # Add input_size to globals. - keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_no_binary_word_to_ix] - values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2}] + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_binary_yn_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2, "BINARY": 3}] # Statistics. 
batch_size:
@@ -74,7 +74,6 @@ pipeline:
 prediction_mode: Last
 initial_state: Trainable
 use_logsoftmax: False
- dropout_rate: 0.5
 streams:
 inputs: pipe0_embedded_questions
 predictions: pipe0_questions_activations
@@ -93,7 +92,6 @@ pipeline:
 freeze: True
 ###################
 hidden: [50]
- dropout_rate: 0.5
 streams:
 inputs: pipe0_questions_activations
 predictions: pipe0_predicted_question_categories_preds
@@ -144,7 +142,6 @@ pipeline:
 prediction_mode: Last
 initial_state: Trainable
 use_logsoftmax: False
- dropout_rate: 0.5
 streams:
 inputs: embedded_questions
 predictions: questions_activations
@@ -165,7 +162,7 @@ pipeline:
 outputs: all_answers_ids
 globals:
 vocabulary_size: vocabulary_size_all_c123
- word_mappings: word_mappings_all_c123
+ word_mappings: word_mappings_all_c123_binary_yn
 ################# PIPE 2: SHARED IMAGE ENCODER #################
@@ -223,15 +220,15 @@ pipeline:
 inputs: answers
 outputs: pipe5_c123_answers_ids
 globals:
- vocabulary_size: vocabulary_size_c123
- word_mappings: word_mappings_all_c123
+ vocabulary_size: vocabulary_size_c123_binary_yn
+ word_mappings: word_mappings_all_c123_binary_yn
 # Sample masking based on categories.
 pipe5_c123_string_to_mask:
 priority: 5.2
 type: StringToMask
 globals:
- word_mappings: category_c1_c2_c3_no_binary_word_to_ix
+ word_mappings: category_c1_c2_c3_binary_yn_word_to_ix
 streams:
 strings: pipe0_predicted_question_categories_names
 string_indices: predicted_c123_by_question_categories_indices # NOT USED
@@ -248,7 +245,7 @@ pipeline:
 predictions: pipe5_c123_predictions
 globals:
 input_size: concatenated_activations_size
- prediction_size: vocabulary_size_c123
+ prediction_size: vocabulary_size_c123_binary_yn
 pipe5_c123_nllloss:
 type: NLLLoss
@@ -273,7 +270,7 @@ pipeline:
 predictions: pipe5_c123_predictions
 targets: pipe5_c123_answers_ids
 globals:
- word_mappings: word_mappings_all_c123
+ word_mappings: word_mappings_all_c123_binary_yn
 statistics:
 precision: pipe5_c123_precision
 recall: pipe5_c123_recall
@@ -289,7 +286,7 @@ pipeline:
 inputs: pipe5_c123_predictions
 outputs: pipe5_c123_predicted_answers
 globals:
- word_mappings: word_mappings_all_c123
+ word_mappings: word_mappings_all_c123_binary_yn
 ################# PIPE 9: MERGE ANSWERS #################
From d8c588c189f4b154f00e92075986e3d308e4c2aa Mon Sep 17 00:00:00 2001
From: tkornut
Date: Mon, 29 Apr 2019 23:39:49 -0700
Subject: [PATCH 13/28] lstm_vgg16_is_cat_ffn_only_yn_loss.yml
---
 .../lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml | 305 ++++++++++++++++++
 .../vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml | 305 ++++++++++++++++++
 2 files changed, 610 insertions(+)
 create mode 100644 configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml
 create mode 100644 configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml
diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml
new file mode 100644
index 0000000..a4d45b8
--- /dev/null
+++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml
@@ -0,0 +1,305 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+ problem:
+ categories: C1,C2,C3
+ export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+ # Apply all preprocessing/data augmentations.
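+ # Illustrative walk-through of the chain below (assumed step semantics):
+ #   "What modality is shown?"
+ #   -> lowercase          -> "what modality is shown?"
+ #   -> remove_punctuation -> "what modality is shown"
+ #   -> tokenize           -> ["what", "modality", "is", "shown"]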
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+ sampler:
+ weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+
+# Validation parameters:
+validation:
+ problem:
+ categories: C1,C2,C3
+ # Apply all preprocessing/data augmentations.
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+
+
+pipeline:
+
+ ################# PIPE 0: SHARED #################
+
+ # Add global variables.
+ global_publisher:
+ type: GlobalVariablePublisher
+ priority: 0
+ # Add input_size to globals.
+ keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_binary_yn_word_to_ix]
+ values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2, "BINARY": 3}]
+
+ # Statistics.
+ batch_size:
+ type: BatchSizeStatistics
+ priority: 0.1
+
+ ################# PIPE 0: CATEGORY #################
+
+ # Model 1: question embeddings
+ pipe0_question_embeddings:
+ type: SentenceEmbeddings
+ priority: 0.3
+ # LOAD AND FREEZE #
+ load:
+ file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+ model: question_embeddings
+ freeze: True
+ ###################
+ embeddings_size: 50
+ pretrained_embeddings_file: glove.6B.50d.txt
+ data_folder: ~/data/vqa-med
+ word_mappings_file: questions.all.word.mappings.csv
+ streams:
+ inputs: tokenized_questions
+ outputs: pipe0_embedded_questions
+
+ # Model 2: question RNN
+ pipe0_lstm:
+ priority: 0.4
+ type: RecurrentNeuralNetwork
+ cell_type: LSTM
+ # LOAD AND FREEZE #
+ load:
+ file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+ model: lstm
+ freeze: True
+ ###################
+ prediction_mode: Last
+ initial_state: Trainable
+ use_logsoftmax: False
+ streams:
+ inputs: pipe0_embedded_questions
+ predictions: pipe0_questions_activations
+ globals:
+ input_size: embeddings_size
+ prediction_size: question_lstm_output_size
+
+ # Model 3: FFN question category
+ pipe0_classifier:
+ priority: 0.5
+ type: FeedForwardNetwork
+ # LOAD AND FREEZE #
+ load:
+ file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+ model: classifier
+ freeze: True
+ ###################
+ hidden: [50]
+ streams:
+ inputs: pipe0_questions_activations
+ predictions: pipe0_predicted_question_categories_preds
+ globals:
+ input_size: question_lstm_output_size # Set by global publisher
+ prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK
+
+ pipe0_category_decoder:
+ priority: 0.6
+ type: WordDecoder
+ # Use the same word mappings as label indexer.
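+ # Illustrative note (assumed WordDecoder semantics): the decoder takes the argmax
+ # of each prediction vector and maps the winning index back through the inverted
+ # word mappings, e.g. index 2 under {"C1": 0, "C2": 1, "C3": 2, ...} decodes to "C3".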
+ import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + vocabulary_size: vocabulary_size_all_c123 + word_mappings: word_mappings_all_c123_binary_yn + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. + image_encoder: + type: TorchVisionWrapper + model: vgg16 + priority: 2.1 + streams: + inputs: images + outputs: image_activations + globals: + output_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 + C2 + C3 questions ################# + + # Answer encoding for PIPE 5. + pipe5_c123_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_c123_answers_ids + globals: + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_all_c123_binary_yn + + # Sample masking based on categories. 
+ pipe5_c123_string_to_mask:
+ priority: 5.2
+ type: StringToMask
+ globals:
+ word_mappings: category_c1_c2_c3_binary_yn_word_to_ix
+ streams:
+ strings: pipe0_predicted_question_categories_names
+ string_indices: predicted_c123_by_question_categories_indices # NOT USED
+ masks: pipe5_c123_masks
+
+ # Model 4: FFN C123 answering
+ pipe5_c123_ffn:
+ priority: 5.3
+ type: FeedForwardNetwork
+ hidden: [50]
+ dropout_rate: 0.5
+ streams:
+ inputs: concatenated_activations
+ predictions: pipe5_c123_predictions
+ globals:
+ input_size: concatenated_activations_size
+ prediction_size: vocabulary_size_c123_binary_yn
+
+ pipe5_c123_nllloss:
+ type: NLLLoss
+ priority: 5.4
+ targets_dim: 1
+ use_masking: True
+ streams:
+ predictions: pipe5_c123_predictions
+ masks: pipe5_c123_masks
+ targets: pipe5_c123_answers_ids
+ loss: pipe5_c123_loss
+
+ pipe5_c123_precision_recall:
+ type: PrecisionRecallStatistics
+ priority: 5.5
+ use_word_mappings: True
+ use_masking: True
+ show_class_scores: True
+ #show_confusion_matrix: True
+ streams:
+ masks: pipe5_c123_masks
+ predictions: pipe5_c123_predictions
+ targets: pipe5_c123_answers_ids
+ globals:
+ word_mappings: word_mappings_all_c123_binary_yn
+ statistics:
+ precision: pipe5_c123_precision
+ recall: pipe5_c123_recall
+ f1score: pipe5_c123_f1score
+
+ # C123 Predictions decoder.
+ pipe5_prediction_decoder:
+ type: WordDecoder
+ priority: 5.6
+ # Use the same word mappings as label indexer.
+ import_word_mappings_from_globals: True
+ streams:
+ inputs: pipe5_c123_predictions
+ outputs: pipe5_c123_predicted_answers
+ globals:
+ word_mappings: word_mappings_all_c123_binary_yn
+
+ ################# PIPE 9: MERGE ANSWERS #################
+
+
+ # Viewers.
+ viewer:
+ type: StreamViewer
+ priority: 9.3
+ input_streams:
+ tokenized_questions, category_names,
+ pipe0_predicted_question_categories_names,
+ pipe5_c123_masks,
+ answers, pipe5_c123_predicted_answers
+
+
+#: pipeline
diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml
new file mode 100644
index 0000000..be6e659
--- /dev/null
+++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml
@@ -0,0 +1,305 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+ problem:
+ categories: C1,C2,C3
+ export_sample_weights: ~/data/vqa-med/answers.binary_yn.weights.csv
+ # Apply all preprocessing/data augmentations.
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+ sampler:
+ weights: ~/data/vqa-med/answers.binary_yn.weights.csv
+
+# Validation parameters:
+validation:
+ problem:
+ categories: C1,C2,C3
+ # Apply all preprocessing/data augmentations.
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+
+
+pipeline:
+
+ ################# PIPE 0: SHARED #################
+
+ # Add global variables.
+ global_publisher:
+ type: GlobalVariablePublisher
+ priority: 0
+ # Add input_size to globals.
+ keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_binary_yn_word_to_ix]
+ values: [100, 2, 10, 100, {"BINARY": 3}]
+
+ # Statistics.
+ batch_size: + type: BatchSizeStatistics + priority: 0.1 + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals. 
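+ # Note: this indexer and pipe5_binary_yn_answer_indexer below read the same
+ # answers.binary_yn.word.mappings.csv and export the same globals, so the
+ # duplication appears redundant but harmless - the second export simply
+ # republishes identical values.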
+ export_word_mappings_to_globals: True
+ streams:
+ inputs: answers
+ outputs: all_answers_ids
+ globals:
+ vocabulary_size: vocabulary_size_binary_yn
+ word_mappings: word_mappings_binary_yn
+
+ ################# PIPE 2: SHARED IMAGE ENCODER #################
+
+ # Image encoder.
+ image_encoder:
+ type: TorchVisionWrapper
+ model: vgg16
+ priority: 2.1
+ streams:
+ inputs: images
+ outputs: image_activations
+ globals:
+ output_size: image_encoder_output_size
+
+ ################# PIPE 3: SHARED IMAGE SIZE ENCODER #################
+
+ # Model - image size classifier.
+ image_size_encoder:
+ type: FeedForwardNetwork
+ priority: 3.1
+ streams:
+ inputs: image_sizes
+ predictions: image_size_activations
+ globals:
+ input_size: image_size_encoder_input_size
+ prediction_size: image_size_encoder_output_size
+
+ ################# PIPE 4: SHARED CONCAT #################
+
+ concat:
+ type: Concatenation
+ priority: 4.1
+ input_streams: [questions_activations,image_activations,image_size_activations]
+ # Concatenation
+ dim: 1 # default
+ input_dims: [[-1,100],[-1,100],[-1,10]]
+ output_dims: [-1,210]
+ streams:
+ outputs: concatenated_activations
+ globals:
+ output_size: concatenated_activations_size
+
+
+ ################# PIPE 5: BINARY Y/N questions #################
+
+ # Answer encoding for PIPE 5.
+ pipe5_binary_yn_answer_indexer:
+ type: LabelIndexer
+ priority: 5.1
+ data_folder: ~/data/vqa-med
+ word_mappings_file: answers.binary_yn.word.mappings.csv
+ # Export mappings and size to globals.
+ export_word_mappings_to_globals: True
+ streams:
+ inputs: answers
+ outputs: pipe5_binary_yn_answers_ids
+ globals:
+ vocabulary_size: vocabulary_size_binary_yn
+ word_mappings: word_mappings_binary_yn
+
+ # Sample masking based on categories.
+ pipe5_binary_yn_string_to_mask:
+ priority: 5.2
+ type: StringToMask
+ globals:
+ word_mappings: category_binary_yn_word_to_ix
+ streams:
+ strings: pipe0_predicted_question_categories_names
+ string_indices: predicted_c123_by_question_categories_indices # NOT USED
+ masks: pipe5_binary_yn_masks
+
+ # Model 4: FFN binary Y/N answering
+ pipe5_binary_yn_ffn:
+ priority: 5.3
+ type: FeedForwardNetwork
+ hidden: [50]
+ dropout_rate: 0.5
+ streams:
+ inputs: concatenated_activations
+ predictions: pipe5_binary_yn_predictions
+ globals:
+ input_size: concatenated_activations_size
+ prediction_size: vocabulary_size_binary_yn
+
+ pipe5_binary_yn_nllloss:
+ type: NLLLoss
+ priority: 5.4
+ targets_dim: 1
+ use_masking: True
+ streams:
+ predictions: pipe5_binary_yn_predictions
+ masks: pipe5_binary_yn_masks
+ targets: pipe5_binary_yn_answers_ids
+ loss: pipe5_binary_yn_loss
+
+ pipe5_binary_yn_precision_recall:
+ type: PrecisionRecallStatistics
+ priority: 5.5
+ use_word_mappings: True
+ use_masking: True
+ show_class_scores: True
+ #show_confusion_matrix: True
+ streams:
+ masks: pipe5_binary_yn_masks
+ predictions: pipe5_binary_yn_predictions
+ targets: pipe5_binary_yn_answers_ids
+ globals:
+ word_mappings: word_mappings_binary_yn
+ statistics:
+ precision: pipe5_binary_yn_precision
+ recall: pipe5_binary_yn_recall
+ f1score: pipe5_binary_yn_f1score
+
+ # Binary Y/N predictions decoder.
+ pipe5_prediction_decoder:
+ type: WordDecoder
+ priority: 5.6
+ # Use the same word mappings as label indexer.
+ import_word_mappings_from_globals: True
+ streams:
+ inputs: pipe5_binary_yn_predictions
+ outputs: pipe5_binary_yn_predicted_answers
+ globals:
+ word_mappings: word_mappings_binary_yn
+
+ ################# PIPE 9: MERGE ANSWERS #################
+
+
+ # Viewers.
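+ # Illustrative note (assumed StreamViewer behaviour): for every sample the viewer
+ # prints the listed streams side by side, e.g. the tokenized question, the predicted
+ # category name, the 0/1 mask value, and the ground-truth vs. predicted answer,
+ # which makes it easy to eyeball which masked samples the binary Y/N head gets wrong.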
+ viewer: + type: StreamViewer + priority: 9.3 + input_streams: + tokenized_questions, category_names, + pipe0_predicted_question_categories_names, + pipe5_binary_yn_masks, + answers, pipe5_binary_yn_predicted_answers + + +#: pipeline From 529fb993f7fd773c76fa01a04f549ee8163a3ef6 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 23:48:11 -0700 Subject: [PATCH 14/28] configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml --- .../lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml | 68 +++++++++---------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml index a4d45b8..b9b4dc2 100644 --- a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml @@ -32,8 +32,8 @@ pipeline: type: GlobalVariablePublisher priority: 0 # Add input_size to globals. - keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_binary_yn_word_to_ix] - values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2, "BINARY": 3}] + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_without_yn_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2}] # Statistics. batch_size: @@ -154,15 +154,15 @@ pipeline: type: LabelIndexer priority: 1.3 data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. export_word_mappings_to_globals: True streams: inputs: answers outputs: all_answers_ids globals: - vocabulary_size: vocabulary_size_all_c123 - word_mappings: word_mappings_all_c123_binary_yn + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn ################# PIPE 2: SHARED IMAGE ENCODER ################# @@ -209,56 +209,56 @@ pipeline: ################# PIPE 5: C1 + C2 + C3 questions ################# # Answer encoding for PIPE 5. - pipe5_c123_answer_indexer: + pipe5_c123_without_yn_answer_indexer: type: LabelIndexer priority: 5.1 data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. export_word_mappings_to_globals: True streams: inputs: answers - outputs: pipe5_c123_answers_ids + outputs: pipe5_c123_without_yn_answers_ids globals: - vocabulary_size: vocabulary_size_c123_binary_yn - word_mappings: word_mappings_all_c123_binary_yn + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn # Sample masking based on categories. 
- pipe5_c123_string_to_mask: + pipe5_c123_without_yn_string_to_mask: priority: 5.2 type: StringToMask globals: - word_mappings: category_c1_c2_c3_binary_yn_word_to_ix + word_mappings: category_c1_c2_c3_without_yn_word_to_ix streams: strings: pipe0_predicted_question_categories_names string_indices: predicted_c123_by_question_categories_indices # NOT USED - masks: pipe5_c123_masks + masks: pipe5_c123_without_yn_masks # Model 4: FFN C1 answering - pipe5_c123_ffn: + pipe5_c123_without_yn_ffn: priority: 5.3 type: FeedForwardNetwork hidden: [50] dropout_rate: 0.5 streams: inputs: concatenated_activations - predictions: pipe5_c123_predictions + predictions: pipe5_c123_without_yn_predictions globals: input_size: concatenated_activations_size - prediction_size: vocabulary_size_c123_binary_yn + prediction_size: vocabulary_size_c123_without_yn - pipe5_c123_nllloss: + pipe5_c123_without_yn_nllloss: type: NLLLoss priority: 5.4 targets_dim: 1 use_masking: True streams: - predictions: pipe5_c123_predictions - masks: pipe5_c123_masks - targets: pipe5_c123_answers_ids - loss: pipe5_c123_loss + predictions: pipe5_c123_without_yn_predictions + masks: pipe5_c123_without_yn_masks + targets: pipe5_c123_without_yn_answers_ids + loss: pipe5_c123_without_yn_loss - pipe5_c123_precision_recall: + pipe5_c123_without_yn_precision_recall: type: PrecisionRecallStatistics priority: 5.5 use_word_mappings: True @@ -266,15 +266,15 @@ pipeline: show_class_scores: True #show_confusion_matrix: True streams: - masks: pipe5_c123_masks - predictions: pipe5_c123_predictions - targets: pipe5_c123_answers_ids + masks: pipe5_c123_without_yn_masks + predictions: pipe5_c123_without_yn_predictions + targets: pipe5_c123_without_yn_answers_ids globals: - word_mappings: word_mappings_all_c123_binary_yn + word_mappings: word_mappings_c123_without_yn statistics: - precision: pipe5_c123_precision - recall: pipe5_c123_recall - f1score: pipe5_c123_f1score + precision: pipe5_c123_without_yn_precision + recall: pipe5_c123_without_yn_recall + f1score: pipe5_c123_without_yn_f1score # C123 Predictions decoder. pipe5_prediction_decoder: @@ -283,10 +283,10 @@ pipeline: # Use the same word mappings as label indexer. 
import_word_mappings_from_globals: True streams: - inputs: pipe5_c123_predictions - outputs: pipe5_c123_predicted_answers + inputs: pipe5_c123_without_yn_predictions + outputs: pipe5_c123_without_yn_predicted_answers globals: - word_mappings: word_mappings_all_c123_binary_yn + word_mappings: word_mappings_c123_without_yn ################# PIPE 9: MERGE ANSWERS ################# @@ -298,8 +298,8 @@ pipeline: input_streams: tokenized_questions, category_names, pipe0_predicted_question_categories_names, - pipe5_c123_masks, - answers, pipe5_c123_predicted_answers + pipe5_c123_without_yn_masks, + answers, pipe5_c123_without_yn_predicted_answers #: pipeline From 1c4722ae53cb82c4b3339e4a7009c0acaa0ad04c Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 23:57:37 -0700 Subject: [PATCH 15/28] extend_answers working on tokenized_answers returned from problem --- configs/vqa_med_2019/default_extend_answers.yml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/configs/vqa_med_2019/default_extend_answers.yml b/configs/vqa_med_2019/default_extend_answers.yml index 270d5d1..929c555 100644 --- a/configs/vqa_med_2019/default_extend_answers.yml +++ b/configs/vqa_med_2019/default_extend_answers.yml @@ -9,6 +9,10 @@ training_answers: categories: all resize_image: &resize_image [224, 224] batch_size: 64 + # Appy all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions dataloader: # No sampler, process samples in the same order. shuffle: false @@ -22,6 +26,10 @@ validation_answers: split: validation resize_image: *resize_image batch_size: 64 + # Appy all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions dataloader: # No sampler, process samples in the same order. shuffle: false @@ -37,6 +45,10 @@ test_answers: split: test resize_image: *resize_image batch_size: 64 + # Appy all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions dataloader: # No sampler, process samples in the same order. shuffle: false @@ -45,7 +57,7 @@ test_answers: # Add component for exporting answers to files. pipeline: - disable: viewer + disable: viewer,question_tokenizer # # Viewers. 
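The question_preprocessing: lowercase,remove_punctuation,tokenize settings added above make the problem itself emit tokenized_questions, which is why the pipeline's question_tokenizer is disabled alongside the viewer. As a rough sketch of what those three steps do (whitespace splitting is an assumption here; ptp's tokenizer may differ):

import string

def preprocess_question(question):
    question = question.lower()                                               # lowercase
    question = question.translate(str.maketrans("", "", string.punctuation))  # remove_punctuation
    return question.split()                                                   # tokenize

print(preprocess_question("Is this a CT scan?"))
# ['is', 'this', 'a', 'ct', 'scan']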
viewer_extended: priority: 100.4 From da3c2f818c11d5029dc64c0ca6a6928db6f0986a Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 00:04:50 -0700 Subject: [PATCH 16/28] default_extended_answers: predicted_answers --- configs/vqa_med_2019/default_extend_answers.yml | 10 ++++++++-- .../vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml | 4 ++-- .../vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml | 6 +++--- .../vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml | 8 ++++---- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/configs/vqa_med_2019/default_extend_answers.yml b/configs/vqa_med_2019/default_extend_answers.yml index 929c555..97e9ddf 100644 --- a/configs/vqa_med_2019/default_extend_answers.yml +++ b/configs/vqa_med_2019/default_extend_answers.yml @@ -63,12 +63,18 @@ pipeline: priority: 100.4 type: StreamViewer sample_number: 0 - input_streams: indices,image_ids,questions,category_names,predicted_categories,answers,tokenized_answers,predicted_answers + input_streams: + indices,image_ids,tokenized_questions, + category_names,predicted_categories, + answers,tokenized_answers,predicted_answers exporter: priority: 100.5 type: StreamFileExporter separator: '|' - input_streams: indices,image_ids,questions,category_names,predicted_categories,answers,tokenized_answers,predicted_answers + input_streams: + indices,image_ids,tokenized_questions, + category_names,predicted_categories, + answers,tokenized_answers,predicted_answers #: pipeline diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml index a4d45b8..a420f14 100644 --- a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml @@ -284,7 +284,7 @@ pipeline: import_word_mappings_from_globals: True streams: inputs: pipe5_c123_predictions - outputs: pipe5_c123_predicted_answers + outputs: predicted_answers globals: word_mappings: word_mappings_all_c123_binary_yn @@ -299,7 +299,7 @@ pipeline: tokenized_questions, category_names, pipe0_predicted_question_categories_names, pipe5_c123_masks, - answers, pipe5_c123_predicted_answers + answers, predicted_answers #: pipeline diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml index b9b4dc2..1bf7bdc 100644 --- a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml @@ -238,7 +238,7 @@ pipeline: pipe5_c123_without_yn_ffn: priority: 5.3 type: FeedForwardNetwork - hidden: [50] + hidden: [100] dropout_rate: 0.5 streams: inputs: concatenated_activations @@ -284,7 +284,7 @@ pipeline: import_word_mappings_from_globals: True streams: inputs: pipe5_c123_without_yn_predictions - outputs: pipe5_c123_without_yn_predicted_answers + outputs: predicted_answers globals: word_mappings: word_mappings_c123_without_yn @@ -299,7 +299,7 @@ pipeline: tokenized_questions, category_names, pipe0_predicted_question_categories_names, pipe5_c123_without_yn_masks, - answers, pipe5_c123_without_yn_predicted_answers + answers, predicted_answers #: pipeline diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml index be6e659..03c2cea 100644 --- a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml 
@@ -235,10 +235,10 @@ pipeline: masks: pipe5_binary_yn_masks # Model 4: FFN C1 answering - pipe5_binary_yn_ffn: + pipe5_binary_yn_classifier: priority: 5.3 type: FeedForwardNetwork - hidden: [50] + hidden: [100] dropout_rate: 0.5 streams: inputs: concatenated_activations @@ -284,7 +284,7 @@ pipeline: import_word_mappings_from_globals: True streams: inputs: pipe5_binary_yn_predictions - outputs: pipe5_binary_yn_predicted_answers + outputs: predicted_answers globals: word_mappings: word_mappings_binary_yn @@ -299,7 +299,7 @@ pipeline: tokenized_questions, category_names, pipe0_predicted_question_categories_names, pipe5_binary_yn_masks, - answers, pipe5_binary_yn_predicted_answers + answers, predicted_answers #: pipeline From 32706a916be5bad6e11c7223aab6983236d09f70 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 00:23:15 -0700 Subject: [PATCH 17/28] configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml --- ...m_vgg16_is_cat_ffn_c123_binary_yn_loss.yml | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml index a420f14..71c3946 100644 --- a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml @@ -161,8 +161,8 @@ pipeline: inputs: answers outputs: all_answers_ids globals: - vocabulary_size: vocabulary_size_all_c123 - word_mappings: word_mappings_all_c123_binary_yn + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn ################# PIPE 2: SHARED IMAGE ENCODER ################# @@ -206,10 +206,10 @@ pipeline: output_size: concatenated_activations_size - ################# PIPE 5: C1 + C2 + C3 questions ################# + ################# PIPE 5: C1 + C2 + C3 + BINARY questions ################# # Answer encoding for PIPE 5. - pipe5_c123_answer_indexer: + pipe5_c123_binary_yn_answer_indexer: type: LabelIndexer priority: 5.1 data_folder: ~/data/vqa-med @@ -218,13 +218,13 @@ pipeline: export_word_mappings_to_globals: True streams: inputs: answers - outputs: pipe5_c123_answers_ids + outputs: pipe5_c123_binary_yn_answers_ids globals: vocabulary_size: vocabulary_size_c123_binary_yn - word_mappings: word_mappings_all_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn # Sample masking based on categories. 
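The widened hidden: [100] classifier above is, in plain PyTorch terms, a small MLP over the 210-dimensional concatenated activations (100 question + 100 image + 10 image-size features), ending in log-softmax since its predictions feed an NLLLoss. A sketch under those assumptions (the exact layer ordering inside ptp's FeedForwardNetwork is not shown in the patch):

import torch

ffn = torch.nn.Sequential(
    torch.nn.Dropout(0.5),            # dropout_rate: 0.5
    torch.nn.Linear(210, 100),        # hidden: [100]
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(100, 2),          # vocabulary_size_binary_yn (assumed yes/no)
    torch.nn.LogSoftmax(dim=1),       # NLLLoss expects log-probabilities
)

log_probs = ffn(torch.randn(48, 210))  # [batch_size, 2]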
- pipe5_c123_string_to_mask: + pipe5_c123_binary_yn_string_to_mask: priority: 5.2 type: StringToMask globals: @@ -232,33 +232,33 @@ pipeline: streams: strings: pipe0_predicted_question_categories_names string_indices: predicted_c123_by_question_categories_indices # NOT USED - masks: pipe5_c123_masks + masks: pipe5_c123_binary_yn_masks # Model 4: FFN C1 answering - pipe5_c123_ffn: + pipe5_c123_binary_yn_ffn: priority: 5.3 type: FeedForwardNetwork - hidden: [50] + hidden: [100] dropout_rate: 0.5 streams: inputs: concatenated_activations - predictions: pipe5_c123_predictions + predictions: pipe5_c123_binary_yn_predictions globals: input_size: concatenated_activations_size prediction_size: vocabulary_size_c123_binary_yn - pipe5_c123_nllloss: + pipe5_c123_binary_yn_nllloss: type: NLLLoss priority: 5.4 targets_dim: 1 use_masking: True streams: - predictions: pipe5_c123_predictions - masks: pipe5_c123_masks - targets: pipe5_c123_answers_ids - loss: pipe5_c123_loss + predictions: pipe5_c123_binary_yn_predictions + masks: pipe5_c123_binary_yn_masks + targets: pipe5_c123_binary_yn_answers_ids + loss: pipe5_c123_binary_yn_loss - pipe5_c123_precision_recall: + pipe5_c123_binary_yn_precision_recall: type: PrecisionRecallStatistics priority: 5.5 use_word_mappings: True @@ -266,15 +266,15 @@ pipeline: show_class_scores: True #show_confusion_matrix: True streams: - masks: pipe5_c123_masks - predictions: pipe5_c123_predictions - targets: pipe5_c123_answers_ids + masks: pipe5_c123_binary_yn_masks + predictions: pipe5_c123_binary_yn_predictions + targets: pipe5_c123_binary_yn_answers_ids globals: - word_mappings: word_mappings_all_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn statistics: - precision: pipe5_c123_precision - recall: pipe5_c123_recall - f1score: pipe5_c123_f1score + precision: pipe5_c123_binary_yn_precision + recall: pipe5_c123_binary_yn_recall + f1score: pipe5_c123_binary_yn_f1score # C123 Predictions decoder. pipe5_prediction_decoder: @@ -283,10 +283,10 @@ pipeline: # Use the same word mappings as label indexer. import_word_mappings_from_globals: True streams: - inputs: pipe5_c123_predictions + inputs: pipe5_c123_binary_yn_predictions outputs: predicted_answers globals: - word_mappings: word_mappings_all_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn ################# PIPE 9: MERGE ANSWERS ################# @@ -298,7 +298,7 @@ pipeline: input_streams: tokenized_questions, category_names, pipe0_predicted_question_categories_names, - pipe5_c123_masks, + pipe5_c123_binary_yn_masks, answers, predicted_answers From 5556abd5a201b9ee7fadaeb6b5e9c4f13bf1e9d1 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 00:45:23 -0700 Subject: [PATCH 18/28] configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml --- ...snet152_is_cat_ffn_c123_no_binary_loss.yml | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml index b52cf92..5a541f2 100644 --- a/configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml @@ -155,7 +155,7 @@ pipeline: type: LabelIndexer priority: 1.3 data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. 
export_word_mappings_to_globals: True streams: @@ -211,22 +211,22 @@ pipeline: ################# PIPE 5: C1 + C2 + C3 questions ################# # Answer encoding for PIPE 5. - pipe5_c123_answer_indexer: + pipe5_c123_without_yn_answer_indexer: type: LabelIndexer priority: 5.1 data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. export_word_mappings_to_globals: True streams: inputs: answers - outputs: pipe5_c123_answers_ids + outputs: pipe5_c123_without_yn_answers_ids globals: vocabulary_size: vocabulary_size_c123_without_yn word_mappings: word_mappings_c123_without_yn # Sample masking based on categories. - pipe5_c123_string_to_mask: + pipe5_c123_without_yn_string_to_mask: priority: 5.2 type: StringToMask globals: @@ -234,33 +234,33 @@ pipeline: streams: strings: pipe0_predicted_question_categories_names string_indices: predicted_c123_by_question_categories_indices # NOT USED - masks: pipe5_c123_masks + masks: pipe5_c123_without_yn_masks # Model 4: FFN C1 answering - pipe5_c123_ffn: + pipe5_c123_without_yn_ffn: priority: 5.3 type: FeedForwardNetwork - hidden: [50] + hidden: [100] dropout_rate: 0.5 streams: inputs: concatenated_activations - predictions: pipe5_c123_predictions + predictions: pipe5_c123_without_yn_predictions globals: input_size: concatenated_activations_size prediction_size: vocabulary_size_c123_without_yn - pipe5_c123_nllloss: + pipe5_c123_without_yn_nllloss: type: NLLLoss priority: 5.4 targets_dim: 1 use_masking: True streams: - predictions: pipe5_c123_predictions - masks: pipe5_c123_masks - targets: pipe5_c123_answers_ids - loss: pipe5_c123_loss + predictions: pipe5_c123_without_yn_predictions + masks: pipe5_c123_without_yn_masks + targets: pipe5_c123_without_yn_answers_ids + loss: pipe5_c123_without_yn_loss - pipe5_c123_precision_recall: + pipe5_c123_without_yn_precision_recall: type: PrecisionRecallStatistics priority: 5.5 use_word_mappings: True @@ -268,15 +268,15 @@ pipeline: show_class_scores: True #show_confusion_matrix: True streams: - masks: pipe5_c123_masks - predictions: pipe5_c123_predictions - targets: pipe5_c123_answers_ids + masks: pipe5_c123_without_yn_masks + predictions: pipe5_c123_without_yn_predictions + targets: pipe5_c123_without_yn_answers_ids globals: word_mappings: word_mappings_c123_without_yn statistics: - precision: pipe5_c123_precision - recall: pipe5_c123_recall - f1score: pipe5_c123_f1score + precision: pipe5_c123_without_yn_precision + recall: pipe5_c123_without_yn_recall + f1score: pipe5_c123_without_yn_f1score # C123 Predictions decoder. pipe5_prediction_decoder: @@ -285,8 +285,8 @@ pipeline: # Use the same word mappings as label indexer. 
import_word_mappings_from_globals: True streams: - inputs: pipe5_c123_predictions - outputs: pipe5_c123_predicted_answers + inputs: pipe5_c123_without_yn_predictions + outputs: predicted_answers globals: word_mappings: word_mappings_c123_without_yn @@ -300,8 +300,8 @@ pipeline: input_streams: tokenized_questions, category_names, pipe0_predicted_question_categories_names, - pipe5_c123_masks, - answers, pipe5_c123_predicted_answers + pipe5_c123_without_yn_masks, + answers, predicted_answers #: pipeline From a3ff779311ca6da32d8f83fef14170a2cd961a01 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 00:52:34 -0700 Subject: [PATCH 19/28] lstm_resnet50_is_cat_ffn_c123_no_binary_loss --- .../lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/configs/vqa_med_2019/vf/lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml index 88a3bd7..2364e06 100644 --- a/configs/vqa_med_2019/vf/lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml @@ -155,7 +155,7 @@ pipeline: type: LabelIndexer priority: 1.3 data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. export_word_mappings_to_globals: True streams: @@ -215,7 +215,7 @@ pipeline: type: LabelIndexer priority: 5.1 data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. export_word_mappings_to_globals: True streams: @@ -240,7 +240,7 @@ pipeline: pipe5_c123_ffn: priority: 5.3 type: FeedForwardNetwork - hidden: [50] + hidden: [100] dropout_rate: 0.5 streams: inputs: concatenated_activations @@ -286,7 +286,7 @@ pipeline: import_word_mappings_from_globals: True streams: inputs: pipe5_c123_predictions - outputs: pipe5_c123_predicted_answers + outputs: predicted_answers globals: word_mappings: word_mappings_c123_without_yn @@ -301,7 +301,7 @@ pipeline: tokenized_questions, category_names, pipe0_predicted_question_categories_names, pipe5_c123_masks, - answers, pipe5_c123_predicted_answers + answers, predicted_answers #: pipeline From 48c7287684254ef220c0fd9a0ed31d35d51805ac Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 01:06:28 -0700 Subject: [PATCH 20/28] lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml --- .../vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml index cba072b..2a46463 100644 --- a/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml @@ -155,7 +155,7 @@ pipeline: priority: 1.3 type: LabelIndexer data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. 
export_word_mappings_to_globals: True streams: @@ -314,7 +314,7 @@ pipeline: import_word_mappings_from_globals: True streams: inputs: pipe6_c123_predictions - outputs: pipe6_c123_predicted_answers + outputs: predicted_answers globals: word_mappings: word_mappings_c123_without_yn @@ -328,7 +328,7 @@ pipeline: tokenized_questions, category_names, pipe0_predicted_question_categories_names, pipe6_c123_masks, - answers, pipe6_c123_predicted_answers + answers, predicted_answers #: pipeline From 9f935ab87c0d38e77e192a48ca73bfe6320c00f5 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 01:27:59 -0700 Subject: [PATCH 21/28] lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml --- ...0_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml | 144 ++++++++++++++++-- .../vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml | 4 +- 2 files changed, 131 insertions(+), 17 deletions(-) diff --git a/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml index b0db77b..1b3f29d 100644 --- a/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml @@ -32,8 +32,8 @@ pipeline: priority: 0 type: GlobalVariablePublisher # Add input_size to globals. - keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, element_wise_activation_size, category_c123_without_yn_word_to_ix] - values: [100, 2, 10, 100, 100, {"C1": 0, "C2": 1, "C3": 2}] + keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, element_wise_activation_size, category_c123_without_yn_word_to_ix,category_binary_yn_word_to_ix] + values: [100, 2, 10, 100, 100, {"C1": 0, "C2": 1, "C3": 2}, {"BINARY": 3}] # Statistics. batch_size: @@ -162,8 +162,8 @@ pipeline: inputs: answers outputs: all_answers_ids globals: - vocabulary_size: vocabulary_size_c123_without_yn - word_mappings: word_mappings_c123_without_yn + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn ################# PIPE 2: SHARED IMAGE ENCODER ################# @@ -243,7 +243,7 @@ pipeline: priority: 6.1 type: LabelIndexer data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. export_word_mappings_to_globals: True streams: @@ -307,25 +307,139 @@ pipeline: f1score: pipe6_c123_f1score # C123 Predictions decoder. - pipe5_c123_prediction_decoder: - priority: 6.6 - type: WordDecoder - # Use the same word mappings as label indexer. - import_word_mappings_from_globals: True + #pipe5_c123_prediction_decoder: + # priority: 6.6 + # type: WordDecoder + # # Use the same word mappings as label indexer. + # import_word_mappings_from_globals: True + # streams: + # inputs: pipe6_c123_predictions + # outputs: pipe6_c123_predicted_answers + # globals: + # word_mappings: word_mappings_c123_without_yn + + + ################# PIPE 7: Y/N questions ################# + + # Answer encoding for PIPE 5. + pipe7_binary_yn_answer_indexer: + type: LabelIndexer + priority: 7.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True streams: - inputs: pipe6_c123_predictions - outputs: pipe6_c123_predicted_answers + inputs: answers + outputs: pipe7_binary_yn_answers_ids globals: - word_mappings: word_mappings_c123_without_yn - + vocabulary_size: vocabulary_size_binary_yn + word_mappings: word_mappings_binary_yn + # Sample masking based on categories. + pipe7_binary_yn_string_to_mask: + priority: 7.2 + type: StringToMask + globals: + word_mappings: category_binary_yn_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_binary_question_categories_indices # NOT USED + masks: pipe7_binary_yn_masks + # Model 4: FFN C1 answering + pipe7_binary_yn_classifier: + priority: 7.3 + type: FeedForwardNetwork + hidden: [100] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe7_binary_yn_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_binary_yn + pipe7_binary_yn_nllloss: + type: NLLLoss + priority: 7.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe7_binary_yn_predictions + masks: pipe7_binary_yn_masks + targets: pipe7_binary_yn_answers_ids + loss: pipe7_binary_yn_loss + pipe7_binary_yn_precision_recall: + type: PrecisionRecallStatistics + priority: 7.5 + use_word_mappings: True + use_masking: True + show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe7_binary_yn_masks + predictions: pipe7_binary_yn_predictions + targets: pipe7_binary_yn_answers_ids + globals: + word_mappings: word_mappings_binary_yn + statistics: + precision: pipe7_binary_yn_precision + recall: pipe7_binary_yn_recall + f1score: pipe7_binary_yn_f1score + + # Y/N Predictions decoder. + #pipe7_binary_yn_prediction_decoder: + # type: WordDecoder + # priority: 7.6 + # # Use the same word mappings as label indexer. + # import_word_mappings_from_globals: True + # streams: + # inputs: pipe7_binary_yn_predictions + # outputs: pipe7_binary_yn_predicted_answers + # globals: + # word_mappings: word_mappings_binary_yn ################# PIPE 9: MERGE ANSWERS ################# + # Merge predictions + pipe8_merged_predictions: + type: JoinMaskedPredictions + priority: 8.1 + # Names of used input streams. + input_prediction_streams: [pipe6_c123_predictions, pipe7_binary_yn_predictions] + input_mask_streams: [pipe6_c123_masks, pipe7_binary_yn_masks] + input_word_mappings: [word_mappings_c123_without_yn, word_mappings_binary_yn] + globals: + output_word_mappings: word_mappings_c123_binary_yn + streams: + output_strings: predicted_answers + output_indices: pipe8_merged_pred_indices + + # Statistics. + pipe8_merged_precision_recall: + type: PrecisionRecallStatistics + priority: 8.2 + # Use prediction indices instead of distributions. + use_prediction_distributions: False + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + globals: + word_mappings: word_mappings_c123_binary_yn + streams: + targets: all_answers_ids + predictions: pipe8_merged_pred_indices + statistics: + precision: pipe8_merged_precision + recall: pipe8_merged_recall + f1score: pipe8_merged_f1score + + + + # Viewers. 
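pipe8 above is where the two specialized heads are stitched back into a single answer stream: per sample, JoinMaskedPredictions picks the prediction of whichever head's mask is active and decodes it with that head's own word mappings. A simplified sketch of the merge (vocabularies and masks are illustrative, not the real answer sets):

import torch

c123_preds = torch.log_softmax(torch.randn(4, 5), dim=1)   # C1+C2+C3 head
yn_preds = torch.log_softmax(torch.randn(4, 2), dim=1)     # BINARY head
yn_mask = torch.tensor([0, 1, 0, 1])                       # from StringToMask

c123_ix_to_word = {0: "axial", 1: "mri", 2: "ct", 3: "t1", 4: "t2"}  # made-up words
yn_ix_to_word = {0: "yes", 1: "no"}

merged = []
for i in range(4):
    if yn_mask[i]:  # routed to the Y/N head
        merged.append(yn_ix_to_word[yn_preds[i].argmax().item()])
    else:           # otherwise the C1/C2/C3 head answers
        merged.append(c123_ix_to_word[c123_preds[i].argmax().item()])
print(merged)       # e.g. ['mri', 'no', 'ct', 'yes']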
viewer:
priority: 9.3
@@ -334,7 +448,7 @@ pipeline:
tokenized_questions, category_names,
pipe0_predicted_question_categories_names,
pipe6_c123_masks,
- answers, pipe6_c123_predicted_answers
+ answers, predicted_answers
#: pipeline
diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml
index 03c2cea..7cbe09e 100644
--- a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml
+++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml
@@ -206,7 +206,7 @@ pipeline:
output_size: concatenated_activations_size
- ################# PIPE 5: C1 + C2 + C3 questions #################
+ ################# PIPE 5: Y/N questions #################
# Answer encoding for PIPE 5.
pipe5_binary_yn_answer_indexer:
@@ -276,7 +276,7 @@ pipeline:
recall: pipe5_binary_yn_recall
f1score: pipe5_binary_yn_f1score
- # C123 Predictions decoder.
+ # Y/N Predictions decoder.
pipe5_prediction_decoder:
type: WordDecoder
priority: 5.6
From 7db986fddb5901def63059cda67861474833dba5 Mon Sep 17 00:00:00 2001
From: tkornut
Date: Tue, 30 Apr 2019 01:39:20 -0700
Subject: [PATCH 22/28] lowered loss threshold: 1e-3
---
configs/vqa_med_2019/default_vqa_med_2019.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/configs/vqa_med_2019/default_vqa_med_2019.yml b/configs/vqa_med_2019/default_vqa_med_2019.yml
index 11d7222..dfe01a6 100644
--- a/configs/vqa_med_2019/default_vqa_med_2019.yml
+++ b/configs/vqa_med_2019/default_vqa_med_2019.yml
@@ -22,7 +22,7 @@ training:
# Terminal conditions:
terminal_conditions:
- loss_stop: 1.0e-2
+ loss_stop: 1.0e-3
episode_limit: 10000
epoch_limit: -1
From 39242f6acf1cad0c576f8e94bba8391d0048fc81 Mon Sep 17 00:00:00 2001
From: Alexis Asseman <33075224+aasseman@users.noreply.github.com>
Date: Tue, 30 Apr 2019 09:27:31 -0700
Subject: [PATCH 23/28] Add option to ignore words in BLEU
---
.../default/components/publishers/bleu_statistics.yml | 3 +++
ptp/components/publishers/bleu_statistics.py | 11 +++++++++--
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/configs/default/components/publishers/bleu_statistics.yml b/configs/default/components/publishers/bleu_statistics.yml
index a79a245..c51f387 100644
--- a/configs/default/components/publishers/bleu_statistics.yml
+++ b/configs/default/components/publishers/bleu_statistics.yml
@@ -13,6 +13,9 @@ use_prediction_distributions: True
# TODO!
#use_masking: False
+# Ignored words - useful for ignoring special tokens
+ignored_words: ["<PAD>", "<EOS>"]
+
# Weights of n-grams used when calculating the score.
weights: [0.25, 0.25, 0.25, 0.25]
diff --git a/ptp/components/publishers/bleu_statistics.py b/ptp/components/publishers/bleu_statistics.py
index b303ea9..6432c06 100644
--- a/ptp/components/publishers/bleu_statistics.py
+++ b/ptp/components/publishers/bleu_statistics.py
@@ -58,6 +58,9 @@ def __init__(self, name, config):
# Get masking flag.
#self.use_masking = self.config["use_masking"]
+ # Get ignored words
+ self.ignored_words = self.config["ignored_words"]
+
# Retrieve word mappings from globals.
word_to_ix = self.globals["word_mappings"]
# Construct reverse mapping for faster processing.
@@ -144,12 +147,16 @@ def calculate_BLEU(self, data_dict):
target_words = []
for t_ind in target_indices:
if t_ind in self.ix_to_word.keys():
- target_words.append(self.ix_to_word[t_ind])
+ w = self.ix_to_word[t_ind]
+ if w not in self.ignored_words:
+ target_words.append(w)
# Change prediction indices to words.
pred_words = []
for p_ind in pred_indices:
if p_ind in self.ix_to_word.keys():
- pred_words.append(self.ix_to_word[p_ind])
+ w = self.ix_to_word[p_ind]
+ if w not in self.ignored_words:
+ pred_words.append(w)
# Calculate BLEU.
scores.append(sentence_bleu([target_words], pred_words, self.weights))
#print("TARGET: {}\n".format(target_words))
From ecc69df2ae743aa59e332497c9fa02d40c47892e Mon Sep 17 00:00:00 2001
From: tkornut
Date: Tue, 30 Apr 2019 10:29:59 -0700
Subject: [PATCH 24/28] extend answers - added second exporter that creates the submission file
---
...ult_extend_answers.yml => extend_answers.yml} | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
rename configs/vqa_med_2019/{default_extend_answers.yml => extend_answers.yml} (90%)
diff --git a/configs/vqa_med_2019/default_extend_answers.yml b/configs/vqa_med_2019/extend_answers.yml
similarity index 90%
rename from configs/vqa_med_2019/default_extend_answers.yml
rename to configs/vqa_med_2019/extend_answers.yml
index 97e9ddf..965263e 100644
--- a/configs/vqa_med_2019/default_extend_answers.yml
+++ b/configs/vqa_med_2019/extend_answers.yml
@@ -58,7 +58,7 @@ test_answers:
# Add component for exporting answers to files.
pipeline:
disable: viewer,question_tokenizer
-# # Viewers.
+ # Viewers.
viewer_extended:
priority: 100.4
type: StreamViewer
sample_number: 0
@@ -68,13 +68,23 @@ pipeline:
category_names,predicted_categories,
answers,tokenized_answers,predicted_answers
- exporter:
+ answer_exporter:
priority: 100.5
type: StreamFileExporter
- separator: '|'
+ separator: ','
+ filename: 'answers.csv'
input_streams:
indices,image_ids,tokenized_questions,
category_names,predicted_categories,
answers,tokenized_answers,predicted_answers
+ submission_exporter:
+ priority: 100.6
+ type: StreamFileExporter
+ separator: '|'
+ filename: 'submission.txt'
+ input_streams:
+ image_ids,
+ predicted_answers
+
#: pipeline
From f0c037cdbb992942d18e5af5c0b6283f48de8929 Mon Sep 17 00:00:00 2001
From: Deepta Rajan
Date: Tue, 30 Apr 2019 11:17:16 -0700
Subject: [PATCH 25/28] vqa attention
---
.../components/models/vqa/attention.yml | 51 ++++++
.../c2_class_lstm_resnet50_attn_cat_is.yml | 120 +++++++++++++
ptp/components/models/__init__.py | 4 +-
ptp/components/models/vqa/attention.py | 163 ++++++++++++++++++
4 files changed, 337 insertions(+), 1 deletion(-)
create mode 100644 configs/default/components/models/vqa/attention.yml
create mode 100644 configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml
create mode 100644 ptp/components/models/vqa/attention.py
diff --git a/configs/default/components/models/vqa/attention.yml b/configs/default/components/models/vqa/attention.yml
new file mode 100644
index 0000000..97557bd
--- /dev/null
+++ b/configs/default/components/models/vqa/attention.yml
@@ -0,0 +1,51 @@
+# This file defines the default values for the VQA_Attention model.
+
+####################################################################
+# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
+####################################################################
+
+# Dropout rate (LOADED)
+# Default: 0 (means that it is turned off)
+dropout_rate: 0
+
+streams:
+ ####################################################################
+ # 2. Keymappings associated with INPUT and OUTPUT streams.
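The net effect of the BLEU change above: both target and prediction are stripped of the configured special tokens before scoring, so padding no longer drags the score down. A small self-contained example using the same nltk scorer (answer words are made up; <PAD>/<EOS> follow the default config above):

from nltk.translate.bleu_score import sentence_bleu

ignored_words = ["<PAD>", "<EOS>"]
weights = [0.25, 0.25, 0.25, 0.25]

target = ["yes", "the", "lesion", "is", "benign", "<EOS>", "<PAD>", "<PAD>"]
prediction = ["yes", "the", "lesion", "is", "benign", "<EOS>"]

target_words = [w for w in target if w not in ignored_words]
pred_words = [w for w in prediction if w not in ignored_words]
print(sentence_bleu([target_words], pred_words, weights))  # 1.0 once specials are ignored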
+ #################################################################### + + # Stream containing batch of encoded images (INPUT) + feature_maps: feature_maps + + # Stream containing batch of encoded questions (INPUT) + question_encodings: question_encodings + + # Stream containing outputs (OUTPUT) + outputs: outputs + +globals: + #################################################################### + # 3. Keymappings of variables that will be RETRIEVED from GLOBALS. + #################################################################### + + # Height of the features tensor (RETRIEVED) + feature_maps_height: feature_maps_height + + # Width of the features tensor (RETRIEVED) + feature_maps_width: feature_maps_width + + # Depth of the features tensor (RETRIEVED) + feature_maps_depth: feature_maps_depth + + # Size of the question encodings input (RETRIEVED) + question_encoding_size: question_encoding_size + + # Size of the output (RETRIEVED) + output_size: output_size + + #################################################################### + # 4. Keymappings associated with GLOBAL variables that will be SET. + #################################################################### + + #################################################################### + # 5. Keymappings associated with statistics that will be ADDED. + #################################################################### diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml new file mode 100644 index 0000000..9b2f1fe --- /dev/null +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml @@ -0,0 +1,120 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml + +training: + problem: + batch_size: 48 + # Appy all preprocessing/data augmentations. + image_preprocessing: normalize + # none | random_affine | random_horizontal_flip | normalize | all + question_preprocessing: all + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + streams: + # Problem is returning tokenized questions. + questions: tokenized_questions + +validation: + problem: + batch_size: 48 + question_preprocessing: lowercase,remove_punctuation,tokenize + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + streams: + # Problem is returning tokenized questions. + questions: tokenized_questions + + +pipeline: + name: c2_class_lstm_resnet50_attn_cat_is + + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. 
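Before the new attention pipeline takes shape, one mechanism worth spelling out: GlobalVariablePublisher simply seeds a shared globals dictionary at priority 0, from which later components resolve their sizes. In sketch form, using the keys and values published on the next lines:

globals_dict = {}
keys = ["question_encoder_output_size", "latent_size", "num_attention_heads",
        "attention_activation_size", "question_image_activation_size"]
values = [100, 100, 2, 4196, 300]
globals_dict.update(zip(keys, values))

# A downstream component such as the classifier then resolves, e.g.:
input_size = globals_dict["attention_activation_size"]  # 4196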
+ keys: [question_encoder_output_size, latent_size, num_attention_heads, attention_activation_size, question_image_activation_size] + values: [100, 100, 2, 4196, 300] + + ################# PIPE 0: question ################# + + # Model 1: Embeddings + question_embeddings: + priority: 1.2 + type: SentenceEmbeddings + embeddings_size: 100 + pretrained_embeddings_file: glove.6B.100d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + priority: 1.3 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + use_logsoftmax: False + initial_state: Trainable + dropout_rate: 0.1 + hidden_size: 50 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + ################# PIPE 2: image ################# + # Image encoder. + image_encoder: + priority: 3.1 + type: TorchVisionWrapper + model_type: resnet50 + return_feature_maps: True + streams: + inputs: images + outputs: feature_maps #image_activations + # globals: + # output_size: image_encoder_output_size + + ################# PIPE 3: image-question fusion ################# + # Attention + FF. + question_image_fusion: + priority: 4.1 + type: VQA_Attention + dropout_rate: 0.5 + streams: + image_encodings: feature_maps #image_activations + question_encodings: question_activations + outputs: attention_activations + globals: + question_encoding_size: question_encoder_output_size + latent_size: latent_size + multi_head_attention: num_attention_heads + output_size: attention_activation_size + + # question_image_ffn: + # priority: 4.2 + # type: FeedForwardNetwork + # hidden_sizes: [100] + # dropout_rate: 0.5 + # streams: + # inputs: attention_activations + # predictions: question_image_activations + # globals: + # input_size: attention_activation_size + # prediction_size: question_image_activation_size + + classifier: + priority: 5.1 + type: FeedForwardNetwork + hidden_sizes: [100] + dropout_rate: 0.5 + streams: + inputs: attention_activations #question_image_activations #concatenated_activations + globals: + input_size: attention_activation_size #question_image_activation_size #concatentated_activations_size + prediction_size: vocabulary_size_c2 + + + #: pipeline diff --git a/ptp/components/models/__init__.py b/ptp/components/models/__init__.py index 20c2841..81a3868 100644 --- a/ptp/components/models/__init__.py +++ b/ptp/components/models/__init__.py @@ -12,6 +12,7 @@ from .vqa.element_wise_multiplication import ElementWiseMultiplication from .vqa.multimodal_compact_bilinear_pooling import MultimodalCompactBilinearPooling from .vqa.relational_network import RelationalNetwork +from .vqa.attention import VQA_Attention __all__ = [ 'ConvNetEncoder', @@ -26,5 +27,6 @@ 'ElementWiseMultiplication', 'MultimodalCompactBilinearPooling', 'RelationalNetwork', - 'Attn_Decoder_RNN' + 'Attn_Decoder_RNN', + 'VQA_Attention' ] diff --git a/ptp/components/models/vqa/attention.py b/ptp/components/models/vqa/attention.py new file mode 100644 index 0000000..f3c778d --- /dev/null +++ b/ptp/components/models/vqa/attention.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright (C) IBM Corporation 2018 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__author__ = "Deepta Rajan" + + +import torch + +from ptp.components.models.model import Model +from ptp.data_types.data_definition import DataDefinition + + +class VQA_Attention(Model): + """ + Element of one of the classical baselines for Visual Question Answering. + Attention-weighted image maps are computed based on the question. + The multi-modal data (question and attention-weighted image maps) are fused via concatenation and returned (for subsequent classification, done in a separate component e.g. ffn). + + On the basis of: Vahid Kazemi Ali Elqursh. "Show, Ask, Attend, and Answer: A Strong Baseline For Visual Question Answering" (2017). + Code: https://github.com/Cyanogenoid/pytorch-vqa/blob/master/model.py + """ + def __init__(self, name, config): + """ + Initializes the model, creates the required layers. + + :param name: Name of the model (taken from the configuration file). + + :param config: Parameters read from configuration file. + :type config: ``ptp.configuration.ConfigInterface`` + + """ + super(VQA_Attention, self).__init__(name, VQA_Attention, config) + + # Get key mappings. + self.key_feature_maps = self.stream_keys["feature_maps"] + self.key_question_encodings = self.stream_keys["question_encodings"] + self.key_outputs = self.stream_keys["outputs"] + + # Retrieve input/output sizes from globals. + self.feature_maps_height = self.globals["feature_maps_height"] + self.feature_maps_width = self.globals["feature_maps_width"] + self.feature_maps_depth = self.globals["feature_maps_depth"] + self.question_encoding_size = self.globals["question_encoding_size"] + self.latent_size = self.globals["latent_size"] #TO-DO add to yml file + self.num_attention_heads = self.globals["multi_head_attention"] + # Output feature size + self.output_size = self.feature_maps_depth*self.num_attention_heads + self.question_encoding_size + + # Map image and question encodings to a common latent space of dimension 'latent_size'. + self.image_encodings_conv = torch.nn.Conv2d(self.feature_maps_depth, self.latent_size, 1, bias=False) + self.question_encodings_ff = torch.nn.Linear(self.question_encoding_size, self.latent_size) + + # Scalar-dot product attention function is implemented as a Conv operation + self.attention_conv = torch.nn.Conv2d(self.latent_size, self.num_attention_heads, 1) + + # Create activation layer. + self.activation = torch.nn.ReLU() + + # Retrieve dropout rate value - if set, will put dropout between every layer. + dropout_rate = self.config["dropout_rate"] + + # Create dropout layer. + self.dropout = torch.nn.Dropout(dropout_rate) + + + def input_data_definitions(self): + """ + Function returns a dictionary with definitions of input data that are required by the component. + + :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`). 
+ """ + return { + self.key_feature_maps: DataDefinition([-1, self.feature_maps_depth, self.feature_maps_height, self.feature_maps_width], [torch.Tensor], "Batch of feature maps [BATCH_SIZE x FEAT_DEPTH x FEAT_HEIGHT x FEAT_WIDTH]"), + self.key_question_encodings: DataDefinition([-1, self.question_encoding_size], [torch.Tensor], "Batch of encoded questions [BATCH_SIZE x QUESTION_ENCODING_SIZE]"), + } + + + def output_data_definitions(self): + """ + Function returns a dictionary with definitions of output data produced the component. + + :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + return { + self.key_outputs: DataDefinition([-1, self.output_size], [torch.Tensor], "Batch of outputs [BATCH_SIZE x OUTPUT_SIZE]") + } + + def forward(self, data_dict): + """ + Main forward pass of the model. + + :param data_dict: DataDict({'images',**}) + :type data_dict: ``ptp.dadatypes.DataDict`` + """ + + # Unpack DataDict. + enc_img = data_dict[self.key_feature_maps] #[48, 2048, 7, 7] + enc_q = data_dict[self.key_question_encodings] #[48, 100] + # print("im_enc", enc_img.shape) + # print("enc_q", enc_q.shape) + + # L2 norm of image encoding + enc_img = enc_img / (enc_img.norm(p=2, dim=1, keepdim=True).expand_as(enc_img) + 1e-8) + + # Compute attention maps for image using questions + latent_img = self.image_encodings_conv(self.dropout(enc_img)) # [48, 100, 7, 7] + # print("latent_im", latent_img.shape) + latent_q = self.question_encodings_ff(self.dropout(enc_q)) # [48, 100] + # print("latent_q", latent_q.shape) + latent_q_tile = tile_2d_over_nd(latent_q, latent_img) # [48, 100, 7, 7] + # print("latent_q_tile", latent_q_tile.shape) + attention = self.activation(latent_img + latent_q_tile) # + # print("attention", attention.shape) + attention = self.attention_conv(self.dropout(attention)) # [48, 2, 7, 7] + # print("attention", attention.shape) + + # Apply attention to image encoding + attention_enc_img = apply_attention(enc_img, attention) # [48, 2048, 7, 7], [48, 2, 7, 7] + # print("attention im", attention_enc_img.shape) + + # Fusion -- Concatenate attention-weighted image encodings and question encodings. + outputs = torch.cat([attention_enc_img, latent_q], dim=1) + # print("outputs", outputs.shape) + # Add predictions to datadict. + data_dict.extend({self.key_outputs: outputs}) + + +def tile_2d_over_nd(feature_vector, feature_map): + """ Repeat the same feature vector over all spatial positions of a given feature map. + The feature vector should have the same batch size and number of features as the feature map. + """ + n, c = feature_vector.size() + spatial_size = feature_map.dim() - 2 + tiled = feature_vector.view(n, c, *([1] * spatial_size)).expand_as(feature_map) + return tiled + + +def apply_attention(input, attention): + """ Apply any number of attention maps over the input. 
""" + n, c = input.size()[:2] + glimpses = attention.size(1) # glimpses is equivalent to multiple heads in attention + + # flatten the spatial dims into the third dim, since we don't need to care about how they are arranged + input = input.view(n, 1, c, -1) # [n, 1, c, s] [batch, 1, channels, height*width] [48, 1, 2048, 7*7] + attention = attention.view(n, glimpses, -1) # [48, 2, 7*7] + attention = torch.nn.functional.softmax(attention, dim=-1).unsqueeze(2) # [n, g, 1, s] [batch, multi_head, 1, height*width] [48, 2, 1, 7*7] + weighted = attention * input # [n, g, c, s] [48, 2, 2048, 7*7] + weighted_mean = weighted.sum(dim=-1) # [n, g, c] [48, 2, 2048] + return weighted_mean.view(n, -1) # [48, 4196] From e486dc7266cd544dee26b696b576bf8494c519c7 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 11:26:02 -0700 Subject: [PATCH 26/28] separator export added to stream_file_exporter --- .../default/components/publishers/stream_file_exporter.yml | 3 +++ configs/vqa_med_2019/extend_answers.yml | 3 ++- ptp/components/publishers/stream_file_exporter.py | 5 +++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/configs/default/components/publishers/stream_file_exporter.yml b/configs/default/components/publishers/stream_file_exporter.yml index 1a5546f..3d83b20 100644 --- a/configs/default/components/publishers/stream_file_exporter.yml +++ b/configs/default/components/publishers/stream_file_exporter.yml @@ -11,6 +11,9 @@ input_streams: '' # Separator that will be placed between values (LOADED) separator: ',' +# Adds additional line to output file enabling Excel to use different separator while loading (LOADED) +export_separator_line_to_csv: False + # Name of the file containing output values (LOADED) filename: 'outputs.txt' diff --git a/configs/vqa_med_2019/extend_answers.yml b/configs/vqa_med_2019/extend_answers.yml index 965263e..9e2f9a4 100644 --- a/configs/vqa_med_2019/extend_answers.yml +++ b/configs/vqa_med_2019/extend_answers.yml @@ -71,8 +71,9 @@ pipeline: answer_exporter: priority: 100.5 type: StreamFileExporter - separator: ',' + separator: '|' filename: 'answers.csv' + export_separator_line_to_csv: True input_streams: indices,image_ids,tokenized_questions, category_names,predicted_categories, diff --git a/ptp/components/publishers/stream_file_exporter.py b/ptp/components/publishers/stream_file_exporter.py index ec34ea5..3189e4c 100644 --- a/ptp/components/publishers/stream_file_exporter.py +++ b/ptp/components/publishers/stream_file_exporter.py @@ -55,6 +55,11 @@ def __init__(self, name, config): filename = self.config["filename"] abs_filename = path.join(self.app_state.log_dir, filename) self.file = open(abs_filename, 'w') + + # Export additional line. + if self.config["export_separator_line_to_csv"]: + self.file.write("sep={}\n",self.separator) + self.logger.info("Writing values from {} streams to {}".format(self.input_stream_keys, abs_filename)) From 61be38d516fa06982135d2285da5a3fcee2a2555 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 11:27:01 -0700 Subject: [PATCH 27/28] separator export fix --- ptp/components/publishers/stream_file_exporter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ptp/components/publishers/stream_file_exporter.py b/ptp/components/publishers/stream_file_exporter.py index 3189e4c..64cf46b 100644 --- a/ptp/components/publishers/stream_file_exporter.py +++ b/ptp/components/publishers/stream_file_exporter.py @@ -58,7 +58,7 @@ def __init__(self, name, config): # Export additional line. 
if self.config["export_separator_line_to_csv"]: - self.file.write("sep={}\n",self.separator) + self.file.write("sep={}\n".format(self.separator)) self.logger.info("Writing values from {} streams to {}".format(self.input_stream_keys, abs_filename)) From 3bae683ca035a41c944e04682516fffc96a80bb6 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 15:29:54 -0700 Subject: [PATCH 28/28] attention cleanups --- .../components/models/vqa/attention.yml | 7 ++++ .../c2_class_lstm_resnet50_attn_cat_is.yml | 38 +++++-------------- .../default_c2_classification.yml | 2 +- ptp/components/models/vqa/attention.py | 7 +++- 4 files changed, 23 insertions(+), 31 deletions(-) diff --git a/configs/default/components/models/vqa/attention.yml b/configs/default/components/models/vqa/attention.yml index 97557bd..830f4b8 100644 --- a/configs/default/components/models/vqa/attention.yml +++ b/configs/default/components/models/vqa/attention.yml @@ -8,6 +8,13 @@ # Default: 0 (means that it is turned off) dropout_rate: 0 +# Size of the latent space (LOADED) +latent_size: 100 + +# Number of attention heads (LOADED) +num_attention_heads: 2 + + streams: #################################################################### # 2. Keymappings associated with INPUT and OUTPUT streams. diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml index 9b2f1fe..08b043e 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml @@ -5,10 +5,7 @@ training: problem: batch_size: 48 # Appy all preprocessing/data augmentations. - image_preprocessing: normalize - # none | random_affine | random_horizontal_flip | normalize | all - question_preprocessing: all - # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + question_preprocessing: lowercase,remove_punctuation,tokenize streams: # Problem is returning tokenized questions. questions: tokenized_questions @@ -16,22 +13,21 @@ training: validation: problem: batch_size: 48 + # Appy all preprocessing/data augmentations. question_preprocessing: lowercase,remove_punctuation,tokenize - # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all streams: # Problem is returning tokenized questions. questions: tokenized_questions pipeline: - name: c2_class_lstm_resnet50_attn_cat_is global_publisher: priority: 0 type: GlobalVariablePublisher # Add input_size to globals. - keys: [question_encoder_output_size, latent_size, num_attention_heads, attention_activation_size, question_image_activation_size] - values: [100, 100, 2, 4196, 300] + keys: [question_encoder_output_size, attention_activation_size, question_image_activation_size] + values: [100, 4196, 300] ################# PIPE 0: question ################# @@ -73,9 +69,7 @@ pipeline: return_feature_maps: True streams: inputs: images - outputs: feature_maps #image_activations - # globals: - # output_size: image_encoder_output_size + outputs: feature_maps ################# PIPE 3: image-question fusion ################# # Attention + FF. 
@@ -83,37 +77,25 @@ pipeline: priority: 4.1 type: VQA_Attention dropout_rate: 0.5 + latent_size: 100 + num_attention_heads: 2 streams: - image_encodings: feature_maps #image_activations + image_encodings: feature_maps question_encodings: question_activations outputs: attention_activations globals: question_encoding_size: question_encoder_output_size - latent_size: latent_size - multi_head_attention: num_attention_heads output_size: attention_activation_size - # question_image_ffn: - # priority: 4.2 - # type: FeedForwardNetwork - # hidden_sizes: [100] - # dropout_rate: 0.5 - # streams: - # inputs: attention_activations - # predictions: question_image_activations - # globals: - # input_size: attention_activation_size - # prediction_size: question_image_activation_size - classifier: priority: 5.1 type: FeedForwardNetwork hidden_sizes: [100] dropout_rate: 0.5 streams: - inputs: attention_activations #question_image_activations #concatenated_activations + inputs: attention_activations globals: - input_size: attention_activation_size #question_image_activation_size #concatentated_activations_size + input_size: attention_activation_size prediction_size: vocabulary_size_c2 diff --git a/configs/vqa_med_2019/c2_classification/default_c2_classification.yml b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml index b4b08d0..9511a28 100644 --- a/configs/vqa_med_2019/c2_classification/default_c2_classification.yml +++ b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml @@ -82,6 +82,6 @@ pipeline: viewer: type: StreamViewer priority: 100.4 - input_streams: questions,tokenized_questions,category_names,answers,predicted_answers + input_streams: tokenized_questions,category_names,answers,predicted_answers #: pipeline diff --git a/ptp/components/models/vqa/attention.py b/ptp/components/models/vqa/attention.py index f3c778d..15c7914 100644 --- a/ptp/components/models/vqa/attention.py +++ b/ptp/components/models/vqa/attention.py @@ -55,8 +55,11 @@ def __init__(self, name, config): self.feature_maps_width = self.globals["feature_maps_width"] self.feature_maps_depth = self.globals["feature_maps_depth"] self.question_encoding_size = self.globals["question_encoding_size"] - self.latent_size = self.globals["latent_size"] #TO-DO add to yml file - self.num_attention_heads = self.globals["multi_head_attention"] + + # Get size of latent space and number of heads from config. + self.latent_size = self.config["latent_size"] + self.num_attention_heads = self.config["num_attention_heads"] + # Output feature size self.output_size = self.feature_maps_depth*self.num_attention_heads + self.question_encoding_size
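To close the loop on shapes, here is a quick self-contained walk-through of the fusion defined in attention.py, with random tensors standing in for real resnet50 feature maps and LSTM question encodings, and fresh untrained layers in place of the model's own:

import torch

n, c, h, w = 48, 2048, 7, 7            # feature maps: [BATCH x DEPTH x H x W]
latent, heads, q = 100, 2, 100

enc_img = torch.randn(n, c, h, w)
enc_q = torch.randn(n, q)

latent_img = torch.nn.Conv2d(c, latent, 1, bias=False)(enc_img)  # [48, 100, 7, 7]
latent_q = torch.nn.Linear(q, latent)(enc_q)                     # [48, 100]
tiled = latent_q.view(n, latent, 1, 1).expand_as(latent_img)     # tile_2d_over_nd
attention = torch.nn.Conv2d(latent, heads, 1)(torch.relu(latent_img + tiled))

# apply_attention: softmax over the 49 spatial positions per head, then
# attention-weighted sums of the image features.
flat_img = enc_img.view(n, 1, c, -1)                             # [48, 1, 2048, 49]
flat_att = torch.softmax(attention.view(n, heads, -1), dim=-1).unsqueeze(2)
weighted = (flat_att * flat_img).sum(dim=-1).view(n, -1)         # [48, 4096]

outputs = torch.cat([weighted, latent_q], dim=1)
print(outputs.shape)  # torch.Size([48, 4196]) == attention_activation_size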