From a86a5433250f8796783a0e1b560571f218de9190 Mon Sep 17 00:00:00 2001
From: Alexis Asseman <33075224+aasseman@users.noreply.github.com>
Date: Thu, 18 Apr 2019 15:39:36 -0700
Subject: [PATCH 01/39] Added more modes to RNN (untested): - Optional
 input/output stream for hidden state - Autoregressive mode: First / No inputs

---
 .../models/recurrent_neural_network.yml       |  22 ++-
 .../models/recurrent_neural_network.py        | 126 ++++++++++++------
 2 files changed, 106 insertions(+), 42 deletions(-)

diff --git a/configs/default/components/models/recurrent_neural_network.yml b/configs/default/components/models/recurrent_neural_network.yml
index 3249555..f43a5bf 100644
--- a/configs/default/components/models/recurrent_neural_network.yml
+++ b/configs/default/components/models/recurrent_neural_network.yml
@@ -9,7 +9,15 @@ hidden_size: 100
 
 # Flag informing the model to learn the intial state (h0/c0) (LOADED)
 # When false, (c0/c0) will be initialized as zeros.
-initial_state_trainable: True
+
+# Initial state type:
+#   * Zero (null vector)
+#   * Trainable (xavier initialization, trainable)
+#   * Input (the initial hidden state comes from an input stream)
+initial_state: Trainable
+
+# Whether to include the last hidden state in the outputs
+output_last_state: False
 
 # Type of recurrent cell (LOADED)
 # Options: LSTM | GRU | RNN_TANH | RNN_RELU
@@ -25,9 +33,19 @@ dropout_rate: 0
 # Prediction mode (LOADED)
 # Options: 
 #   * Dense (passes every activation through output layer) |
-#   * Last (passes only the last activation though output layer)
+#   * Last (passes only the last activation through output layer) |
+#   * None (all outputs are discarded)
 prediction_mode: Dense
 
+# Input mode
+# Options:
+#   * Dense (every iteration expects an input)
+#   * Autoregression_First (Autoregression, expects an input for the first iteration)
+#   * Autoregression_None (Autoregression, first input will be a null vector)
+input_mode: Dense
+
+autoregression_length: 42
+
 # If true, output of the last layer will be additionally processed with Log Softmax (LOADED)
 use_logsoftmax: True
 
diff --git a/ptp/components/models/recurrent_neural_network.py b/ptp/components/models/recurrent_neural_network.py
index 612c6dd..d06a48a 100644
--- a/ptp/components/models/recurrent_neural_network.py
+++ b/ptp/components/models/recurrent_neural_network.py
@@ -35,9 +35,22 @@ def __init__(self, name, config):
         # Call constructors of parent classes.
         Model.__init__(self, name, RecurrentNeuralNetwork, config)
 
-        # Get key mappings.
-        self.key_inputs = self.stream_keys["inputs"]
-        self.key_predictions = self.stream_keys["predictions"]
+        # Get input/output mode
+        self.input_mode = self.config["input_mode"]
+        self.output_last_state = self.config["output_last_state"]
+
+        # Get prediction mode from configuration.
+        self.prediction_mode = self.config["prediction_mode"]
+        if self.prediction_mode not in ['Dense','Last', 'None']:
+            raise ConfigurationError("Invalid 'prediction_mode' (current {}, available {})".format(self.prediction_mode, ['Dense','Last', 'None']))
+
+        self.autoregression_length = self.config["autoregression_length"]
+        
+        # Check if initial state (h0/c0) is zero, trainable, or coming from input stream.
+        self.initial_state = self.config["initial_state"]
+
+        # Get number of layers from config.
+        self.num_layers = self.config["num_layers"]
 
         # Retrieve input size from global variables.
         self.key_input_size = self.global_keys["input_size"]
@@ -56,11 +69,6 @@ def __init__(self, name, config):
             else:
                 raise ConfigurationError("RNN prediction size '{}' must be a single dimension (current {})".format(self.key_prediction_size, self.prediction_size))
         
-        # Get prediction mode from configuration.
-        self.prediction_mode = self.config["prediction_mode"]
-        if self.prediction_mode not in ['Dense','Last']:
-            raise ConfigurationError("Invalid 'prediction_mode' (current {}, available {})".format(self.prediction_mode, ['Dense','Last']))
-
         # Retrieve hidden size from configuration.
         self.hidden_size = self.config["hidden_size"]
         if type(self.hidden_size) == list:
@@ -69,14 +77,12 @@ def __init__(self, name, config):
             else:
                 raise ConfigurationError("RNN hidden_size must be a single dimension (current {})".format(self.hidden_size))
         
-        self.logger.info("Initializing RNN with input size = {}, hidden size = {} and prediction size = {}".format(self.input_size, self.hidden_size, self.prediction_size))
-
-        # Get number of layers from config.
-        self.num_layers = self.config["num_layers"]
-
         # Get dropout rate value from config.
         dropout_rate = self.config["dropout_rate"]
 
+        # Create dropout layer.
+        self.dropout = torch.nn.Dropout(dropout_rate)
+
         # Create RNN depending on the configuration
         self.cell_type = self.config["cell_type"]
         if self.cell_type in ['LSTM', 'GRU']:
@@ -88,18 +94,14 @@ def __init__(self, name, config):
                 nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[self.cell_type]
                 # Create rnn cell.
                 self.rnn_cell = torch.nn.RNN(self.input_size, self.hidden_size, self.num_layers, nonlinearity=nonlinearity, dropout=dropout_rate, batch_first=True)
-
             except KeyError:
                 raise ConfigurationError( "Invalid RNN type, available options for 'cell_type' are ['LSTM', 'GRU', 'RNN_TANH', 'RNN_RELU'] (currently '{}')".format(self.cell_type))
         
-        # Check if initial state (h0/c0) are trainable or not.
-        self.initial_state_trainable = self.config["initial_state_trainable"]
-
-        # Parameters - for a single sample.        
+        # Parameters - for a single sample.
         h0 = torch.zeros(self.num_layers, 1, self.hidden_size)
         c0 = torch.zeros(self.num_layers, 1, self.hidden_size)
 
-        if self.initial_state_trainable:
+        if self.initial_state == "Trainable":
             self.logger.info("Using trainable initial (h0/c0) state")
             # Initialize a single vector used as hidden state.
             # Initialize it using xavier initialization.
@@ -110,15 +112,24 @@ def __init__(self, name, config):
             if self.cell_type == 'LSTM':
                 torch.nn.init.xavier_uniform(c0)
                 self.init_memory = torch.nn.Parameter(c0, requires_grad=True)
-        else:
+        elif self.initial_state == "Zero":
             self.logger.info("Using zero initial (h0/c0) state")
             # We will still embedd it into parameter to enable storing/loading of both types of models by each other.
             self.init_hidden = torch.nn.Parameter(h0, requires_grad=False)
             if self.cell_type == 'LSTM':
                 self.init_memory = torch.nn.Parameter(c0, requires_grad=False)
 
-        # Create dropout layer.
-        self.dropout = torch.nn.Dropout(dropout_rate)
+        # Get key mappings.
+        if "None" not in self.input_mode:
+            self.key_inputs = self.stream_keys["inputs"]
+        if "None" not in self.prediction_mode:
+            self.key_predictions = self.stream_keys["predictions"]
+        if self.initial_state == "Input":
+            self.key_input_state = self.stream_keys["input_state"]
+        if self.output_last_state:
+            self.key_output_state = self.stream_keys["output_state"]
+        
+        self.logger.info("Initializing RNN with input size = {}, hidden size = {} and prediction size = {}".format(self.input_size, self.hidden_size, self.prediction_size))
 
         # Create the output layer.
         self.activation2output = torch.nn.Linear(self.hidden_size, self.prediction_size)
@@ -151,10 +162,17 @@ def input_data_definitions(self):
 
         :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
         """
-        return {
-            self.key_inputs: DataDefinition([-1, -1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]"),
-            }
+        d = {}
+
+        if self.input_mode == "Dense":
+            d[self.key_inputs] = DataDefinition([-1, -1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]")
+        elif self.input_mode == "Autoregression_First":
+            d[self.key_inputs] = DataDefinition([-1, 1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]")
 
+        if "Autoregression" in self.input_mode:
+            d[self.key_input_state] = DataDefinition([-1, 2 if self.cell_type == 'LSTM' else 1, self.input_size, 1, self.hidden_size])
+
+        return d
 
     def output_data_definitions(self):
         """ 
@@ -162,17 +180,18 @@ def output_data_definitions(self):
 
         :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
         """
+        d = {}
     
         if self.prediction_mode == "Dense":
-            return {
-                self.key_predictions: DataDefinition([-1, -1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]")
-                }
-        else: # "Last"
-            return {
-                # Only last prediction.
-                self.key_predictions: DataDefinition([-1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]")
-                }
+            d[self.key_predictions] = DataDefinition([-1, -1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]")
+        elif self.prediction_mode == "Last": # "Last"
+            # Only last prediction.
+            d[self.key_predictions] = DataDefinition([-1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]")
 
+        if self.output_last_state:
+            d[self.key_output_state] = DataDefinition([-1, 2 if self.cell_type == 'LSTM' else 1, self.input_size, 1, self.hidden_size])
+        
+        return d
 
     def forward(self, data_dict):
         """
@@ -185,14 +204,33 @@ def forward(self, data_dict):
         """
         
         # Get inputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]
-        inputs = data_dict[self.key_inputs]
-        batch_size = inputs.shape[0]
+        if "None" in self.input_mode:
+            batch_size = data_dict[self.key_input_state].shape[0]
+            inputs = torch.zeros(1, self.input_size, self.hidden_size)
+        else:
+            batch_size = inputs.shape[0]
+            inputs = data_dict[self.key_inputs]
+
+
 
         # Initialize hidden state.
         hidden = self.initialize_hiddens_state(batch_size)
 
         # Propagate inputs through rnn cell.
-        activations, hidden = self.rnn_cell(inputs, hidden)
+        if "Autoregression" in self.input_mode:
+            activations_partial, hidden = self.rnn_cell(inputs, hidden)
+            activations += [activations_partial]
+            for i in range(self.autoregression_length - 1):
+                activations_partial, hidden = self.rnn_cell(activations_partial, hidden)
+                if self.prediction_mode == "Dense":
+                    activations += [activations_partial]
+            if self.prediction_mode == "Dense":
+                activations = torch.stack(activations, 1)
+            else:
+                activations = activations_partial
+        else:
+            activations, hidden = self.rnn_cell(inputs, hidden)
+
         
         # Propagate activations through dropout layer.
         activations = self.dropout(activations)
@@ -211,7 +249,10 @@ def forward(self, data_dict):
             # Log softmax - along PREDICTION dim.
             if self.use_logsoftmax:
                 outputs = self.log_softmax(outputs)
-        else:
+
+            # Add predictions to datadict.
+            data_dict.extend({self.key_predictions: outputs})
+        elif self.prediction_mode == "Last":
             # Pass only the last activation through the output layer.
             outputs = activations.contiguous()[:, -1, :].squeeze()
             # Propagate data through the output layer [BATCH_SIZE x PREDICTION_SIZE]
@@ -219,6 +260,11 @@ def forward(self, data_dict):
             # Log softmax - along PREDICTION dim.
             if self.use_logsoftmax:
                 outputs = self.log_softmax(outputs)
-
-        # Add predictions to datadict.
-        data_dict.extend({self.key_predictions: outputs})
+            # Add predictions to datadict.
+            data_dict.extend({self.key_predictions: outputs})
+        elif self.prediction_mode == "None":
+            # Nothing, since we don't want to keep the RNN's outputs
+            pass
+
+        if self.output_last_state:
+            data_dict.extend({self.key_output_state: hidden})

From 5a7b8482896baffb659816cb9dadf0ff05d9ab67 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Mon, 15 Apr 2019 14:39:19 -0700
Subject: [PATCH 02/39] c3_classification_all_bow_vgg16_concat.yml

---
 ...c3_classification_all_bow_vgg16_concat.yml | 95 +++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml

diff --git a/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml b/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml
new file mode 100644
index 0000000..ab08377
--- /dev/null
+++ b/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml
@@ -0,0 +1,95 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/c3_classification/default_c3_classification.yml
+
+pipeline:
+  name: vqa_med_c3_classification_all_bow_concat
+
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 0
+    # Add input_size to globals.
+    keys: [image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size]
+    values: [2, 10, 100]
+
+  # First subpipeline: question.
+  # Questions encoding.
+  question_tokenizer:
+    type: SentenceTokenizer
+    priority: 1.1
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  question_onehot_encoder:
+    type: SentenceOneHotEncoder
+    priority: 1.2
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: tokenized_questions
+      outputs: encoded_questions
+    globals:
+      vocabulary_size: question_vocabulary_size
+
+  question_bow_encoder:
+    type: BOWEncoder
+    priority: 1.3
+    streams:
+      inputs: encoded_questions
+      outputs: question_activations
+    globals:
+        bow_size: question_vocabulary_size
+
+
+  # 2nd subpipeline: image size.
+  # Model - image size classifier.
+  image_size_encoder:
+    type: FeedForwardNetwork 
+    #hidden_sizes: [100]
+    priority: 2.1
+    streams:
+      inputs: image_sizes
+      predictions: image_size_activations
+    globals:
+      input_size: image_size_encoder_input_size
+      prediction_size: image_size_encoder_output_size
+
+  # 3rd subpipeline: image.
+  # Image encoder.
+  image_encoder:
+    type: TorchVisionWrapper
+    priority: 3.1
+    streams:
+      inputs: images
+      predictions: image_activations
+    globals:
+      prediction_size: image_encoder_output_size
+  
+  # 4th subpipeline: concatenation + FF.
+  concat:
+    type: Concatenation
+    priority: 4.1
+    input_streams: [question_activations,image_size_activations,image_activations]
+    # Concatenation 
+    dim: 1 # default
+    input_dims: [[-1,99],[-1,10],[-1,100]]
+    output_dims: [-1,209]
+    streams:
+      outputs: concatenated_activations
+    globals:
+      output_size: output_size
+
+
+  classifier:
+    type: FeedForwardNetwork 
+    hidden_sizes: [100]
+    priority: 4.2
+    streams:
+      inputs: concatenated_activations
+    globals:
+      input_size: output_size
+      prediction_size: vocabulary_size_c3
+
+
+  #: pipeline

From 4bd9856244eff1db257e495e82e2146c5b64ce0a Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Mon, 15 Apr 2019 15:29:37 -0700
Subject: [PATCH 03/39] c1 update + all bow + vgg16 + size config

---
 ...c1_classification_all_bow_vgg16_concat.yml | 94 +++++++++++++++++++
 .../default_c1_classification.yml             |  3 +-
 ...c3_classification_all_bow_vgg16_concat.yml |  1 -
 3 files changed, 95 insertions(+), 3 deletions(-)
 create mode 100644 configs/vqa_med_2019/c1_classification/c1_classification_all_bow_vgg16_concat.yml

diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_all_bow_vgg16_concat.yml b/configs/vqa_med_2019/c1_classification/c1_classification_all_bow_vgg16_concat.yml
new file mode 100644
index 0000000..b2e6ce1
--- /dev/null
+++ b/configs/vqa_med_2019/c1_classification/c1_classification_all_bow_vgg16_concat.yml
@@ -0,0 +1,94 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml
+
+pipeline:
+  name: vqa_med_c1_classification_all_bow_vgg16_concat
+
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 0
+    # Add input_size to globals.
+    keys: [image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size]
+    values: [2, 10, 100]
+
+  # First subpipeline: question.
+  # Questions encoding.
+  question_tokenizer:
+    type: SentenceTokenizer
+    priority: 1.1
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  question_onehot_encoder:
+    type: SentenceOneHotEncoder
+    priority: 1.2
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: tokenized_questions
+      outputs: encoded_questions
+    globals:
+      vocabulary_size: question_vocabulary_size
+
+  question_bow_encoder:
+    type: BOWEncoder
+    priority: 1.3
+    streams:
+      inputs: encoded_questions
+      outputs: question_activations
+    globals:
+        bow_size: question_vocabulary_size
+
+
+  # 2nd subpipeline: image size.
+  # Model - image size classifier.
+  image_size_encoder:
+    type: FeedForwardNetwork 
+    priority: 2.1
+    streams:
+      inputs: image_sizes
+      predictions: image_size_activations
+    globals:
+      input_size: image_size_encoder_input_size
+      prediction_size: image_size_encoder_output_size
+
+  # 3rd subpipeline: image.
+  # Image encoder.
+  image_encoder:
+    type: TorchVisionWrapper
+    priority: 3.1
+    streams:
+      inputs: images
+      predictions: image_activations
+    globals:
+      prediction_size: image_encoder_output_size
+  
+  # 4th subpipeline: concatenation + FF.
+  concat:
+    type: Concatenation
+    priority: 4.1
+    input_streams: [question_activations,image_size_activations,image_activations]
+    # Concatenation 
+    dim: 1 # default
+    input_dims: [[-1,99],[-1,10],[-1,100]]
+    output_dims: [-1,209]
+    streams:
+      outputs: concatenated_activations
+    globals:
+      output_size: output_size
+
+
+  classifier:
+    type: FeedForwardNetwork 
+    hidden_sizes: [100]
+    priority: 4.2
+    streams:
+      inputs: concatenated_activations
+    globals:
+      input_size: output_size
+      prediction_size: vocabulary_size_c1
+
+
+  #: pipeline
diff --git a/configs/vqa_med_2019/c1_classification/default_c1_classification.yml b/configs/vqa_med_2019/c1_classification/default_c1_classification.yml
index 0ae125a..abf631c 100644
--- a/configs/vqa_med_2019/c1_classification/default_c1_classification.yml
+++ b/configs/vqa_med_2019/c1_classification/default_c1_classification.yml
@@ -15,7 +15,6 @@ training:
 validation:
   problem:
     categories: C1
-    batch_size: 500
   dataloader:
     num_workers: 4
 
@@ -25,7 +24,7 @@ pipeline:
   # Answer encoding.
   answer_indexer:
     type: LabelIndexer
-    priority: 2
+    priority: 0.1
     data_folder: ~/data/vqa-med
     word_mappings_file: answers.c1.word.mappings.csv
     # Export mappings and size to globals.
diff --git a/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml b/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml
index ab08377..1eea8d1 100644
--- a/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml
+++ b/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml
@@ -46,7 +46,6 @@ pipeline:
   # Model - image size classifier.
   image_size_encoder:
     type: FeedForwardNetwork 
-    #hidden_sizes: [100]
     priority: 2.1
     streams:
       inputs: image_sizes

From ea0681cd042d9b49147986439430d5aa92e4f214 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Mon, 15 Apr 2019 15:38:28 -0700
Subject: [PATCH 04/39] renaming components in question categorization

---
 .../question_categorization_onehot_bow.yml             | 10 +++++-----
 .../question_categorization_onehot_rnn.yml             | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/configs/vqa_med_2019/question_categorization/question_categorization_onehot_bow.yml b/configs/vqa_med_2019/question_categorization/question_categorization_onehot_bow.yml
index 79f767a..ced3ad0 100644
--- a/configs/vqa_med_2019/question_categorization/question_categorization_onehot_bow.yml
+++ b/configs/vqa_med_2019/question_categorization/question_categorization_onehot_bow.yml
@@ -12,7 +12,7 @@ pipeline:
       inputs: questions
       outputs: tokenized_questions
 
-  sentence_encoder:
+  question_encoder:
     type: SentenceOneHotEncoder
     priority: 1.2
     data_folder: ~/data/vqa-med
@@ -22,7 +22,7 @@ pipeline:
       inputs: tokenized_questions
       outputs: encoded_questions
     globals:
-      vocabulary_size: sentence_vocabulary_size
+      vocabulary_size: question_vocabulary_size
 
   bow_encoder:
     type: BOWEncoder
@@ -31,17 +31,17 @@ pipeline:
       inputs: encoded_questions
       outputs: bow_questions
     globals:
-        bow_size: sentence_vocabulary_size # Set by sentence_encoder.
+        bow_size: question_vocabulary_size # Set by question_encoder.
 
   # Model
   classifier:
-    type: SoftmaxClassifier
+    type: FeedForwardNetwork
     #freeze: True
     priority: 3
     streams:
       inputs: bow_questions
     globals:
-      input_size: sentence_vocabulary_size # Set by sentence_encoder.
+      input_size: question_vocabulary_size # Set by question_encoder.
       prediction_size: num_categories # C1,C2,C3,C4
 
   # Predictions decoder.
diff --git a/configs/vqa_med_2019/question_categorization/question_categorization_onehot_rnn.yml b/configs/vqa_med_2019/question_categorization/question_categorization_onehot_rnn.yml
index 47031a6..3c80491 100644
--- a/configs/vqa_med_2019/question_categorization/question_categorization_onehot_rnn.yml
+++ b/configs/vqa_med_2019/question_categorization/question_categorization_onehot_rnn.yml
@@ -12,7 +12,7 @@ pipeline:
       inputs: questions
       outputs: tokenized_questions
 
-  sentence_encoder:
+  question_encoder:
     type: SentenceOneHotEncoder
     priority: 1.2
     data_folder: ~/data/vqa-med
@@ -22,9 +22,9 @@ pipeline:
       inputs: tokenized_questions
       outputs: encoded_questions
     globals:
-      vocabulary_size: sentence_vocabulary_size
+      vocabulary_size: question_vocabulary_size
 
-  sentence_to_tensor:
+  question_to_tensor:
     type: ListToTensor
     priority: 1.3
     num_inputs_dims: 3
@@ -32,7 +32,7 @@ pipeline:
       inputs: encoded_questions
       outputs: tensor_questions
     globals:
-      input_size: sentence_vocabulary_size
+      input_size: question_vocabulary_size
 
   # Model 1: RNN
   lstm:
@@ -44,7 +44,7 @@ pipeline:
     streams:
       inputs: tensor_questions
     globals:
-      input_size: sentence_vocabulary_size
+      input_size: question_vocabulary_size
       prediction_size: num_categories # C1,C2,C3,C4
 
 #: pipeline

From 6bebcd2927f7243b45bc1b7b41d37d81d24f6659 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Mon, 15 Apr 2019 16:59:49 -0700
Subject: [PATCH 05/39] c3 rnn+vgg

---
 .../c1_classification_question_onehot_bow.yml |   6 +-
 ..._classification_vf_question_onehot_bow.yml |  51 +++++++++
 ...c3_classification_all_bow_vgg16_concat.yml |   2 +-
 ...c3_classification_all_rnn_vgg16_concat.yml | 101 ++++++++++++++++++
 4 files changed, 156 insertions(+), 4 deletions(-)
 create mode 100644 configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml
 create mode 100644 configs/vqa_med_2019/c3_classification/c3_classification_all_rnn_vgg16_concat.yml

diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_question_onehot_bow.yml b/configs/vqa_med_2019/c1_classification/c1_classification_question_onehot_bow.yml
index cfba4a0..a1d9506 100644
--- a/configs/vqa_med_2019/c1_classification/c1_classification_question_onehot_bow.yml
+++ b/configs/vqa_med_2019/c1_classification/c1_classification_question_onehot_bow.yml
@@ -22,7 +22,7 @@ pipeline:
       inputs: tokenized_questions
       outputs: encoded_questions
     globals:
-      vocabulary_size: sentence_vocabulary_size
+      vocabulary_size: question_vocabulary_size
 
   question_bow_encoder:
     type: BOWEncoder
@@ -31,7 +31,7 @@ pipeline:
       inputs: encoded_questions
       outputs: bow_questions
     globals:
-        bow_size: sentence_vocabulary_size
+        bow_size: question_vocabulary_size
 
   # Model
   classifier:
@@ -41,7 +41,7 @@ pipeline:
     streams:
       inputs: bow_questions
     globals:
-      input_size: sentence_vocabulary_size
+      input_size: question_vocabulary_size
       prediction_size: vocabulary_size_c1
   
 #: pipeline
diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml b/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml
new file mode 100644
index 0000000..1d93dd4
--- /dev/null
+++ b/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml
@@ -0,0 +1,51 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml
+
+pipeline:
+  name: vqa_med_c1_classification_vf_question_onehot_bow
+
+  ################# SHARED #################
+
+  # Questions encoding.
+  question_tokenizer:
+    type: SentenceTokenizer
+    priority: 1.1
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  question_onehot_encoder:
+    type: SentenceOneHotEncoder
+    priority: 1.2
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: tokenized_questions
+      outputs: encoded_questions
+    globals:
+      vocabulary_size: question_vocabulary_size
+
+  question_bow_encoder:
+    type: BOWEncoder
+    priority: 1.3
+    streams:
+      inputs: encoded_questions
+      outputs: bow_questions
+    globals:
+        bow_size: question_vocabulary_size
+
+
+
+  # Model
+  classifier:
+    type: FeedForwardNetwork 
+    hidden_sizes: [100, 100]
+    priority: 3
+    streams:
+      inputs: bow_questions
+    globals:
+      input_size: question_vocabulary_size
+      prediction_size: vocabulary_size_c1
+  
+#: pipeline
diff --git a/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml b/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml
index 1eea8d1..ed3ed6a 100644
--- a/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml
+++ b/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml
@@ -2,7 +2,7 @@
 default_configs: vqa_med_2019/c3_classification/default_c3_classification.yml
 
 pipeline:
-  name: vqa_med_c3_classification_all_bow_concat
+  name: vqa_med_c3_classification_all_bow_vgg_concat
 
   global_publisher:
     type: GlobalVariablePublisher
diff --git a/configs/vqa_med_2019/c3_classification/c3_classification_all_rnn_vgg16_concat.yml b/configs/vqa_med_2019/c3_classification/c3_classification_all_rnn_vgg16_concat.yml
new file mode 100644
index 0000000..51b30c6
--- /dev/null
+++ b/configs/vqa_med_2019/c3_classification/c3_classification_all_rnn_vgg16_concat.yml
@@ -0,0 +1,101 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/c3_classification/default_c3_classification.yml
+
+pipeline:
+  name: vqa_med_c3_classification_all_rnn_vgg_concat
+
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 0
+    # Add input_size to globals.
+    keys: [question_embeddings_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size]
+    values: [100, 2, 10, 100]
+
+  # First subpipeline: question.
+  # Questions encoding.
+  question_tokenizer:
+    type: SentenceTokenizer
+    priority: 1.1
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  # Model 1: Embeddings
+  question_embeddings:
+    type: SentenceEmbeddings
+    priority: 1.2
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions      
+  
+  # Model 2: RNN
+  question_lstm:
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    prediction_mode: Last
+    priority: 1.3
+    use_logsoftmax: False
+    initial_state_trainable: False
+    #num_layers: 5
+    hidden_size: 50
+    streams:
+      inputs: embedded_questions
+      predictions: question_activations
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_embeddings_output_size
+
+  # 2nd subpipeline: image size.
+  # Model - image size classifier.
+  image_size_encoder:
+    type: FeedForwardNetwork 
+    priority: 2.1
+    streams:
+      inputs: image_sizes
+      predictions: image_size_activations
+    globals:
+      input_size: image_size_encoder_input_size
+      prediction_size: image_size_encoder_output_size
+
+  # 3rd subpipeline: image.
+  # Image encoder.
+  image_encoder:
+    type: TorchVisionWrapper
+    priority: 3.1
+    streams:
+      inputs: images
+      predictions: image_activations
+    globals:
+      prediction_size: image_encoder_output_size
+  
+  # 4th subpipeline: concatenation + FF.
+  concat:
+    type: Concatenation
+    priority: 4.1
+    input_streams: [question_activations,image_size_activations,image_activations]
+    # Concatenation 
+    dim: 1 # default
+    input_dims: [[-1,100],[-1,10],[-1,100]]
+    output_dims: [-1,210]
+    streams:
+      outputs: concatenated_activations
+    globals:
+      output_size: output_size
+
+
+  classifier:
+    type: FeedForwardNetwork 
+    hidden_sizes: [100]
+    priority: 4.2
+    streams:
+      inputs: concatenated_activations
+    globals:
+      input_size: output_size
+      prediction_size: vocabulary_size_c3
+
+
+  #: pipeline

From 96e23f0a0cc27a6dfd07d50d8a4117605c48f895 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Tue, 16 Apr 2019 12:05:44 -0700
Subject: [PATCH 06/39] fixed model name loading (spaces), added yes/no
 preprocessing and category to vqa_med

---
 ptp/application/pipeline_manager.py           |  6 ++++-
 .../image_text_to_class/vqa_med_2019.py       | 25 +++++++++++++++----
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/ptp/application/pipeline_manager.py b/ptp/application/pipeline_manager.py
index c3ccd88..f259523 100644
--- a/ptp/application/pipeline_manager.py
+++ b/ptp/application/pipeline_manager.py
@@ -257,6 +257,7 @@ def load(self, checkpoint_file):
 
         """
         # Load checkpoint
+        checkpoint_file = os.path.expanduser(checkpoint_file.replace(" ",""))
         # This is to be able to load a CUDA-trained model on CPU
         chkpt = torch.load(checkpoint_file, map_location=lambda storage, loc: storage)
 
@@ -301,8 +302,11 @@ def load_models(self):
                 try:
                     # Check if file exists. 
                     checkpoint_filename = model.config["load"]
+                    # TODO: if checkpoint_file is a list!!
+                    checkpoint_filename = os.path.expanduser(checkpoint_filename.replace(" ",""))
+                    # Check if file exists.
                     if not os.path.isfile(checkpoint_filename):
-                        log_str += "Coud not import parameters of model '{}' from checkpoint {} as file does not exist\n".format(
+                        log_str += "Could not import parameters of model '{}' from checkpoint {} as file does not exist\n".format(
                             model.name,
                             checkpoint_filename
                             )
diff --git a/ptp/components/problems/image_text_to_class/vqa_med_2019.py b/ptp/components/problems/image_text_to_class/vqa_med_2019.py
index ea22976..88dd8c7 100644
--- a/ptp/components/problems/image_text_to_class/vqa_med_2019.py
+++ b/ptp/components/problems/image_text_to_class/vqa_med_2019.py
@@ -96,9 +96,9 @@ def __init__(self, name, config):
         self.scale_image_width = self.config['scale_image_size'][1]
 
         # Set parameters and globals related to categories.
-        self.globals["num_categories"] = 4
-        self.globals["category_word_mappings"] = {'C1': 0, 'C2': 1, 'C3': 2, 'C4': 3, '<UNK>': 4}
-        self.category_idx_to_word = {0: 'C1', 1: 'C2', 2: 'C3', 3: 'C4', 4: '<UNK>'}
+        self.globals["num_categories"] = 6
+        self.globals["category_word_mappings"] = {'C1': 0, 'C2': 1, 'C3': 2, 'C4': 3, 'BINARY': 4, '<UNK>': 5}
+        self.category_idx_to_word = {0: 'C1', 1: 'C2', 2: 'C3', 3: 'C4', 4: 'BINARY', 5: '<UNK>'}
 
         # Check if we want to remove punctuation from questions/answer
         self.remove_punctuation = self.config["remove_punctuation"]
@@ -304,12 +304,27 @@ def __getitem__(self, index):
         data_dict[self.key_answers] = item[self.key_answers]
 
         # Question category related variables.
-        data_dict[self.key_category_ids] = item[self.key_category_ids]
-        data_dict[self.key_category_names] = self.category_idx_to_word[item[self.key_category_ids]]
+        # Check if this is binary question.
+        if self.predict_yes_no(item[self.key_questions]):
+            data_dict[self.key_category_ids] = 4 # Binary.
+            data_dict[self.key_category_names] = self.category_idx_to_word[4]
+        else:
+            data_dict[self.key_category_ids] = item[self.key_category_ids]
+            data_dict[self.key_category_names] = self.category_idx_to_word[item[self.key_category_ids]]
 
         # Return sample.
         return data_dict
 
+    def predict_yes_no(self, qtext):
+        """
+        Determines whether this is binary (yes/no) type of question.
+        """
+        yes_no_starters = ['is','was','are','does']
+        tokens = qtext.split(' ')
+        first_token = tokens[0]
+        if first_token in yes_no_starters and ('or' not in tokens):
+            return True
+        return False
 
     def collate_fn(self, batch):
         """

From 101d583824935e2ef3533fedffa1971f2449e86c Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Tue, 16 Apr 2019 12:07:21 -0700
Subject: [PATCH 07/39] Added C1 RNN + VGG16 concat config, updated configs for
 question categorization, initial C1 variational flow configs

---
 ...c1_classification_all_rnn_vgg16_concat.yml | 101 ++++++++++++++++
 ..._classification_vf_question_onehot_bow.yml |  51 --------
 ...question_rnn_separate_q_categorization.yml | 107 +++++++++++++++++
 ...cation_vf_shared_question_flow_rnn_ffn.yml | 109 ++++++++++++++++++
 .../default_c1_classification.yml             |  14 +--
 .../default_c3_classification.yml             |  14 +--
 .../default_question_categorization.yml       |  19 +--
 .../question_categorization_rnn.yml           |   2 -
 .../question_categorization_rnn_ffn.yml       |  65 +++++++++++
 9 files changed, 408 insertions(+), 74 deletions(-)
 create mode 100644 configs/vqa_med_2019/c1_classification/c1_classification_all_rnn_vgg16_concat.yml
 delete mode 100644 configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml
 create mode 100644 configs/vqa_med_2019/c1_classification/c1_classification_vf_question_rnn_separate_q_categorization.yml
 create mode 100644 configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml
 create mode 100644 configs/vqa_med_2019/question_categorization/question_categorization_rnn_ffn.yml

diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_all_rnn_vgg16_concat.yml b/configs/vqa_med_2019/c1_classification/c1_classification_all_rnn_vgg16_concat.yml
new file mode 100644
index 0000000..62b4389
--- /dev/null
+++ b/configs/vqa_med_2019/c1_classification/c1_classification_all_rnn_vgg16_concat.yml
@@ -0,0 +1,101 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml
+
+pipeline:
+  name: vqa_med_c1_classification_all_rnn_vgg_concat
+
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 0
+    # Add input_size to globals.
+    keys: [question_embeddings_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size]
+    values: [100, 2, 10, 100]
+
+  # First subpipeline: question.
+  # Questions encoding.
+  question_tokenizer:
+    type: SentenceTokenizer
+    priority: 1.1
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  # Model 1: Embeddings
+  question_embeddings:
+    type: SentenceEmbeddings
+    priority: 1.2
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions      
+  
+  # Model 2: RNN
+  question_lstm:
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    prediction_mode: Last
+    priority: 1.3
+    use_logsoftmax: False
+    initial_state_trainable: False
+    #num_layers: 5
+    hidden_size: 50
+    streams:
+      inputs: embedded_questions
+      predictions: question_activations
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_embeddings_output_size
+
+  # 2nd subpipeline: image size.
+  # Model - image size classifier.
+  image_size_encoder:
+    type: FeedForwardNetwork 
+    priority: 2.1
+    streams:
+      inputs: image_sizes
+      predictions: image_size_activations
+    globals:
+      input_size: image_size_encoder_input_size
+      prediction_size: image_size_encoder_output_size
+
+  # 3rd subpipeline: image.
+  # Image encoder.
+  image_encoder:
+    type: TorchVisionWrapper
+    priority: 3.1
+    streams:
+      inputs: images
+      predictions: image_activations
+    globals:
+      prediction_size: image_encoder_output_size
+  
+  # 4th subpipeline: concatenation + FF.
+  concat:
+    type: Concatenation
+    priority: 4.1
+    input_streams: [question_activations,image_size_activations,image_activations]
+    # Concatenation 
+    dim: 1 # default
+    input_dims: [[-1,100],[-1,10],[-1,100]]
+    output_dims: [-1,210]
+    streams:
+      outputs: concatenated_activations
+    globals:
+      output_size: output_size
+
+
+  classifier:
+    type: FeedForwardNetwork 
+    hidden_sizes: [100]
+    priority: 4.2
+    streams:
+      inputs: concatenated_activations
+    globals:
+      input_size: output_size
+      prediction_size: vocabulary_size_c1
+
+
+  #: pipeline
diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml b/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml
deleted file mode 100644
index 1d93dd4..0000000
--- a/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml
+++ /dev/null
@@ -1,51 +0,0 @@
-# Load config defining problems for training, validation and testing.
-default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml
-
-pipeline:
-  name: vqa_med_c1_classification_vf_question_onehot_bow
-
-  ################# SHARED #################
-
-  # Questions encoding.
-  question_tokenizer:
-    type: SentenceTokenizer
-    priority: 1.1
-    streams: 
-      inputs: questions
-      outputs: tokenized_questions
-
-  question_onehot_encoder:
-    type: SentenceOneHotEncoder
-    priority: 1.2
-    data_folder: ~/data/vqa-med
-    word_mappings_file: questions.all.word.mappings.csv
-    export_word_mappings_to_globals: True
-    streams:
-      inputs: tokenized_questions
-      outputs: encoded_questions
-    globals:
-      vocabulary_size: question_vocabulary_size
-
-  question_bow_encoder:
-    type: BOWEncoder
-    priority: 1.3
-    streams:
-      inputs: encoded_questions
-      outputs: bow_questions
-    globals:
-        bow_size: question_vocabulary_size
-
-
-
-  # Model
-  classifier:
-    type: FeedForwardNetwork 
-    hidden_sizes: [100, 100]
-    priority: 3
-    streams:
-      inputs: bow_questions
-    globals:
-      input_size: question_vocabulary_size
-      prediction_size: vocabulary_size_c1
-  
-#: pipeline
diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_rnn_separate_q_categorization.yml b/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_rnn_separate_q_categorization.yml
new file mode 100644
index 0000000..785881f
--- /dev/null
+++ b/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_rnn_separate_q_categorization.yml
@@ -0,0 +1,107 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml
+
+pipeline:
+  name: vqa_med_c1_classification_vf_question_rnn_separate_q_categorization
+
+  
+  ################# SHARED #################
+
+   # Questions encoding.
+  question_tokenizer:
+    type: SentenceTokenizer
+    priority: 0.2
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  ################# FLOW 0: CATEGORY #################
+
+  # Model 2: Embeddings
+  question_embeddings:
+    type: SentenceEmbeddings
+    priority: 0.3
+    # LOAD AND FREEZE
+    load:  ~/Documents/image-clef-2019/experiments/q_categorization/20190416_103111/checkpoints/vqa_med_question_categorization_rnn_best.pt
+    freeze: True
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions      
+  
+  # Model 2: RNN
+  lstm:
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    # LOAD AND FREEZE
+    load:  ~/Documents/image-clef-2019/experiments/q_categorization/20190416_103111/checkpoints/vqa_med_question_categorization_rnn_best.pt
+    freeze: True
+    prediction_mode: Last
+    priority: 0.4
+    initial_state_trainable: False
+    streams:
+      inputs: embedded_questions
+      predictions: predicted_question_categories
+    globals:
+      input_size: embeddings_size
+      prediction_size: num_categories # C1,C2,C3,C4,BIN,UNK
+
+
+  category_decoder:
+    type: WordDecoder
+    priority: 0.5
+    # Use the same word mappings as label indexer.
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: predicted_question_categories
+      outputs: predicted_question_categories_ids
+    globals:
+      vocabulary_size: num_categories
+      word_mappings: category_word_mappings
+
+  ################# FLOW C1: question #################
+
+  flow_c1_string_to_mask:
+    type: StringToMask
+    priority: 1.1
+    globals:
+      word_mappings: category_word_mappings
+    streams:
+      strings: predicted_question_categories_ids
+      string_indices: flow_c1_targets
+      masks: flow_c1_masks
+
+  # Model 1: Embeddings
+  flow_c1_question_embeddings:
+    type: SentenceEmbeddings
+    priority: 1.2
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: flow_c1_embedded_questions
+    globals:
+      embeddings_size: flow_c1_embeddings_size  
+  
+  # Model 2: RNN
+  flow_c1_lstm:
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    prediction_mode: Last
+    priority: 1.3
+    initial_state_trainable: False
+    #num_layers: 5
+    hidden_size: 50
+    streams:
+      inputs: flow_c1_embedded_questions
+    globals:
+      input_size: flow_c1_embeddings_size
+      prediction_size: vocabulary_size_c1
+
+
+#: pipeline
diff --git a/configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml b/configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml
new file mode 100644
index 0000000..69f14bf
--- /dev/null
+++ b/configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml
@@ -0,0 +1,109 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml
+
+pipeline:
+  name: vqa_med_c1_classification_vf_question_rnn_separate_q_categorization
+
+  
+  ################# SHARED #################
+
+   # Questions encoding.
+  question_tokenizer:
+    type: SentenceTokenizer
+    priority: 0.2
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  ################# FLOW 0: CATEGORY #################
+
+  # Model 2: Embeddings
+  question_embeddings:
+    type: SentenceEmbeddings
+    priority: 0.3
+    # LOAD AND FREEZE
+    load:  ~/Documents/image-clef-2019/experiments/q_categorization/ xxx /checkpoints/vqa_med_question_categorization_rnn_best.pt
+    freeze: True
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions      
+  
+  # Model 2: RNN
+  lstm:
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    # LOAD AND FREEZE
+    load:  ~/Documents/image-clef-2019/experiments/q_categorization/ xxx /checkpoints/vqa_med_question_categorization_rnn_best.pt
+    freeze: True
+    prediction_mode: Last
+    priority: 0.4
+    initial_state_trainable: False
+    streams:
+      inputs: embedded_questions
+      predictions: predicted_question_categories
+    globals:
+      input_size: embeddings_size
+      prediction_size: num_categories  # C1,C2,C3,C4,BIN,UNK
+
+
+  category_decoder:
+    type: WordDecoder
+    priority: 0.5
+    # Use the same word mappings as label indexer.
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: predicted_question_categories
+      outputs: predicted_question_categories_ids
+    globals:
+      vocabulary_size: num_categories
+      word_mappings: category_word_mappings
+
+  ################# FLOW C1: question #################
+
+  flow_c1_string_to_mask:
+    type: StringToMask
+    priority: 1.1
+    globals:
+      # This has to be changed to {0: C1}
+      # Question is what to do ewith UNK?
+      word_mappings: category_word_mappings
+    streams:
+      strings: predicted_question_categories_ids
+      string_indices: flow_c1_targets
+      masks: flow_c1_masks
+
+  # Model 1: Embeddings
+  flow_c1_question_embeddings:
+    type: SentenceEmbeddings
+    priority: 1.2
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: flow_c1_embedded_questions
+    globals:
+      embeddings_size: flow_c1_embeddings_size  
+  
+  # Model 2: RNN
+  flow_c1_lstm:
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    prediction_mode: Last
+    priority: 1.3
+    initial_state_trainable: False
+    #num_layers: 5
+    hidden_size: 50
+    streams:
+      inputs: flow_c1_embedded_questions
+    globals:
+      input_size: flow_c1_embeddings_size
+      prediction_size: vocabulary_size_c1
+
+
+#: pipeline
diff --git a/configs/vqa_med_2019/c1_classification/default_c1_classification.yml b/configs/vqa_med_2019/c1_classification/default_c1_classification.yml
index abf631c..27bd757 100644
--- a/configs/vqa_med_2019/c1_classification/default_c1_classification.yml
+++ b/configs/vqa_med_2019/c1_classification/default_c1_classification.yml
@@ -60,15 +60,15 @@ pipeline:
       loss: loss
 
   # Statistics.
-  accuracy:
-    type: AccuracyStatistics
-    priority: 100.1
-    streams:
-      targets: answers_ids
-
   batch_size:
     type: BatchSizeStatistics
-    priority: 100.2
+    priority: 100.1
+
+  #accuracy:
+  #  type: AccuracyStatistics
+  #  priority: 100.2
+  #  streams:
+  #    targets: answers_ids
 
   precision_recall:
     type: PrecisionRecallStatistics
diff --git a/configs/vqa_med_2019/c3_classification/default_c3_classification.yml b/configs/vqa_med_2019/c3_classification/default_c3_classification.yml
index 863ed6a..e3d2a39 100644
--- a/configs/vqa_med_2019/c3_classification/default_c3_classification.yml
+++ b/configs/vqa_med_2019/c3_classification/default_c3_classification.yml
@@ -60,15 +60,15 @@ pipeline:
       loss: loss
 
   # Statistics.
-  accuracy:
-    type: AccuracyStatistics
-    priority: 100.1
-    streams:
-      targets: answers_ids
-
   batch_size:
     type: BatchSizeStatistics
-    priority: 100.2
+    priority: 100.1
+
+  #accuracy:
+  #  type: AccuracyStatistics
+  #  priority: 100.2
+  #  streams:
+  #    targets: answers_ids
 
   precision_recall:
     type: PrecisionRecallStatistics
diff --git a/configs/vqa_med_2019/question_categorization/default_question_categorization.yml b/configs/vqa_med_2019/question_categorization/default_question_categorization.yml
index e39d87b..b263e62 100644
--- a/configs/vqa_med_2019/question_categorization/default_question_categorization.yml
+++ b/configs/vqa_med_2019/question_categorization/default_question_categorization.yml
@@ -1,6 +1,11 @@
 # Load config defining problems for training, validation and testing.
 default_configs: vqa_med_2019/default_vqa_med_2019.yml
 
+training:
+  # settings parameters
+  terminal_conditions:
+    loss_stop: 1.0e-3
+
 pipeline:
 
   # Predictions decoder.
@@ -26,15 +31,15 @@ pipeline:
       loss: loss
 
   # Statistics.
-  accuracy:
-    type: AccuracyStatistics
-    priority: 100.1
-    streams:
-      targets: category_ids
-
   batch_size:
     type: BatchSizeStatistics
-    priority: 100.2
+    priority: 100.1
+
+  #accuracy:
+  #  type: AccuracyStatistics
+  #  priority: 100.2
+  #  streams:
+  #    targets: category_ids
 
   precision_recall:
     type: PrecisionRecallStatistics
diff --git a/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml b/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml
index 16035d1..532cdea 100644
--- a/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml
+++ b/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml
@@ -31,8 +31,6 @@ pipeline:
     prediction_mode: Last
     priority: 3
     initial_state_trainable: False
-    #num_layers: 5
-    #hidden_size: 1000
     streams:
       inputs: embedded_questions
     globals:
diff --git a/configs/vqa_med_2019/question_categorization/question_categorization_rnn_ffn.yml b/configs/vqa_med_2019/question_categorization/question_categorization_rnn_ffn.yml
new file mode 100644
index 0000000..7988a86
--- /dev/null
+++ b/configs/vqa_med_2019/question_categorization/question_categorization_rnn_ffn.yml
@@ -0,0 +1,65 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/question_categorization/default_question_categorization.yml
+
+pipeline:
+  name: vqa_med_question_categorization_rnn_ffn
+
+  # Add global variables.
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 0
+    # Add input_size to globals.
+    keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size]
+    values: [100, 2, 10, 100]
+
+
+  # Questions encoding.
+  question_tokenizer:
+    type: SentenceTokenizer
+    priority: 1.1
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  # Model 2: Embeddings
+  question_embeddings:
+    type: SentenceEmbeddings
+    priority: 1.2
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions      
+  
+  # Model 2: RNN
+  lstm:
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    prediction_mode: Last
+    priority: 1.3
+    initial_state_trainable: True
+    use_logsoftmax: False
+    dropout_rate: 0.5
+    streams:
+      inputs: embedded_questions
+      predictions: lstm_activations_questions
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_lstm_output_size
+
+  # Model
+  classifier:
+    type: FeedForwardNetwork
+    hidden: [50]
+    priority: 1.4
+    dropout_rate: 0.5
+    streams:
+      inputs: lstm_activations_questions
+    globals:
+      input_size: question_lstm_output_size # Set by global publisher
+      prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK
+
+
+#: pipeline

From 9665d985edf027c14d33089ef333fe912d268d74 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Tue, 16 Apr 2019 12:45:55 -0700
Subject: [PATCH 08/39] Added option to indicate model name while loading

---
 ptp/application/pipeline_manager.py | 26 +++++++++++++++++++++-----
 ptp/components/models/model.py      | 10 +++++++---
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/ptp/application/pipeline_manager.py b/ptp/application/pipeline_manager.py
index f259523..09f67ed 100644
--- a/ptp/application/pipeline_manager.py
+++ b/ptp/application/pipeline_manager.py
@@ -300,13 +300,28 @@ def load_models(self):
         for model in self.models:
             if "load" in model.config.keys():
                 try:
+                    # Determine whether checkpoint is a string (filename) or list.
+                    checkpoint = model.config["load"]
+                    print(checkpoint.to_dict())
+                    if type(checkpoint) == str:
+                        checkpoint_filename = checkpoint
+                        checkpoint_model = None
+                    else: # Assume dictionary.
+                        if 'file' not in checkpoint.keys() or 'model' not in checkpoint.keys():
+                            log_str += "The 'load' section of model '{}' from checkpoint '{}' is incorrect: it must contain a single string (with checkpoint filename) or dictionary (with two sections: checkpoint 'file' and 'model' to load)".format(
+                                model.name,
+                                checkpoint_filename
+                                )
+                            error = True
+                            continue
+                        # Ok!
+                        checkpoint_filename = checkpoint["file"]
+                        checkpoint_model = checkpoint["model"]
+
                     # Check if file exists. 
-                    checkpoint_filename = model.config["load"]
-                    # TODO: if checkpoint_file is a list!!
                     checkpoint_filename = os.path.expanduser(checkpoint_filename.replace(" ",""))
-                    # Check if file exists.
                     if not os.path.isfile(checkpoint_filename):
-                        log_str += "Could not import parameters of model '{}' from checkpoint {} as file does not exist\n".format(
+                        log_str += "Could not import parameters of model '{}' from checkpoint '{}' as file does not exist\n".format(
                             model.name,
                             checkpoint_filename
                             )
@@ -326,7 +341,8 @@ def load_models(self):
                             chkpt['status']
                             )
                     # Load model.
-                    model.load_from_checkpoint(chkpt)
+                    model.load_from_checkpoint(chkpt, checkpoint_model)
+
                     log_str += "  + Model '{}' [{}] params loaded\n".format(model.name, type(model).__name__)
                 except KeyError:
                     log_str += "  + Model '{}' [{}] params not found in checkpoint!\n".format(model.name, type(model).__name__)
diff --git a/ptp/components/models/model.py b/ptp/components/models/model.py
index f907111..f89ad57 100644
--- a/ptp/components/models/model.py
+++ b/ptp/components/models/model.py
@@ -73,13 +73,17 @@ def save_to_checkpoint(self, chkpt):
         chkpt[self.name] = self.state_dict()
 
 
-    def load_from_checkpoint(self, chkpt):
+    def load_from_checkpoint(self, chkpt, section=None):
         """
         Loads state dictionary from checkpoint.
 
-        :param: Checkpoint (dictionary) loaded from file.
+        :param chkpt: Checkpoint (dictionary) loaded from file.
+        
+        :param section: Name of the checkpoint section containing the params (DEFAULT: None, meaning that the model name from the current configuration will be used).
         """
-        self.load_state_dict(chkpt[self.name])
+        if section is None:
+            section = self.name
+        self.load_state_dict(chkpt[section])
 
     def freeze(self):
         """

From d1fad47a98f67b7e87ccc255d8b777d2bbd236bb Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Tue, 16 Apr 2019 12:52:25 -0700
Subject: [PATCH 09/39] Cleaned up log messages emitted while loading models

---
 ptp/application/pipeline_manager.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/ptp/application/pipeline_manager.py b/ptp/application/pipeline_manager.py
index 09f67ed..a6b550a 100644
--- a/ptp/application/pipeline_manager.py
+++ b/ptp/application/pipeline_manager.py
@@ -295,22 +295,20 @@ def load_models(self):
             The 'load' variable should contain path with filename of the checkpoint from which we want to load particular model.
         """
         error = False
-        log_str = ''
+        log_str = 'Trying to load the pre-trained models:\n'
         # Iterate over models.
         for model in self.models:
             if "load" in model.config.keys():
                 try:
                     # Determine whether checkpoint is a string (filename) or list.
                     checkpoint = model.config["load"]
-                    print(checkpoint.to_dict())
                     if type(checkpoint) == str:
                         checkpoint_filename = checkpoint
                         checkpoint_model = None
                     else: # Assume dictionary.
                         if 'file' not in checkpoint.keys() or 'model' not in checkpoint.keys():
-                            log_str += "The 'load' section of model '{}' from checkpoint '{}' is incorrect: it must contain a single string (with checkpoint filename) or dictionary (with two sections: checkpoint 'file' and 'model' to load)".format(
-                                model.name,
-                                checkpoint_filename
+                            log_str += "  + The 'load' section of model '{}' is incorrect: it must contain a single string (with checkpoint filename) or a dictionary (with two sections: checkpoint 'file' and 'model' to load)\n".format(
+                                model.name
                                 )
                             error = True
                             continue
@@ -321,7 +319,7 @@ def load_models(self):
                     # Check if file exists. 
                     checkpoint_filename = os.path.expanduser(checkpoint_filename.replace(" ",""))
                     if not os.path.isfile(checkpoint_filename):
-                        log_str += "Could not import parameters of model '{}' from checkpoint '{}' as file does not exist\n".format(
+                        log_str += "  + Could not import parameters of model '{}' from checkpoint '{}' as file does not exist\n".format(
                             model.name,
                             checkpoint_filename
                             )
@@ -332,7 +330,7 @@ def load_models(self):
                     # This is to be able to load a CUDA-trained model on CPU
                     chkpt = torch.load(checkpoint_filename, map_location=lambda storage, loc: storage)
 
-                    log_str += "Importing model '{}' from pipeline '{}' parameters from checkpoint from {} (episode: {}, loss: {}, status: {}):\n".format(
+                    log_str += "  + Importing model '{}' from pipeline '{}' parameters from checkpoint from {} (episode: {}, loss: {}, status: {})\n".format(
                             model.name,
                             chkpt['name'],
                             chkpt['timestamp'],

From 7611a5bbf902e7fdfc5311a28ceb47b8cadb7826 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Tue, 16 Apr 2019 13:01:59 -0700
Subject: [PATCH 10/39] C1 variational flow - shared rnn with question
 categorization

---
 ...cation_vf_shared_question_flow_rnn_ffn.yml | 127 ++++++++++++++++++
 ...cation_vf_shared_question_flow_rnn_ffn.yml | 109 ---------------
 .../question_categorization_rnn.yml           |   2 +-
 3 files changed, 128 insertions(+), 110 deletions(-)
 create mode 100644 configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml
 delete mode 100644 configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml

diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml b/configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml
new file mode 100644
index 0000000..5c2c7b8
--- /dev/null
+++ b/configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml
@@ -0,0 +1,127 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml
+
+pipeline:
+  name: vqa_med_c1_classification_vf_question_rnn_separate_q_categorization
+
+  
+  ################# FLOW 0: SHARED #################
+
+  # Add global variables.
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 0
+    # Add input_size to globals.
+    keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size]
+    values: [100, 2, 10, 100]
+
+   # Questions encoding.
+  flow0_question_tokenizer:
+    priority: 0.2
+    type: SentenceTokenizer
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  # Model 1: question embeddings
+  flow0_question_embeddings:
+    type: SentenceEmbeddings
+    priority: 0.3
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: question_embeddings
+    freeze: True
+    ###################
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions      
+  
+  # Model 2: question RNN
+  flow0_lstm:
+    priority: 0.4
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: lstm
+    freeze: True
+    ###################
+    prediction_mode: Last
+    initial_state_trainable: True
+    use_logsoftmax: False
+    dropout_rate: 0.5
+    streams:
+      inputs: embedded_questions
+      predictions: lstm_activations_questions
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_lstm_output_size
+
+  ################# FLOW 1: CATEGORY #################
+
+  # Model 3: FFN question category
+  flow1_classifier:
+    priority: 1.1
+    type: FeedForwardNetwork
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: classifier
+    freeze: True
+    ###################
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: lstm_activations_questions
+      predictions: predicted_question_categories
+    globals:
+      input_size: question_lstm_output_size # Set by global publisher
+      prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK
+
+
+  flow1_category_decoder:
+    priority: 1.2
+    type: WordDecoder
+    # Use the same word mappings as label indexer.
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: predicted_question_categories
+      outputs: predicted_question_categories_ids
+    globals:
+      vocabulary_size: num_categories
+      word_mappings: category_word_mappings
+
+  ################# FLOW C1: question #################
+
+  flow2_c1_string_to_mask:
+    priority: 2.1
+    type: StringToMask
+    globals:
+      # This has to be changed to {0: C1}
+      # Question is what to do ewith UNK?
+      word_mappings: category_word_mappings
+    streams:
+      strings: predicted_question_categories_ids
+      string_indices: flow_c1_targets
+      masks: flow_c1_masks
+
+  # Model 4: FFN C1 answering
+  flow2_c1_lstm:
+    priority: 2.2
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: lstm_activations_questions
+    globals:
+      input_size: question_lstm_output_size # Set by global publisher
+      prediction_size: vocabulary_size_c1
+
+
+#: pipeline
diff --git a/configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml b/configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml
deleted file mode 100644
index 69f14bf..0000000
--- a/configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml
+++ /dev/null
@@ -1,109 +0,0 @@
-# Load config defining problems for training, validation and testing.
-default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml
-
-pipeline:
-  name: vqa_med_c1_classification_vf_question_rnn_separate_q_categorization
-
-  
-  ################# SHARED #################
-
-   # Questions encoding.
-  question_tokenizer:
-    type: SentenceTokenizer
-    priority: 0.2
-    streams: 
-      inputs: questions
-      outputs: tokenized_questions
-
-  ################# FLOW 0: CATEGORY #################
-
-  # Model 2: Embeddings
-  question_embeddings:
-    type: SentenceEmbeddings
-    priority: 0.3
-    # LOAD AND FREEZE
-    load:  ~/Documents/image-clef-2019/experiments/q_categorization/ xxx /checkpoints/vqa_med_question_categorization_rnn_best.pt
-    freeze: True
-    embeddings_size: 50
-    pretrained_embeddings_file: glove.6B.50d.txt
-    data_folder: ~/data/vqa-med
-    word_mappings_file: questions.all.word.mappings.csv
-    streams:
-      inputs: tokenized_questions
-      outputs: embedded_questions      
-  
-  # Model 2: RNN
-  lstm:
-    type: RecurrentNeuralNetwork
-    cell_type: LSTM
-    # LOAD AND FREEZE
-    load:  ~/Documents/image-clef-2019/experiments/q_categorization/ xxx /checkpoints/vqa_med_question_categorization_rnn_best.pt
-    freeze: True
-    prediction_mode: Last
-    priority: 0.4
-    initial_state_trainable: False
-    streams:
-      inputs: embedded_questions
-      predictions: predicted_question_categories
-    globals:
-      input_size: embeddings_size
-      prediction_size: num_categories  # C1,C2,C3,C4,BIN,UNK
-
-
-  category_decoder:
-    type: WordDecoder
-    priority: 0.5
-    # Use the same word mappings as label indexer.
-    import_word_mappings_from_globals: True
-    streams:
-      inputs: predicted_question_categories
-      outputs: predicted_question_categories_ids
-    globals:
-      vocabulary_size: num_categories
-      word_mappings: category_word_mappings
-
-  ################# FLOW C1: question #################
-
-  flow_c1_string_to_mask:
-    type: StringToMask
-    priority: 1.1
-    globals:
-      # This has to be changed to {0: C1}
-      # Question is what to do ewith UNK?
-      word_mappings: category_word_mappings
-    streams:
-      strings: predicted_question_categories_ids
-      string_indices: flow_c1_targets
-      masks: flow_c1_masks
-
-  # Model 1: Embeddings
-  flow_c1_question_embeddings:
-    type: SentenceEmbeddings
-    priority: 1.2
-    embeddings_size: 50
-    pretrained_embeddings_file: glove.6B.50d.txt
-    data_folder: ~/data/vqa-med
-    word_mappings_file: questions.all.word.mappings.csv
-    streams:
-      inputs: tokenized_questions
-      outputs: flow_c1_embedded_questions
-    globals:
-      embeddings_size: flow_c1_embeddings_size  
-  
-  # Model 2: RNN
-  flow_c1_lstm:
-    type: RecurrentNeuralNetwork
-    cell_type: LSTM
-    prediction_mode: Last
-    priority: 1.3
-    initial_state_trainable: False
-    #num_layers: 5
-    hidden_size: 50
-    streams:
-      inputs: flow_c1_embedded_questions
-    globals:
-      input_size: flow_c1_embeddings_size
-      prediction_size: vocabulary_size_c1
-
-
-#: pipeline
diff --git a/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml b/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml
index 532cdea..7097041 100644
--- a/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml
+++ b/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml
@@ -35,6 +35,6 @@ pipeline:
       inputs: embedded_questions
     globals:
       input_size: embeddings_size
-      prediction_size: num_categories # C1,C2,C3,C4
+      prediction_size: num_categories # C1,C2,C3,C4,BIN,UNK
 
 #: pipeline

From 4faa4bc0abeccf8e7216ab0bf6ce8b97e27e437a Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Tue, 16 Apr 2019 15:48:52 -0700
Subject: [PATCH 11/39] Added out_of_vocabulary to LabelIndexer, first VQAMED
 variational flow - c1 and binary

---
 .../default/components/text/label_indexer.yml |   4 +
 ...cation_vf_shared_question_flow_rnn_ffn.yml | 127 --------
 ...es_shared_question_rnn_two_ffns_losses.yml | 273 ++++++++++++++++++
 ptp/components/text/label_indexer.py          |   9 +-
 4 files changed, 285 insertions(+), 128 deletions(-)
 delete mode 100644 configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml
 create mode 100644 configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml

diff --git a/configs/default/components/text/label_indexer.yml b/configs/default/components/text/label_indexer.yml
index 45f6e6f..5b871e9 100644
--- a/configs/default/components/text/label_indexer.yml
+++ b/configs/default/components/text/label_indexer.yml
@@ -25,6 +25,10 @@ import_word_mappings_from_globals: False
 # Flag informing whether word mappings will be exported to globals (LOADED)
 export_word_mappings_to_globals: False
 
+# Value that will be used when a word is out of vocabulary (LOADED)
+# (Mask for that element will be 0 as well)
+out_of_vocabulary_value: -1
+
 streams: 
   ####################################################################
   # 2. Keymappings associated with INPUT and OUTPUT streams.
diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml b/configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml
deleted file mode 100644
index 5c2c7b8..0000000
--- a/configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml
+++ /dev/null
@@ -1,127 +0,0 @@
-# Load config defining problems for training, validation and testing.
-default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml
-
-pipeline:
-  name: vqa_med_c1_classification_vf_question_rnn_separate_q_categorization
-
-  
-  ################# FLOW 0: SHARED #################
-
-  # Add global variables.
-  global_publisher:
-    type: GlobalVariablePublisher
-    priority: 0
-    # Add input_size to globals.
-    keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size]
-    values: [100, 2, 10, 100]
-
-   # Questions encoding.
-  flow0_question_tokenizer:
-    priority: 0.2
-    type: SentenceTokenizer
-    streams: 
-      inputs: questions
-      outputs: tokenized_questions
-
-  # Model 1: question embeddings
-  flow0_question_embeddings:
-    type: SentenceEmbeddings
-    priority: 0.3
-    # LOAD AND FREEZE #
-    load: 
-      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
-      model: question_embeddings
-    freeze: True
-    ###################
-    embeddings_size: 50
-    pretrained_embeddings_file: glove.6B.50d.txt
-    data_folder: ~/data/vqa-med
-    word_mappings_file: questions.all.word.mappings.csv
-    streams:
-      inputs: tokenized_questions
-      outputs: embedded_questions      
-  
-  # Model 2: question RNN
-  flow0_lstm:
-    priority: 0.4
-    type: RecurrentNeuralNetwork
-    cell_type: LSTM
-    # LOAD AND FREEZE #
-    load: 
-      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
-      model: lstm
-    freeze: True
-    ###################
-    prediction_mode: Last
-    initial_state_trainable: True
-    use_logsoftmax: False
-    dropout_rate: 0.5
-    streams:
-      inputs: embedded_questions
-      predictions: lstm_activations_questions
-    globals:
-      input_size: embeddings_size
-      prediction_size: question_lstm_output_size
-
-  ################# FLOW 1: CATEGORY #################
-
-  # Model 3: FFN question category
-  flow1_classifier:
-    priority: 1.1
-    type: FeedForwardNetwork
-    # LOAD AND FREEZE #
-    load: 
-      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
-      model: classifier
-    freeze: True
-    ###################
-    hidden: [50]
-    dropout_rate: 0.5
-    streams:
-      inputs: lstm_activations_questions
-      predictions: predicted_question_categories
-    globals:
-      input_size: question_lstm_output_size # Set by global publisher
-      prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK
-
-
-  flow1_category_decoder:
-    priority: 1.2
-    type: WordDecoder
-    # Use the same word mappings as label indexer.
-    import_word_mappings_from_globals: True
-    streams:
-      inputs: predicted_question_categories
-      outputs: predicted_question_categories_ids
-    globals:
-      vocabulary_size: num_categories
-      word_mappings: category_word_mappings
-
-  ################# FLOW C1: question #################
-
-  flow2_c1_string_to_mask:
-    priority: 2.1
-    type: StringToMask
-    globals:
-      # This has to be changed to {0: C1}
-      # Question is what to do ewith UNK?
-      word_mappings: category_word_mappings
-    streams:
-      strings: predicted_question_categories_ids
-      string_indices: flow_c1_targets
-      masks: flow_c1_masks
-
-  # Model 4: FFN C1 answering
-  flow2_c1_lstm:
-    priority: 2.2
-    type: FeedForwardNetwork
-    hidden: [50]
-    dropout_rate: 0.5
-    streams:
-      inputs: lstm_activations_questions
-    globals:
-      input_size: question_lstm_output_size # Set by global publisher
-      prediction_size: vocabulary_size_c1
-
-
-#: pipeline
diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml
new file mode 100644
index 0000000..f4172bd
--- /dev/null
+++ b/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml
@@ -0,0 +1,273 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+  problem:
+    categories: C1
+  sampler:
+    name: WeightedRandomSampler
+    weights: ~/data/vqa-med/answers.c1.weights.csv
+  dataloader:
+    num_workers: 4
+
+# Validation parameters:
+validation:
+  problem:
+    categories: C1
+  dataloader:
+    num_workers: 4
+
+
+pipeline:
+  name: c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses
+  
+  ################# FLOW 0: SHARED #################
+
+  # Add global variables.
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 0
+    # Add input_size to globals.
+    keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_binary_word_to_ix]
+    values: [100, 2, 10, 100,{"C1": 0}, {"BINARY": 0}]
+
+  # Statistics.
+  batch_size:
+    type: BatchSizeStatistics
+    priority: 0.1
+
+
+
+
+  ################# FLOW 0: SHARED QUESTION #################
+
+    # Questions encoding.
+  flow0_question_tokenizer:
+    priority: 0.2
+    type: SentenceTokenizer
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  # Model 1: question embeddings
+  flow0_question_embeddings:
+    type: SentenceEmbeddings
+    priority: 0.3
+    # LOAD AND FREEZE #
+    #load: 
+    #  file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+    #  model: question_embeddings
+    #freeze: True
+    ###################
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions      
+  
+  # Model 2: question RNN
+  flow0_lstm:
+    priority: 0.4
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    # LOAD AND FREEZE #
+    #load: 
+    #  file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+    #  model: lstm
+    #freeze: True
+    ###################
+    prediction_mode: Last
+    initial_state_trainable: True
+    use_logsoftmax: False
+    dropout_rate: 0.5
+    streams:
+      inputs: embedded_questions
+      predictions: lstm_activations_questions
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_lstm_output_size
+
+  ################# FLOW 1: CATEGORY #################
+
+  # Model 3: FFN question category
+  flow1_classifier:
+    priority: 1.1
+    type: FeedForwardNetwork
+    # LOAD AND FREEZE #
+    #load: 
+    #  file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+    #  model: classifier
+    #freeze: True
+    ###################
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: lstm_activations_questions
+      predictions: predicted_question_categories_preds
+    globals:
+      input_size: question_lstm_output_size # Set by global publisher
+      prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK
+
+
+  flow1_category_decoder:
+    priority: 1.2
+    type: WordDecoder
+    # Use the same word mappings as label indexer.
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: predicted_question_categories_preds
+      outputs: predicted_question_categories_names
+    globals:
+      vocabulary_size: num_categories
+      word_mappings: category_word_mappings
+
+  ################# FLOW 2: C1 question #################
+
+  # Answer encoding for flow 2.
+  flow2_c1_answer_indexer:
+    type: LabelIndexer
+    priority: 2.0
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c1_without_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: flow2_c1_answers_without_yn_ids
+    globals:
+      vocabulary_size: vocabulary_size_c1_without_yn
+      word_mappings: word_mappings_c1_without_yn
+
+  # Sample masking based on categories.
+  flow2_c1_string_to_mask:
+    priority: 2.1
+    type: StringToMask
+    globals:
+      word_mappings: category_c1_word_to_ix
+    streams:
+      strings: category_names # predicted_question_categories_names ## FOR NOW!
+      string_indices: predicted_c1_question_categories_indices # NOT USED
+      masks: flow2_c1_masks
+
+  # Model 4: FFN C1 answering
+  flow2_c1_lstm:
+    priority: 2.2
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: lstm_activations_questions
+      predictions: flow2_c1_predictions
+    globals:
+      input_size: question_lstm_output_size # Set by global publisher
+      prediction_size: vocabulary_size_c1_without_yn
+
+  flow2_c1_nllloss:
+    type: NLLLoss
+    priority: 2.3
+    targets_dim: 1
+    use_masking: True
+    streams:
+      predictions: flow2_c1_predictions
+      masks: flow2_c1_masks
+      targets: flow2_c1_answers_without_yn_ids
+      loss: flow2_c1_loss
+
+  flow2_c1_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 2.4
+    use_word_mappings: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      masks: flow2_c1_masks
+      predictions: flow2_c1_predictions
+      targets: flow2_c1_answers_without_yn_ids
+    globals:
+      word_mappings: word_mappings_c1_without_yn
+      num_classes: vocabulary_size_c1_without_yn
+    statistics:
+      precision: flow2_c1_precision
+      recall: flow2_c1_recall
+      f1score: flow2_c1_f1score
+
+  ################# FLOW 3: BINARY question #################
+
+  # Answer encoding for flow 3.
+  flow3_binary_answer_indexer:
+    type: LabelIndexer
+    priority: 3.0
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.binary_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: flow3_binary_answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_binary_yn
+      word_mappings: word_mappings_binary_yn
+
+  flow3_binary_string_to_mask:
+    priority: 3.1
+    type: StringToMask
+    globals:
+      word_mappings: category_binary_word_to_ix
+    streams:
+      strings: category_names # predicted_question_categories_names ## FOR NOW!
+      string_indices: predicted_binary_question_categories_indices # NOT USED
+      masks: flow3_binary_masks
+
+  # Model 5: FFN binary answering
+  flow3_binary_lstm:
+    priority: 3.2
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: lstm_activations_questions
+      predictions: flow3_binary_predictions
+    globals:
+      input_size: question_lstm_output_size # Set by global publisher
+      prediction_size: vocabulary_size_binary_yn
+
+  flow3_binary_nllloss:
+    type: NLLLoss
+    priority: 3.3
+    targets_dim: 1
+    use_masking: True
+    streams:
+      predictions: flow3_binary_predictions
+      masks: flow3_binary_masks
+      targets: flow3_binary_answers_ids
+      loss: flow3_binary_loss
+
+  flow3_binary_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 3.4
+    use_word_mappings: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      masks: flow3_binary_masks
+      predictions: flow3_binary_predictions
+      targets: flow3_binary_answers_ids
+    globals:
+      word_mappings: word_mappings_binary_yn
+      num_classes: vocabulary_size_binary_yn
+    statistics:
+      precision: flow3_binary_precision
+      recall: flow3_binary_recall
+      f1score: flow3_binary_f1score
+
+  # Viewers.
+  viewer:
+    type: StreamViewer
+    priority: 100.1
+    input_streams: questions,answers, category_names,predicted_question_categories_names, flow2_c1_masks,flow2_c1_answers_without_yn_ids,flow2_c1_predictions, flow3_binary_masks,flow3_binary_answers_ids,flow3_binary_predictions
+
+
+#: pipeline
diff --git a/ptp/components/text/label_indexer.py b/ptp/components/text/label_indexer.py
index f9728ea..410aa46 100644
--- a/ptp/components/text/label_indexer.py
+++ b/ptp/components/text/label_indexer.py
@@ -42,6 +42,9 @@ def __init__(self, name, config):
         self.key_inputs = self.stream_keys["inputs"]
         self.key_outputs = self.stream_keys["outputs"]
 
+        # Get value from configuration.
+        self.out_of_vocabulary_value = self.config["out_of_vocabulary_value"]
+
 
     def input_data_definitions(self):
         """ 
@@ -81,7 +84,11 @@ def __call__(self, data_dict):
         for sample in inputs:
             assert not isinstance(sample, (list,)), 'This encoder requires input sample to contain a single word'
             # Process single token.
-            output_sample = self.word_to_ix[sample]
+            if sample in self.word_to_ix.keys():
+                output_sample = self.word_to_ix[sample]
+            else:
+                # Word out of vocabulary.
+                output_sample = self.out_of_vocabulary_value
             outputs_list.append(output_sample)
         # Transform to tensor.
         output_tensor = torch.tensor(outputs_list)

From a6133f7bdab47cc349da0a52289e332b262a9fb0 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Tue, 16 Apr 2019 16:34:26 -0700
Subject: [PATCH 12/39] pipe with pretrained categorization and two losses for
 C1 and binary questions

---
 ...on_shared_question_rnn_two_ffns_losses.yml | 366 ++++++++++++++++++
 1 file changed, 366 insertions(+)
 create mode 100644 configs/vqa_med_2019/vf/c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml

diff --git a/configs/vqa_med_2019/vf/c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
new file mode 100644
index 0000000..eb883db
--- /dev/null
+++ b/configs/vqa_med_2019/vf/c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
@@ -0,0 +1,366 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+  problem:
+    categories: C1
+  sampler:
+    name: WeightedRandomSampler
+    weights: ~/data/vqa-med/answers.c1.weights.csv
+  dataloader:
+    num_workers: 4
+
+# Validation parameters:
+validation:
+  problem:
+    categories: C1
+  dataloader:
+    num_workers: 4
+
+
+pipeline:
+  name: c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
+  
+  ################# FLOW 0: SHARED #################
+
+  # Add global variables.
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 0
+    # Add input_size to globals.
+    keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_binary_word_to_ix]
+    values: [100, 2, 10, 100,{"C1": 0}, {"BINARY": 0}]
+
+  # Statistics.
+  batch_size:
+    type: BatchSizeStatistics
+    priority: 0.1
+
+  ################# FLOW 0: SHARED QUESTION #################
+
+    # Questions encoding.
+  flow0_question_tokenizer:
+    priority: 0.2
+    type: SentenceTokenizer
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  # Model 1: question embeddings
+  flow0_question_embeddings:
+    type: SentenceEmbeddings
+    priority: 0.3
+    # LOAD AND FREEZE #
+    #load: 
+    #  file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+    #  model: question_embeddings
+    #freeze: True
+    ###################
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions      
+  
+  # Model 2: question RNN
+  flow0_lstm:
+    priority: 0.4
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    # LOAD AND FREEZE #
+    #load: 
+    #  file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+    #  model: lstm
+    #freeze: True
+    ###################
+    prediction_mode: Last
+    initial_state_trainable: True
+    use_logsoftmax: False
+    dropout_rate: 0.5
+    streams:
+      inputs: embedded_questions
+      predictions: lstm_activations_questions
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_lstm_output_size
+
+  # Answer encoding
+  flow0_all_answer_indexer:
+    type: LabelIndexer
+    priority: 0.6
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c1.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: all_answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_all_c1_binasry
+      word_mappings: word_mappings_all_c1_binary
+
+
+  ################# FLOW 1: CATEGORY #################
+
+  # Model 1: question embeddings
+  flow1_question_embeddings:
+    type: SentenceEmbeddings
+    priority: 1.1
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: question_embeddings
+    freeze: True
+    ###################
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: flow1_embedded_questions      
+  
+  # Model 2: question RNN
+  flow1_lstm:
+    priority: 1.2
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: lstm
+    freeze: True
+    ###################
+    prediction_mode: Last
+    initial_state_trainable: True
+    use_logsoftmax: False
+    dropout_rate: 0.5
+    streams:
+      inputs: flow1_embedded_questions
+      predictions: flow1_lstm_activations_questions
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_lstm_output_size
+
+  # Model 3: FFN question category
+  flow1_classifier:
+    priority: 1.3
+    type: FeedForwardNetwork
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: classifier
+    freeze: True
+    ###################
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: flow1_lstm_activations_questions
+      predictions: flow1_predicted_question_categories_preds
+    globals:
+      input_size: question_lstm_output_size # Set by global publisher
+      prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK
+
+  flow1_category_decoder:
+    priority: 1.4
+    type: WordDecoder
+    # Use the same word mappings as label indexer.
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: flow1_predicted_question_categories_preds
+      outputs: flow1_predicted_question_categories_names
+    globals:
+      vocabulary_size: num_categories
+      word_mappings: category_word_mappings
+
+  flow1_category_accuracy:
+    type: AccuracyStatistics
+    priority: 1.5
+    streams:
+      targets: category_ids
+      predictions: flow1_predicted_question_categories_preds
+
+  ################# FLOW 2: C1 question #################
+
+  # Answer encoding for flow 2.
+  flow2_c1_answer_indexer:
+    type: LabelIndexer
+    priority: 2.0
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c1_without_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: flow2_c1_answers_without_yn_ids
+    globals:
+      vocabulary_size: vocabulary_size_c1_without_yn
+      word_mappings: word_mappings_c1_without_yn
+
+  # Sample masking based on categories.
+  flow2_c1_string_to_mask:
+    priority: 2.1
+    type: StringToMask
+    globals:
+      word_mappings: category_c1_word_to_ix
+    streams:
+      strings: flow1_predicted_question_categories_names
+      string_indices: predicted_c1_question_categories_indices # NOT USED
+      masks: flow2_c1_masks
+
+  # Model 4: FFN C1 answering
+  flow2_c1_lstm:
+    priority: 2.2
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: lstm_activations_questions
+      predictions: flow2_c1_predictions
+    globals:
+      input_size: question_lstm_output_size # Set by global publisher
+      prediction_size: vocabulary_size_c1_without_yn
+
+  flow2_c1_nllloss:
+    type: NLLLoss
+    priority: 2.3
+    targets_dim: 1
+    use_masking: True
+    streams:
+      predictions: flow2_c1_predictions
+      masks: flow2_c1_masks
+      targets: flow2_c1_answers_without_yn_ids
+      loss: flow2_c1_loss
+
+  flow2_c1_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 2.4
+    use_word_mappings: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      masks: flow2_c1_masks
+      predictions: flow2_c1_predictions
+      targets: flow2_c1_answers_without_yn_ids
+    globals:
+      word_mappings: word_mappings_c1_without_yn
+      num_classes: vocabulary_size_c1_without_yn
+    statistics:
+      precision: flow2_c1_precision
+      recall: flow2_c1_recall
+      f1score: flow2_c1_f1score
+
+  ################# FLOW 3: BINARY question #################
+
+  # Answer encoding for flow 3.
+  flow3_binary_answer_indexer:
+    type: LabelIndexer
+    priority: 3.0
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.binary_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: flow3_binary_answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_binary_yn
+      word_mappings: word_mappings_binary_yn
+
+  flow3_binary_string_to_mask:
+    priority: 3.1
+    type: StringToMask
+    globals:
+      word_mappings: category_binary_word_to_ix
+    streams:
+      strings: flow1_predicted_question_categories_names
+      string_indices: predicted_binary_question_categories_indices # NOT USED
+      masks: flow3_binary_masks
+
+  # Model 4: FFN C1 answering
+  flow3_binary_lstm:
+    priority: 3.2
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: lstm_activations_questions
+      predictions: flow3_binary_predictions
+    globals:
+      input_size: question_lstm_output_size # Set by global publisher
+      prediction_size: vocabulary_size_binary_yn
+
+  flow3_binary_nllloss:
+    type: NLLLoss
+    priority: 3.3
+    targets_dim: 1
+    use_masking: True
+    streams:
+      predictions: flow3_binary_predictions
+      masks: flow3_binary_masks
+      targets: flow3_binary_answers_ids
+      loss: flow3_binary_loss
+
+  flow3_binary_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 3.4
+    use_word_mappings: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      masks: flow3_binary_masks
+      predictions: flow3_binary_predictions
+      targets: flow3_binary_answers_ids
+    globals:
+      word_mappings: word_mappings_binary_yn
+      num_classes: vocabulary_size_binary_yn
+    statistics:
+      precision: flow3_binary_precision
+      recall: flow3_binary_recall
+      f1score: flow3_binary_f1score
+
+  ################# FLOW 4: MERGE ANSWERS #################
+
+    merged_predictions:
+    type: JoinMaskedPredictions
+    priority: 4.1
+    # Names of used input streams.
+    input_prediction_streams: [flow2_c1_predictions, flow3_binary_predictions]
+    input_mask_streams: [flow2_c1_masks, flow3_binary_masks]
+    input_word_mappings: [word_mappings_c1_without_yn, word_mappings_binary_yn]
+    globals:
+      output_word_mappings: word_mappings_all_c1_binary
+    streams:
+      output_strings: merged_predictions
+      output_indices: merged_pred_indices
+
+  # Statistics.
+  merged_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 4.2
+    # Use prediction indices instead of distributions.
+    use_prediction_distributions: False
+    use_word_mappings: True
+    show_class_scores: True
+    show_confusion_matrix: True
+    globals:
+      word_mappings: word_mappings_all_c1_binary
+    streams:
+      targets: all_answers_ids
+      predictions: merged_pred_indices
+    statistics:
+      precision: merged_precision
+      recall: merged_recall
+      f1score: merged_f1score
+
+  # Viewers.
+  viewer:
+    type: StreamViewer
+    priority: 4.3
+    input_streams: questions,answers, category_names,predicted_question_categories_names, flow2_c1_masks,flow2_c1_answers_without_yn_ids,flow2_c1_predictions, flow3_binary_masks,flow3_binary_answers_ids,flow3_binary_predictions, merged_predictions
+
+
+#: pipeline

From 95776c892d5d4d4a674bbec2271ac2b72779b34d Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Tue, 16 Apr 2019 16:36:11 -0700
Subject: [PATCH 13/39] rename

---
 ...gory_classification_shared_question_rnn_two_ffns_losses.yml} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename configs/vqa_med_2019/vf/{c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml => c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml} (99%)

diff --git a/configs/vqa_med_2019/vf/c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
similarity index 99%
rename from configs/vqa_med_2019/vf/c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
rename to configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
index eb883db..488d8d6 100644
--- a/configs/vqa_med_2019/vf/c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
+++ b/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
@@ -20,7 +20,7 @@ validation:
 
 
 pipeline:
-  name: c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
+  name: c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses
   
   ################# FLOW 0: SHARED #################
 

From 33d296c068516f1e3501ae9f12f0cac23321543a Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Tue, 16 Apr 2019 16:49:08 -0700
Subject: [PATCH 14/39] statistics typo fix in config - c1+binary vf

---
 ...ry_classification_shared_question_rnn_two_ffns_losses.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
index 488d8d6..f5763e3 100644
--- a/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
+++ b/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
@@ -182,6 +182,8 @@ pipeline:
     streams:
       targets: category_ids
       predictions: flow1_predicted_question_categories_preds
+    statistics:
+      accuracy: categorization_accuracy
 
   ################# FLOW 2: C1 question #################
 
@@ -324,7 +326,8 @@ pipeline:
 
   ################# FLOW 4: MERGE ANSWERS #################
 
-    merged_predictions:
+  # Merge predictions
+  merged_predictions:
     type: JoinMaskedPredictions
     priority: 4.1
     # Names of used input streams.

From ee39ace2111e481c4ac8b0d00c1e6f57ad87c4b1 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Tue, 16 Apr 2019 17:05:35 -0700
Subject: [PATCH 15/39] join masked predictions test

---
 ptp/components/masking/join_masked_predictions.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ptp/components/masking/join_masked_predictions.py b/ptp/components/masking/join_masked_predictions.py
index fa0eb3d..0b418ea 100644
--- a/ptp/components/masking/join_masked_predictions.py
+++ b/ptp/components/masking/join_masked_predictions.py
@@ -123,10 +123,12 @@ def __call__(self, data_dict):
 
             - "outputs": added output field containing tensor [BATCH_SIZE x ...] 
         """
-        # Get inputs masks
+        # Get inputs masks.
         masks = []
         for imsk in self.input_mask_stream_keys:
-            masks.append(data_dict[imsk].data.cpu().numpy())
+            # Get mask from stream.
+            mask = data_dict[imsk]
+            masks.append(mask.data.cpu().numpy())
         
         # Sum all masks and make sure that they are complementary.
         masks_sum = np.sum(masks, axis=0)

From b6057949067f4ca39efea3de203ff57b31f4cb8c Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Tue, 16 Apr 2019 17:53:17 -0700
Subject: [PATCH 16/39] Fixed masking in P/R flow 2 and 3

---
 ...egory_classification_shared_question_rnn_two_ffns_losses.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
index f5763e3..a7e755d 100644
--- a/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
+++ b/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
@@ -241,6 +241,7 @@ pipeline:
     type: PrecisionRecallStatistics
     priority: 2.4
     use_word_mappings: True
+    use_masking: True
     #show_class_scores: True
     #show_confusion_matrix: True
     streams:
@@ -310,6 +311,7 @@ pipeline:
     type: PrecisionRecallStatistics
     priority: 3.4
     use_word_mappings: True
+    use_masking: True
     #show_class_scores: True
     #show_confusion_matrix: True
     streams:

From edeeed9fcb1af28a45874361e1a90b4fa54db3fd Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Tue, 16 Apr 2019 17:55:54 -0700
Subject: [PATCH 17/39] cleanup c1 binary hardcoded categories from problem

---
 ...es_shared_question_rnn_two_ffns_losses.yml | 35 -------------------
 1 file changed, 35 deletions(-)

diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml
index f4172bd..0065fb3 100644
--- a/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml
+++ b/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml
@@ -37,9 +37,6 @@ pipeline:
     type: BatchSizeStatistics
     priority: 0.1
 
-
-
-
   ################# FLOW 0: SHARED QUESTION #################
 
     # Questions encoding.
@@ -92,38 +89,6 @@ pipeline:
 
   ################# FLOW 1: CATEGORY #################
 
-  # Model 3: FFN question category
-  flow1_classifier:
-    priority: 1.1
-    type: FeedForwardNetwork
-    # LOAD AND FREEZE #
-    #load: 
-    #  file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
-    #  model: classifier
-    #freeze: True
-    ###################
-    hidden: [50]
-    dropout_rate: 0.5
-    streams:
-      inputs: lstm_activations_questions
-      predictions: predicted_question_categories_preds
-    globals:
-      input_size: question_lstm_output_size # Set by global publisher
-      prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK
-
-
-  flow1_category_decoder:
-    priority: 1.2
-    type: WordDecoder
-    # Use the same word mappings as label indexer.
-    import_word_mappings_from_globals: True
-    streams:
-      inputs: predicted_question_categories_preds
-      outputs: predicted_question_categories_names
-    globals:
-      vocabulary_size: num_categories
-      word_mappings: category_word_mappings
-
   ################# FLOW 2: C1 question #################
 
   # Answer encoding for flow 2.

From 6d4200a651ea7f4d362bf9e2d085eda70685da11 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Tue, 16 Apr 2019 18:04:41 -0700
Subject: [PATCH 18/39] masking for hardcoded c1 binary P/R

---
 ...hardcoded_categories_shared_question_rnn_two_ffns_losses.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml
index 0065fb3..d5742cd 100644
--- a/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml
+++ b/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml
@@ -144,6 +144,7 @@ pipeline:
   flow2_c1_precision_recall:
     type: PrecisionRecallStatistics
     priority: 2.4
+    use_masking: True
     use_word_mappings: True
     #show_class_scores: True
     #show_confusion_matrix: True
@@ -213,6 +214,7 @@ pipeline:
   flow3_binary_precision_recall:
     type: PrecisionRecallStatistics
     priority: 3.4
+    use_masking: True
     use_word_mappings: True
     #show_class_scores: True
     #show_confusion_matrix: True

From 6a07a416f17285318e63f38b17da694dbb9310ad Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Tue, 16 Apr 2019 18:53:21 -0700
Subject: [PATCH 19/39] cleanups and config for c1_bin shared all encoders

---
 ...d_shared_question_rnn_two_ffns_losses.yml} |  96 ++---
 ...nn_shared_all_encoders_two_ffns_losses.yml | 401 ++++++++++++++++++
 ...n_shared_question_rnn_two_ffns_losses.yml} | 138 +++---
 3 files changed, 518 insertions(+), 117 deletions(-)
 rename configs/vqa_med_2019/vf/{c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml => c1_binary_vf_cat_hard_shared_question_rnn_two_ffns_losses.yml} (72%)
 create mode 100644 configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml
 rename configs/vqa_med_2019/vf/{c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml => c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses.yml} (74%)

diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_cat_hard_shared_question_rnn_two_ffns_losses.yml
similarity index 72%
rename from configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml
rename to configs/vqa_med_2019/vf/c1_binary_vf_cat_hard_shared_question_rnn_two_ffns_losses.yml
index d5742cd..4c2fe60 100644
--- a/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml
+++ b/configs/vqa_med_2019/vf/c1_binary_vf_cat_hard_shared_question_rnn_two_ffns_losses.yml
@@ -20,9 +20,9 @@ validation:
 
 
 pipeline:
-  name: c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses
+  name: c1_binary_vf_cat_hard_shared_question_rnn_two_ffns_losses
   
-  ################# FLOW 0: SHARED #################
+  ################# PIPE 0: SHARED #################
 
   # Add global variables.
   global_publisher:
@@ -37,10 +37,10 @@ pipeline:
     type: BatchSizeStatistics
     priority: 0.1
 
-  ################# FLOW 0: SHARED QUESTION #################
+  ################# PIPE 0: SHARED QUESTION #################
 
     # Questions encoding.
-  flow0_question_tokenizer:
+  pipe0_question_tokenizer:
     priority: 0.2
     type: SentenceTokenizer
     streams: 
@@ -48,7 +48,7 @@ pipeline:
       outputs: tokenized_questions
 
   # Model 1: question embeddings
-  flow0_question_embeddings:
+  pipe0_question_embeddings:
     type: SentenceEmbeddings
     priority: 0.3
     # LOAD AND FREEZE #
@@ -66,7 +66,7 @@ pipeline:
       outputs: embedded_questions      
   
   # Model 2: question RNN
-  flow0_lstm:
+  pipe0_lstm:
     priority: 0.4
     type: RecurrentNeuralNetwork
     cell_type: LSTM
@@ -87,12 +87,12 @@ pipeline:
       input_size: embeddings_size
       prediction_size: question_lstm_output_size
 
-  ################# FLOW 1: CATEGORY #################
+  ################# PIPE 1: CATEGORY #################
 
-  ################# FLOW 2: C1 question #################
+  ################# PIPE 2: C1 question #################
 
-  # Answer encoding for flow 2.
-  flow2_c1_answer_indexer:
+  # Answer encoding for pipe 2.
+  pipe2_c1_answer_indexer:
     type: LabelIndexer
     priority: 2.0
     data_folder: ~/data/vqa-med
@@ -101,13 +101,13 @@ pipeline:
     export_word_mappings_to_globals: True
     streams:
       inputs: answers
-      outputs: flow2_c1_answers_without_yn_ids
+      outputs: pipe2_c1_answers_without_yn_ids
     globals:
       vocabulary_size: vocabulary_size_c1_without_yn
       word_mappings: word_mappings_c1_without_yn
 
   # Sample masking based on categories.
-  flow2_c1_string_to_mask:
+  pipe2_c1_string_to_mask:
     priority: 2.1
     type: StringToMask
     globals:
@@ -115,33 +115,33 @@ pipeline:
     streams:
       strings: category_names # predicted_question_categories_names ## FOR NOW!
       string_indices: predicted_c1_question_categories_indices # NOT USED
-      masks: flow2_c1_masks
+      masks: pipe2_c1_masks
 
   # Model 4: FFN C1 answering
-  flow2_c1_lstm:
+  pipe2_c1_lstm:
     priority: 2.2
     type: FeedForwardNetwork
     hidden: [50]
     dropout_rate: 0.5
     streams:
       inputs: lstm_activations_questions
-      predictions: flow2_c1_predictions
+      predictions: pipe2_c1_predictions
     globals:
       input_size: question_lstm_output_size # Set by global publisher
       prediction_size: vocabulary_size_c1_without_yn
 
-  flow2_c1_nllloss:
+  pipe2_c1_nllloss:
     type: NLLLoss
     priority: 2.3
     targets_dim: 1
     use_masking: True
     streams:
-      predictions: flow2_c1_predictions
-      masks: flow2_c1_masks
-      targets: flow2_c1_answers_without_yn_ids
-      loss: flow2_c1_loss
+      predictions: pipe2_c1_predictions
+      masks: pipe2_c1_masks
+      targets: pipe2_c1_answers_without_yn_ids
+      loss: pipe2_c1_loss
 
-  flow2_c1_precision_recall:
+  pipe2_c1_precision_recall:
     type: PrecisionRecallStatistics
     priority: 2.4
     use_masking: True
@@ -149,21 +149,21 @@ pipeline:
     #show_class_scores: True
     #show_confusion_matrix: True
     streams:
-      masks: flow2_c1_masks
-      predictions: flow2_c1_predictions
-      targets: flow2_c1_answers_without_yn_ids
+      masks: pipe2_c1_masks
+      predictions: pipe2_c1_predictions
+      targets: pipe2_c1_answers_without_yn_ids
     globals:
       word_mappings: word_mappings_c1_without_yn
       num_classes: vocabulary_size_c1_without_yn
     statistics:
-      precision: flow2_c1_precision
-      recall: flow2_c1_recall
-      f1score: flow2_c1_f1score
+      precision: pipe2_c1_precision
+      recall: pipe2_c1_recall
+      f1score: pipe2_c1_f1score
 
-  ################# FLOW 3: BINARY question #################
+  ################# PIPE 3: BINARY question #################
 
-  # Answer encoding for flow 3.
-  flow3_binary_answer_indexer:
+  # Answer encoding for pipe 3.
+  pipe3_binary_answer_indexer:
     type: LabelIndexer
     priority: 3.0
     data_folder: ~/data/vqa-med
@@ -172,12 +172,12 @@ pipeline:
     export_word_mappings_to_globals: True
     streams:
       inputs: answers
-      outputs: flow3_binary_answers_ids
+      outputs: pipe3_binary_answers_ids
     globals:
       vocabulary_size: vocabulary_size_binary_yn
       word_mappings: word_mappings_binary_yn
 
-  flow3_binary_string_to_mask:
+  pipe3_binary_string_to_mask:
     priority: 3.1
     type: StringToMask
     globals:
@@ -185,33 +185,33 @@ pipeline:
     streams:
       strings: category_names # predicted_question_categories_names ## FOR NOW!
       string_indices: predicted_binary_question_categories_indices # NOT USED
-      masks: flow3_binary_masks
+      masks: pipe3_binary_masks
 
   # Model 4: FFN C1 answering
-  flow3_binary_lstm:
+  pipe3_binary_lstm:
     priority: 3.2
     type: FeedForwardNetwork
     hidden: [50]
     dropout_rate: 0.5
     streams:
       inputs: lstm_activations_questions
-      predictions: flow3_binary_predictions
+      predictions: pipe3_binary_predictions
     globals:
       input_size: question_lstm_output_size # Set by global publisher
       prediction_size: vocabulary_size_binary_yn
 
-  flow3_binary_nllloss:
+  pipe3_binary_nllloss:
     type: NLLLoss
     priority: 3.3
     targets_dim: 1
     use_masking: True
     streams:
-      predictions: flow3_binary_predictions
-      masks: flow3_binary_masks
-      targets: flow3_binary_answers_ids
-      loss: flow3_binary_loss
+      predictions: pipe3_binary_predictions
+      masks: pipe3_binary_masks
+      targets: pipe3_binary_answers_ids
+      loss: pipe3_binary_loss
 
-  flow3_binary_precision_recall:
+  pipe3_binary_precision_recall:
     type: PrecisionRecallStatistics
     priority: 3.4
     use_masking: True
@@ -219,22 +219,22 @@ pipeline:
     #show_class_scores: True
     #show_confusion_matrix: True
     streams:
-      masks: flow3_binary_masks
-      predictions: flow3_binary_predictions
-      targets: flow3_binary_answers_ids
+      masks: pipe3_binary_masks
+      predictions: pipe3_binary_predictions
+      targets: pipe3_binary_answers_ids
     globals:
       word_mappings: word_mappings_binary_yn
       num_classes: vocabulary_size_binary_yn
     statistics:
-      precision: flow3_binary_precision
-      recall: flow3_binary_recall
-      f1score: flow3_binary_f1score
+      precision: pipe3_binary_precision
+      recall: pipe3_binary_recall
+      f1score: pipe3_binary_f1score
 
   # Viewers.
   viewer:
     type: StreamViewer
     priority: 100.1
-    input_streams: questions,answers, category_names,predicted_question_categories_names, flow2_c1_masks,flow2_c1_answers_without_yn_ids,flow2_c1_predictions, flow3_binary_masks,flow3_binary_answers_ids,flow3_binary_predictions
+    input_streams: questions,answers, category_names,predicted_question_categories_names, pipe2_c1_masks,pipe2_c1_answers_without_yn_ids,pipe2_c1_predictions, pipe3_binary_masks,pipe3_binary_answers_ids,pipe3_binary_predictions
 
 
 #: pipeline
diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml
new file mode 100644
index 0000000..4ffc007
--- /dev/null
+++ b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml
@@ -0,0 +1,401 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+  problem:
+    categories: C1
+  sampler:
+    name: WeightedRandomSampler
+    weights: ~/data/vqa-med/answers.c1.weights.csv
+  dataloader:
+    num_workers: 4
+
+# Validation parameters:
+validation:
+  problem:
+    categories: C1
+  dataloader:
+    num_workers: 4
+
+
+pipeline:
+  name: c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses
+  
+  ################# PIPE 0: SHARED #################
+
+  # Add global variables.
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 0
+    # Add input_size to globals.
+    keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_binary_word_to_ix]
+    values: [100, 2, 10, 100,{"C1": 0}, {"BINARY": 0}]
+
+  # Statistics.
+  batch_size:
+    type: BatchSizeStatistics
+    priority: 0.1
+
+  # Questions encoding.
+  pipe1_question_tokenizer:
+    priority: 0.2
+    type: SentenceTokenizer
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  ################# PIPE 0: CATEGORY #################
+
+  # Model 1: question embeddings
+  pipe0_question_embeddings:
+    type: SentenceEmbeddings
+    priority: 0.3
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: question_embeddings
+    freeze: True
+    ###################
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: pipe0_embedded_questions      
+  
+  # Model 2: question RNN
+  pipe0_lstm:
+    priority: 0.4
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: lstm
+    freeze: True
+    ###################
+    prediction_mode: Last
+    initial_state_trainable: True
+    use_logsoftmax: False
+    dropout_rate: 0.5
+    streams:
+      inputs: pipe0_embedded_questions
+      predictions: pipe0_questions_activations
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_lstm_output_size
+
+  # Model 3: FFN question category
+  pipe0_classifier:
+    priority: 0.5
+    type: FeedForwardNetwork
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: classifier
+    freeze: True
+    ###################
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: pipe0_questions_activations
+      predictions: pipe0_predicted_question_categories_preds
+    globals:
+      input_size: question_lstm_output_size # Set by global publisher
+      prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK
+
+  pipe0_category_decoder:
+    priority: 0.6
+    type: WordDecoder
+    # Use the same word mappings as label indexer.
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: pipe0_predicted_question_categories_preds
+      outputs: pipe0_predicted_question_categories_names
+    globals:
+      vocabulary_size: num_categories
+      word_mappings: category_word_mappings
+
+  pipe0_category_accuracy:
+    type: AccuracyStatistics
+    priority: 0.7
+    streams:
+      targets: category_ids
+      predictions: pipe0_predicted_question_categories_preds
+    statistics:
+      accuracy: categorization_accuracy
+  
+
+
+  ################# PIPE 1: SHARED QUESTION ENCODER #################
+
+  # Model 1: question embeddings
+  pipe1_question_embeddings:
+    type: SentenceEmbeddings
+    priority: 1.1
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions      
+  
+  # Model 2: question RNN
+  pipe1_lstm:
+    priority: 1.2
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    prediction_mode: Last
+    initial_state_trainable: True
+    use_logsoftmax: False
+    dropout_rate: 0.5
+    streams:
+      inputs: embedded_questions
+      predictions: questions_activations
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_lstm_output_size
+
+  # Answer encoding
+  pipe1_all_answer_indexer:
+    type: LabelIndexer
+    priority: 1.3
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c1.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: all_answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_all_c1_binasry
+      word_mappings: word_mappings_all_c1_binary
+
+  ################# PIPE 2: SHARED IMAGE ENCODER #################
+
+  # Image encoder.
+  image_encoder:
+    type: TorchVisionWrapper
+    priority: 2.1
+    streams:
+      inputs: images
+      predictions: image_activations
+    globals:
+      prediction_size: image_encoder_output_size
+
+  ################# PIPE 3: SHARED IMAGE SIZE ENCODER #################
+
+  # Model - image size encoder.
+  image_size_encoder:
+    type: FeedForwardNetwork 
+    priority: 3.1
+    streams:
+      inputs: image_sizes
+      predictions: image_size_activations
+    globals:
+      input_size: image_size_encoder_input_size
+      prediction_size: image_size_encoder_output_size
+
+  ################# PIPE 4: SHARED CONCAT #################
+
+  concat:
+    type: Concatenation
+    priority: 4.1
+    input_streams: [questions_activations,image_activations,image_size_activations]
+    # Dimension along which the input streams are concatenated.
+    dim: 1 # default
+    input_dims: [[-1,100],[-1,100],[-1,10]]
+    output_dims: [-1,210]
+    streams:
+      outputs: concatenated_activations
+    globals:
+      output_size: concatenated_activations_size
+
+
+  ################# PIPE 5: C1 question #################
+
+  # Answer encoding for PIPE 5.
+  pipe5_c1_answer_indexer:
+    type: LabelIndexer
+    priority: 5.1
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c1_without_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: pipe5_c1_answers_without_yn_ids
+    globals:
+      vocabulary_size: vocabulary_size_c1_without_yn
+      word_mappings: word_mappings_c1_without_yn
+
+  # Sample masking based on categories.
+  pipe5_c1_string_to_mask:
+    priority: 5.2
+    type: StringToMask
+    globals:
+      word_mappings: category_c1_word_to_ix
+    streams:
+      strings: pipe0_predicted_question_categories_names
+      string_indices: predicted_c1_question_categories_indices # NOT USED
+      masks: pipe5_c1_masks
+
+  # Model 4: FFN C1 answering
+  pipe5_c1_ffn:
+    priority: 5.3
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: concatenated_activations
+      predictions: pipe5_c1_predictions
+    globals:
+      input_size: concatenated_activations_size
+      prediction_size: vocabulary_size_c1_without_yn
+
+  pipe5_c1_nllloss:
+    type: NLLLoss
+    priority: 5.4
+    targets_dim: 1
+    use_masking: True
+    streams:
+      predictions: pipe5_c1_predictions
+      masks: pipe5_c1_masks
+      targets: pipe5_c1_answers_without_yn_ids
+      loss: pipe5_c1_loss
+
+  pipe5_c1_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 5.5
+    use_word_mappings: True
+    use_masking: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      masks: pipe5_c1_masks
+      predictions: pipe5_c1_predictions
+      targets: pipe5_c1_answers_without_yn_ids
+    globals:
+      word_mappings: word_mappings_c1_without_yn
+      num_classes: vocabulary_size_c1_without_yn
+    statistics:
+      precision: pipe5_c1_precision
+      recall: pipe5_c1_recall
+      f1score: pipe5_c1_f1score
+
+  ################# PIPE 6: BINARY question #################
+
+  # Answer encoding for pipe 6.
+  pipe6_binary_answer_indexer:
+    type: LabelIndexer
+    priority: 6.1
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.binary_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: pipe6_binary_answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_binary_yn
+      word_mappings: word_mappings_binary_yn
+
+  pipe6_binary_string_to_mask:
+    priority: 6.2
+    type: StringToMask
+    globals:
+      word_mappings: category_binary_word_to_ix
+    streams:
+      strings: pipe0_predicted_question_categories_names
+      string_indices: predicted_binary_question_categories_indices # NOT USED
+      masks: pipe6_binary_masks
+
+  # Model 4: FFN binary answering
+  pipe6_binary_ffn:
+    priority: 6.3
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: concatenated_activations
+      predictions: pipe6_binary_predictions
+    globals:
+      input_size: concatenated_activations_size
+      prediction_size: vocabulary_size_binary_yn
+
+  pipe6_binary_nllloss:
+    type: NLLLoss
+    priority: 6.4
+    targets_dim: 1
+    use_masking: True
+    streams:
+      predictions: pipe6_binary_predictions
+      masks: pipe6_binary_masks
+      targets: pipe6_binary_answers_ids
+      loss: pipe6_binary_loss
+
+  pipe6_binary_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 6.5
+    use_word_mappings: True
+    use_masking: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      masks: pipe6_binary_masks
+      predictions: pipe6_binary_predictions
+      targets: pipe6_binary_answers_ids
+    globals:
+      word_mappings: word_mappings_binary_yn
+      num_classes: vocabulary_size_binary_yn
+    statistics:
+      precision: pipe6_binary_precision
+      recall: pipe6_binary_recall
+      f1score: pipe6_binary_f1score
+
+  ################# PIPE 7: MERGE ANSWERS #################
+
+  # Merge predictions
+  pipe7_merged_predictions:
+    type: JoinMaskedPredictions
+    priority: 7.1
+    # Names of used input streams.
+    input_prediction_streams: [pipe5_c1_predictions, pipe6_binary_predictions]
+    input_mask_streams: [pipe5_c1_masks, pipe6_binary_masks]
+    input_word_mappings: [word_mappings_c1_without_yn, word_mappings_binary_yn]
+    globals:
+      output_word_mappings: word_mappings_all_c1_binary
+    streams:
+      output_strings: pipe7_merged_predictions
+      output_indices: pipe7_merged_pred_indices
+
+  # Statistics.
+  pipe7_merged_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 7.2
+    # Use prediction indices instead of distributions.
+    use_prediction_distributions: False
+    use_word_mappings: True
+    show_class_scores: True
+    show_confusion_matrix: True
+    globals:
+      word_mappings: word_mappings_all_c1_binary
+    streams:
+      targets: all_answers_ids
+      predictions: pipe7_merged_pred_indices
+    statistics:
+      precision: pipe7_merged_precision
+      recall: pipe7_merged_recall
+      f1score: pipe7_merged_f1score
+
+  # Viewers.
+  viewer:
+    type: StreamViewer
+    priority: 4.3
+    input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_binary_masks,pipe6_binary_answers_ids,pipe6_binary_predictions, pipe7_merged_predictions
+
+
+#: pipeline
diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses.yml
similarity index 74%
rename from configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
rename to configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses.yml
index a7e755d..b7281c9 100644
--- a/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml
+++ b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses.yml
@@ -20,9 +20,9 @@ validation:
 
 
 pipeline:
-  name: c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses
+  name: c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses
   
-  ################# FLOW 0: SHARED #################
+  ################# PIPE 0: SHARED #################
 
   # Add global variables.
   global_publisher:
@@ -30,17 +30,17 @@ pipeline:
     priority: 0
     # Add input_size to globals.
     keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_binary_word_to_ix]
-    values: [100, 2, 10, 100,{"C1": 0}, {"BINARY": 0}]
+    values: [100, 2, 10, 100, {"C1": 0}, {"BINARY": 0}]
 
   # Statistics.
   batch_size:
     type: BatchSizeStatistics
     priority: 0.1
 
-  ################# FLOW 0: SHARED QUESTION #################
+  ################# PIPE 0: SHARED QUESTION #################
 
-    # Questions encoding.
-  flow0_question_tokenizer:
+  # Questions encoding.
+  pipe0_question_tokenizer:
     priority: 0.2
     type: SentenceTokenizer
     streams: 
@@ -48,7 +48,7 @@ pipeline:
       outputs: tokenized_questions
 
   # Model 1: question embeddings
-  flow0_question_embeddings:
+  pipe0_question_embeddings:
     type: SentenceEmbeddings
     priority: 0.3
     # LOAD AND FREEZE #
@@ -66,7 +66,7 @@ pipeline:
       outputs: embedded_questions      
   
   # Model 2: question RNN
-  flow0_lstm:
+  pipe0_lstm:
     priority: 0.4
     type: RecurrentNeuralNetwork
     cell_type: LSTM
@@ -88,7 +88,7 @@ pipeline:
       prediction_size: question_lstm_output_size
 
   # Answer encoding
-  flow0_all_answer_indexer:
+  pipe0_all_answer_indexer:
     type: LabelIndexer
     priority: 0.6
     data_folder: ~/data/vqa-med
@@ -103,10 +103,10 @@ pipeline:
       word_mappings: word_mappings_all_c1_binary
 
 
-  ################# FLOW 1: CATEGORY #################
+  ################# PIPE 1: CATEGORY #################
 
   # Model 1: question embeddings
-  flow1_question_embeddings:
+  pipe1_question_embeddings:
     type: SentenceEmbeddings
     priority: 1.1
     # LOAD AND FREEZE #
@@ -121,10 +121,10 @@ pipeline:
     word_mappings_file: questions.all.word.mappings.csv
     streams:
       inputs: tokenized_questions
-      outputs: flow1_embedded_questions      
+      outputs: pipe1_embedded_questions      
   
   # Model 2: question RNN
-  flow1_lstm:
+  pipe1_lstm:
     priority: 1.2
     type: RecurrentNeuralNetwork
     cell_type: LSTM
@@ -139,14 +139,14 @@ pipeline:
     use_logsoftmax: False
     dropout_rate: 0.5
     streams:
-      inputs: flow1_embedded_questions
-      predictions: flow1_lstm_activations_questions
+      inputs: pipe1_embedded_questions
+      predictions: pipe1_lstm_activations_questions
     globals:
       input_size: embeddings_size
       prediction_size: question_lstm_output_size
 
   # Model 3: FFN question category
-  flow1_classifier:
+  pipe1_classifier:
     priority: 1.3
     type: FeedForwardNetwork
     # LOAD AND FREEZE #
@@ -158,37 +158,37 @@ pipeline:
     hidden: [50]
     dropout_rate: 0.5
     streams:
-      inputs: flow1_lstm_activations_questions
-      predictions: flow1_predicted_question_categories_preds
+      inputs: pipe1_lstm_activations_questions
+      predictions: pipe1_predicted_question_categories_preds
     globals:
       input_size: question_lstm_output_size # Set by global publisher
       prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK
 
-  flow1_category_decoder:
+  pipe1_category_decoder:
     priority: 1.4
     type: WordDecoder
     # Use the same word mappings as label indexer.
     import_word_mappings_from_globals: True
     streams:
-      inputs: flow1_predicted_question_categories_preds
-      outputs: flow1_predicted_question_categories_names
+      inputs: pipe1_predicted_question_categories_preds
+      outputs: pipe1_predicted_question_categories_names
     globals:
       vocabulary_size: num_categories
       word_mappings: category_word_mappings
 
-  flow1_category_accuracy:
+  pipe1_category_accuracy:
     type: AccuracyStatistics
     priority: 1.5
     streams:
       targets: category_ids
-      predictions: flow1_predicted_question_categories_preds
+      predictions: pipe1_predicted_question_categories_preds
     statistics:
       accuracy: categorization_accuracy
 
-  ################# FLOW 2: C1 question #################
+  ################# PIPE 2: C1 question #################
 
-  # Answer encoding for flow 2.
-  flow2_c1_answer_indexer:
+  # Answer encoding for PIPE 2.
+  pipe2_c1_answer_indexer:
     type: LabelIndexer
     priority: 2.0
     data_folder: ~/data/vqa-med
@@ -197,47 +197,47 @@ pipeline:
     export_word_mappings_to_globals: True
     streams:
       inputs: answers
-      outputs: flow2_c1_answers_without_yn_ids
+      outputs: pipe2_c1_answers_without_yn_ids
     globals:
       vocabulary_size: vocabulary_size_c1_without_yn
       word_mappings: word_mappings_c1_without_yn
 
   # Sample masking based on categories.
-  flow2_c1_string_to_mask:
+  pipe2_c1_string_to_mask:
     priority: 2.1
     type: StringToMask
     globals:
       word_mappings: category_c1_word_to_ix
     streams:
-      strings: flow1_predicted_question_categories_names
+      strings: pipe1_predicted_question_categories_names
       string_indices: predicted_c1_question_categories_indices # NOT USED
-      masks: flow2_c1_masks
+      masks: pipe2_c1_masks
 
   # Model 4: FFN C1 answering
-  flow2_c1_lstm:
+  pipe2_c1_lstm:
     priority: 2.2
     type: FeedForwardNetwork
     hidden: [50]
     dropout_rate: 0.5
     streams:
       inputs: lstm_activations_questions
-      predictions: flow2_c1_predictions
+      predictions: pipe2_c1_predictions
     globals:
       input_size: question_lstm_output_size # Set by global publisher
       prediction_size: vocabulary_size_c1_without_yn
 
-  flow2_c1_nllloss:
+  pipe2_c1_nllloss:
     type: NLLLoss
     priority: 2.3
     targets_dim: 1
     use_masking: True
     streams:
-      predictions: flow2_c1_predictions
-      masks: flow2_c1_masks
-      targets: flow2_c1_answers_without_yn_ids
-      loss: flow2_c1_loss
+      predictions: pipe2_c1_predictions
+      masks: pipe2_c1_masks
+      targets: pipe2_c1_answers_without_yn_ids
+      loss: pipe2_c1_loss
 
-  flow2_c1_precision_recall:
+  pipe2_c1_precision_recall:
     type: PrecisionRecallStatistics
     priority: 2.4
     use_word_mappings: True
@@ -245,21 +245,21 @@ pipeline:
     #show_class_scores: True
     #show_confusion_matrix: True
     streams:
-      masks: flow2_c1_masks
-      predictions: flow2_c1_predictions
-      targets: flow2_c1_answers_without_yn_ids
+      masks: pipe2_c1_masks
+      predictions: pipe2_c1_predictions
+      targets: pipe2_c1_answers_without_yn_ids
     globals:
       word_mappings: word_mappings_c1_without_yn
       num_classes: vocabulary_size_c1_without_yn
     statistics:
-      precision: flow2_c1_precision
-      recall: flow2_c1_recall
-      f1score: flow2_c1_f1score
+      precision: pipe2_c1_precision
+      recall: pipe2_c1_recall
+      f1score: pipe2_c1_f1score
 
-  ################# FLOW 3: BINARY question #################
+  ################# PIPE 3: BINARY question #################
 
-  # Answer encoding for flow 3.
-  flow3_binary_answer_indexer:
+  # Answer encoding for PIPE 3.
+  pipe3_binary_answer_indexer:
     type: LabelIndexer
     priority: 3.0
     data_folder: ~/data/vqa-med
@@ -268,46 +268,46 @@ pipeline:
     export_word_mappings_to_globals: True
     streams:
       inputs: answers
-      outputs: flow3_binary_answers_ids
+      outputs: pipe3_binary_answers_ids
     globals:
       vocabulary_size: vocabulary_size_binary_yn
       word_mappings: word_mappings_binary_yn
 
-  flow3_binary_string_to_mask:
+  pipe3_binary_string_to_mask:
     priority: 3.1
     type: StringToMask
     globals:
       word_mappings: category_binary_word_to_ix
     streams:
-      strings: flow1_predicted_question_categories_names
+      strings: pipe1_predicted_question_categories_names
       string_indices: predicted_binary_question_categories_indices # NOT USED
-      masks: flow3_binary_masks
+      masks: pipe3_binary_masks
 
   # Model 4: FFN C1 answering
-  flow3_binary_lstm:
+  pipe3_binary_lstm:
     priority: 3.2
     type: FeedForwardNetwork
     hidden: [50]
     dropout_rate: 0.5
     streams:
       inputs: lstm_activations_questions
-      predictions: flow3_binary_predictions
+      predictions: pipe3_binary_predictions
     globals:
       input_size: question_lstm_output_size # Set by global publisher
       prediction_size: vocabulary_size_binary_yn
 
-  flow3_binary_nllloss:
+  pipe3_binary_nllloss:
     type: NLLLoss
     priority: 3.3
     targets_dim: 1
     use_masking: True
     streams:
-      predictions: flow3_binary_predictions
-      masks: flow3_binary_masks
-      targets: flow3_binary_answers_ids
-      loss: flow3_binary_loss
+      predictions: pipe3_binary_predictions
+      masks: pipe3_binary_masks
+      targets: pipe3_binary_answers_ids
+      loss: pipe3_binary_loss
 
-  flow3_binary_precision_recall:
+  pipe3_binary_precision_recall:
     type: PrecisionRecallStatistics
     priority: 3.4
     use_word_mappings: True
@@ -315,26 +315,26 @@ pipeline:
     #show_class_scores: True
     #show_confusion_matrix: True
     streams:
-      masks: flow3_binary_masks
-      predictions: flow3_binary_predictions
-      targets: flow3_binary_answers_ids
+      masks: pipe3_binary_masks
+      predictions: pipe3_binary_predictions
+      targets: pipe3_binary_answers_ids
     globals:
       word_mappings: word_mappings_binary_yn
       num_classes: vocabulary_size_binary_yn
     statistics:
-      precision: flow3_binary_precision
-      recall: flow3_binary_recall
-      f1score: flow3_binary_f1score
+      precision: pipe3_binary_precision
+      recall: pipe3_binary_recall
+      f1score: pipe3_binary_f1score
 
-  ################# FLOW 4: MERGE ANSWERS #################
+  ################# PIPE 4: MERGE ANSWERS #################
 
   # Merge predictions
   merged_predictions:
     type: JoinMaskedPredictions
     priority: 4.1
     # Names of used input streams.
-    input_prediction_streams: [flow2_c1_predictions, flow3_binary_predictions]
-    input_mask_streams: [flow2_c1_masks, flow3_binary_masks]
+    input_prediction_streams: [pipe2_c1_predictions, pipe3_binary_predictions]
+    input_mask_streams: [pipe2_c1_masks, pipe3_binary_masks]
     input_word_mappings: [word_mappings_c1_without_yn, word_mappings_binary_yn]
     globals:
       output_word_mappings: word_mappings_all_c1_binary
@@ -365,7 +365,7 @@ pipeline:
   viewer:
     type: StreamViewer
     priority: 4.3
-    input_streams: questions,answers, category_names,predicted_question_categories_names, flow2_c1_masks,flow2_c1_answers_without_yn_ids,flow2_c1_predictions, flow3_binary_masks,flow3_binary_answers_ids,flow3_binary_predictions, merged_predictions
+    input_streams: questions,answers, category_names,predicted_question_categories_names, pipe2_c1_masks,pipe2_c1_answers_without_yn_ids,pipe2_c1_predictions, pipe3_binary_masks,pipe3_binary_answers_ids,pipe3_binary_predictions, merged_predictions
 
 
 #: pipeline

From 88b449f4c5a502b93033b731dc8b05114d6571b8 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Wed, 17 Apr 2019 13:02:24 -0700
Subject: [PATCH 20/39] Changed default out_of_vocabulary value to -100, which
 is used by PyTorch to indicate targets that will be omitted

---
 configs/default/components/masking/string_to_mask.yml | 7 +++++--
 configs/default/components/text/label_indexer.yml     | 7 +++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/configs/default/components/masking/string_to_mask.yml b/configs/default/components/masking/string_to_mask.yml
index bcc363d..2b74456 100644
--- a/configs/default/components/masking/string_to_mask.yml
+++ b/configs/default/components/masking/string_to_mask.yml
@@ -4,9 +4,12 @@
 # 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
 ####################################################################
 
-# Value that will be used when word is out of vocavbulary (LOADED)
+# Value that will be used when word is out of vocabulary (LOADED)
 # (Mask for that element will be 0 as well)
-out_of_vocabulary_value: -1
+# -100 is the default value used by PyTorch loss functions to specify
+# target values that will be ignored and do not contribute to the input gradient.
+# (ignore_index=-100)
+out_of_vocabulary_value: -100
 
 streams: 
   ####################################################################
diff --git a/configs/default/components/text/label_indexer.yml b/configs/default/components/text/label_indexer.yml
index 5b871e9..bfe9aa0 100644
--- a/configs/default/components/text/label_indexer.yml
+++ b/configs/default/components/text/label_indexer.yml
@@ -25,9 +25,12 @@ import_word_mappings_from_globals: False
 # Flag informing whether word mappings will be exported to globals (LOADED)
 export_word_mappings_to_globals: False
 
-# Value that will be used when word is out of vocavbulary (LOADED)
+# Value that will be used when word is out of vocabulary (LOADED)
 # (Mask for that element will be 0 as well)
-out_of_vocabulary_value: -1
+# -100 is the default value used by PyTorch loss functions to specify
+# target values that will be ignored and do not contribute to the input gradient.
+# (ignore_index=-100)
+out_of_vocabulary_value: -100
 
 streams: 
   ####################################################################

From d47ddd321c1d316e02336c4b2f5af61f9efd6c90 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Wed, 17 Apr 2019 13:02:46 -0700
Subject: [PATCH 21/39] c1 + c3 + Y/N multimodal config

---
 ...nn_shared_all_encoders_two_ffns_losses.yml |   2 +-
 ...nn_shared_all_encoders_two_ffns_losses.yml | 472 ++++++++++++++++++
 2 files changed, 473 insertions(+), 1 deletion(-)
 create mode 100644 configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml

diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml
index 4ffc007..6996f91 100644
--- a/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml
+++ b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml
@@ -394,7 +394,7 @@ pipeline:
   # Viewers.
   viewer:
     type: StreamViewer
-    priority: 4.3
+    priority: 7.3
     input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_binary_masks,pipe6_binary_answers_ids,pipe6_binary_predictions, pipe7_merged_predictions
 
 
diff --git a/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml
new file mode 100644
index 0000000..efa6580
--- /dev/null
+++ b/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml
@@ -0,0 +1,472 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+  problem:
+    categories: C1,C3
+  sampler:
+    name: WeightedRandomSampler
+    weights: ~/data/vqa-med/answers.c1_c3_binary_yn.weights.csv
+  dataloader:
+    num_workers: 4
+
+# Validation parameters:
+validation:
+  problem:
+    categories: C1,C3
+  dataloader:
+    num_workers: 4
+
+
+pipeline:
+  name: c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses
+  
+  ################# PIPE 0: SHARED #################
+
+  # Add global variables.
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 0
+    # Add input_size to globals.
+    keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_c3_word_to_ix, category_binary_word_to_ix]
+    values: [100, 2, 10, 100,{"C1": 0}, {"C3": 0}, {"BINARY": 0}]
+
+  # Statistics.
+  batch_size:
+    type: BatchSizeStatistics
+    priority: 0.1
+
+  # Questions encoding.
+  pipe1_question_tokenizer:
+    priority: 0.2
+    type: SentenceTokenizer
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  ################# PIPE 0: CATEGORY #################
+
+  # Model 1: question embeddings
+  pipe0_question_embeddings:
+    type: SentenceEmbeddings
+    priority: 0.3
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: question_embeddings
+    freeze: True
+    ###################
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: pipe0_embedded_questions      
+  
+  # Model 2: question RNN
+  pipe0_lstm:
+    priority: 0.4
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: lstm
+    freeze: True
+    ###################
+    prediction_mode: Last
+    initial_state_trainable: True
+    use_logsoftmax: False
+    dropout_rate: 0.5
+    streams:
+      inputs: pipe0_embedded_questions
+      predictions: pipe0_questions_activations
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_lstm_output_size
+
+  # Model 3: FFN question category
+  pipe0_classifier:
+    priority: 0.5
+    type: FeedForwardNetwork
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: classifier
+    freeze: True
+    ###################
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: pipe0_questions_activations
+      predictions: pipe0_predicted_question_categories_preds
+    globals:
+      input_size: question_lstm_output_size # Set by global publisher
+      prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK
+
+  pipe0_category_decoder:
+    priority: 0.6
+    type: WordDecoder
+    # Use the same word mappings as label indexer.
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: pipe0_predicted_question_categories_preds
+      outputs: pipe0_predicted_question_categories_names
+    globals:
+      vocabulary_size: num_categories
+      word_mappings: category_word_mappings
+
+  pipe0_category_accuracy:
+    type: AccuracyStatistics
+    priority: 0.7
+    streams:
+      targets: category_ids
+      predictions: pipe0_predicted_question_categories_preds
+    statistics:
+      accuracy: categorization_accuracy
+  
+
+
+  ################# PIPE 1: SHARED QUESTION ENCODER #################
+
+  # Model 1: question embeddings
+  pipe1_question_embeddings:
+    type: SentenceEmbeddings
+    priority: 1.1
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions      
+  
+  # Model 2: question RNN
+  pipe1_lstm:
+    priority: 1.2
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    prediction_mode: Last
+    initial_state_trainable: True
+    use_logsoftmax: False
+    dropout_rate: 0.5
+    streams:
+      inputs: embedded_questions
+      predictions: questions_activations
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_lstm_output_size
+
+  # Answer encoding
+  pipe1_all_answer_indexer:
+    type: LabelIndexer
+    priority: 1.3
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c1_c3_binary_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: all_answers_ids
+    globals:
+      #vocabulary_size: vocabulary_size_all_c1_c3_binary
+      word_mappings: word_mappings_all_c1_c3_binary
+
+  ################# PIPE 2: SHARED IMAGE ENCODER #################
+
+  # Image encoder.
+  image_encoder:
+    type: TorchVisionWrapper
+    priority: 2.1
+    streams:
+      inputs: images
+      predictions: image_activations
+    globals:
+      prediction_size: image_encoder_output_size
+
+  ################# PIPE 3: SHARED IMAGE SIZE ENCODER #################
+
+  # Model - image size encoder.
+  image_size_encoder:
+    type: FeedForwardNetwork 
+    priority: 3.1
+    streams:
+      inputs: image_sizes
+      predictions: image_size_activations
+    globals:
+      input_size: image_size_encoder_input_size
+      prediction_size: image_size_encoder_output_size
+
+  ################# PIPE 4: SHARED CONCAT #################
+
+  concat:
+    type: Concatenation
+    priority: 4.1
+    input_streams: [questions_activations,image_activations,image_size_activations]
+    # Concatenation 
+    dim: 1 # default
+    input_dims: [[-1,100],[-1,100],[-1,10]]
+    output_dims: [-1,210]
+    streams:
+      outputs: concatenated_activations
+    globals:
+      output_size: concatenated_activations_size
+
+
+  ################# PIPE 5: C1 question #################
+
+  # Answer encoding for PIPE 5.
+  pipe5_c1_answer_indexer:
+    type: LabelIndexer
+    priority: 5.1
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c1_without_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: pipe5_c1_answers_without_yn_ids
+    globals:
+      vocabulary_size: vocabulary_size_c1_without_yn
+      word_mappings: word_mappings_c1_without_yn
+
+  # Sample masking based on categories.
+  pipe5_c1_string_to_mask:
+    priority: 5.2
+    type: StringToMask
+    globals:
+      word_mappings: category_c1_word_to_ix
+    streams:
+      strings: pipe0_predicted_question_categories_names
+      string_indices: predicted_c1_question_categories_indices # NOT USED
+      masks: pipe5_c1_masks
+
+  # Model 4: FFN C1 answering
+  pipe5_c1_ffn:
+    priority: 5.3
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: concatenated_activations
+      predictions: pipe5_c1_predictions
+    globals:
+      input_size: concatenated_activations_size
+      prediction_size: vocabulary_size_c1_without_yn
+
+  pipe5_c1_nllloss:
+    type: NLLLoss
+    priority: 5.4
+    targets_dim: 1
+    use_masking: True
+    streams:
+      predictions: pipe5_c1_predictions
+      masks: pipe5_c1_masks
+      targets: pipe5_c1_answers_without_yn_ids
+      loss: pipe5_c1_loss
+
+  pipe5_c1_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 5.5
+    use_word_mappings: True
+    use_masking: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      masks: pipe5_c1_masks
+      predictions: pipe5_c1_predictions
+      targets: pipe5_c1_answers_without_yn_ids
+    globals:
+      word_mappings: word_mappings_c1_without_yn
+      #num_classes: vocabulary_size_c1_without_yn
+    statistics:
+      precision: pipe5_c1_precision
+      recall: pipe5_c1_recall
+      f1score: pipe5_c1_f1score
+
+  ################# PIPE 6: C3 question #################
+
+  # Answer encoding for PIPE 6.
+  pipe6_c3_answer_indexer:
+    type: LabelIndexer
+    priority: 6.1
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c3.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: pipe6_c3_answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_c3
+      word_mappings: word_mappings_c3
+
+  # Sample masking based on categories.
+  pipe6_c3_string_to_mask:
+    priority: 6.2
+    type: StringToMask
+    globals:
+      word_mappings: category_c3_word_to_ix
+    streams:
+      strings: pipe0_predicted_question_categories_names
+      string_indices: predicted_c3_question_categories_indices # NOT USED
+      masks: pipe6_c3_masks
+
+  # Model 4: FFN C3 answering
+  pipe6_c3_ffn:
+    priority: 6.3
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: concatenated_activations
+      predictions: pipe6_c3_predictions
+    globals:
+      input_size: concatenated_activations_size
+      prediction_size: vocabulary_size_c3
+
+  pipe6_c3_nllloss:
+    type: NLLLoss
+    priority: 6.4
+    targets_dim: 1
+    use_masking: True
+    streams:
+      predictions: pipe6_c3_predictions
+      masks: pipe6_c3_masks
+      targets: pipe6_c3_answers_ids
+      loss: pipe6_c3_loss
+
+  pipe6_c3_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 6.5
+    use_word_mappings: True
+    use_masking: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      masks: pipe6_c3_masks
+      predictions: pipe6_c3_predictions
+      targets: pipe6_c3_answers_ids
+    globals:
+      word_mappings: word_mappings_c3
+      #num_classes: vocabulary_size_c3
+    statistics:
+      precision: pipe6_c3_precision
+      recall: pipe6_c3_recall
+      f1score: pipe6_c3_f1score
+
+  ################# PIPE 7: BINARY question #################
+
+  # Answer encoding for pipe 7.
+  pipe7_binary_answer_indexer:
+    type: LabelIndexer
+    priority: 7.1
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.binary_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: pipe7_binary_answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_binary_yn
+      word_mappings: word_mappings_binary_yn
+
+  pipe7_binary_string_to_mask:
+    priority: 7.2
+    type: StringToMask
+    globals:
+      word_mappings: category_binary_word_to_ix
+    streams:
+      strings: pipe0_predicted_question_categories_names
+      string_indices: predicted_binary_question_categories_indices # NOT USED
+      masks: pipe7_binary_masks
+
+  # Model 4: FFN binary answering
+  pipe7_binary_ffn:
+    priority: 7.3
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: concatenated_activations
+      predictions: pipe7_binary_predictions
+    globals:
+      input_size: concatenated_activations_size
+      prediction_size: vocabulary_size_binary_yn
+
+  pipe7_binary_nllloss:
+    type: NLLLoss
+    priority: 7.4
+    targets_dim: 1
+    use_masking: True
+    streams:
+      predictions: pipe7_binary_predictions
+      masks: pipe7_binary_masks
+      targets: pipe7_binary_answers_ids
+      loss: pipe7_binary_loss
+
+  pipe7_binary_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 7.5
+    use_word_mappings: True
+    use_masking: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      masks: pipe7_binary_masks
+      predictions: pipe7_binary_predictions
+      targets: pipe7_binary_answers_ids
+    globals:
+      word_mappings: word_mappings_binary_yn
+      #num_classes: vocabulary_size_binary_yn
+    statistics:
+      precision: pipe7_binary_precision
+      recall: pipe7_binary_recall
+      f1score: pipe7_binary_f1score
+
+  ################# PIPE 8: MERGE ANSWERS #################
+
+  # Merge predictions
+  pipe8_merged_predictions:
+    type: JoinMaskedPredictions
+    priority: 8.1
+    # Names of used input streams.
+    input_prediction_streams: [pipe5_c1_predictions, pipe6_c3_predictions, pipe7_binary_predictions]
+    input_mask_streams: [pipe5_c1_masks, pipe6_c3_masks, pipe7_binary_masks]
+    input_word_mappings: [word_mappings_c1_without_yn, word_mappings_c3, word_mappings_binary_yn]
+    globals:
+      output_word_mappings: word_mappings_all_c1_c3_binary
+    streams:
+      output_strings: pipe8_merged_predictions
+      output_indices: pipe8_merged_pred_indices
+
+  # Statistics.
+  pipe8_merged_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 8.2
+    # Use prediction indices instead of distributions.
+    use_prediction_distributions: False
+    use_word_mappings: True
+    show_class_scores: True
+    show_confusion_matrix: True
+    globals:
+      word_mappings: word_mappings_all_c1_c3_binary
+    streams:
+      targets: all_answers_ids
+      predictions: pipe8_merged_pred_indices
+    statistics:
+      precision: pipe8_merged_precision
+      recall: pipe8_merged_recall
+      f1score: pipe8_merged_f1score
+
+  # Viewers.
+  viewer:
+    type: StreamViewer
+    priority: 8.3
+    input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_c3_masks,pipe6_c3_answers_ids,pipe6_c3_predictions, pipe7_binary_masks,pipe7_binary_answers_ids,pipe7_binary_predictions, pipe8_merged_predictions
+
+
+#: pipeline

From 8d93eace685a72fee311580af6c51ab2a0976e45 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Wed, 17 Apr 2019 14:17:27 -0700
Subject: [PATCH 22/39] c2 multimodal model

---
 ...c2_classification_all_rnn_vgg16_concat.yml | 101 ++++++++++++++++++
 .../default_c2_classification.yml             |  91 ++++++++++++++++
 2 files changed, 192 insertions(+)
 create mode 100644 configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml
 create mode 100644 configs/vqa_med_2019/c2_classification/default_c2_classification.yml

diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml
new file mode 100644
index 0000000..d4745b6
--- /dev/null
+++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml
@@ -0,0 +1,101 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml
+
+pipeline:
+  name: vqa_med_c2_classification_all_rnn_vgg_concat
+
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 0
+    # Add input_size to globals.
+    keys: [question_embeddings_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size]
+    values: [100, 2, 10, 100]
+
+  # First subpipeline: question.
+  # Questions encoding.
+  question_tokenizer:
+    type: SentenceTokenizer
+    priority: 1.1
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  # Model 1: Embeddings
+  question_embeddings:
+    type: SentenceEmbeddings
+    priority: 1.2
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions      
+  
+  # Model 2: RNN
+  question_lstm:
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    prediction_mode: Last
+    priority: 1.3
+    use_logsoftmax: False
+    initial_state_trainable: False
+    #num_layers: 5
+    hidden_size: 50
+    streams:
+      inputs: embedded_questions
+      predictions: question_activations
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_embeddings_output_size
+
+  # 2nd subpipeline: image size.
+  # Model - image size encoder.
+  image_size_encoder:
+    type: FeedForwardNetwork 
+    priority: 2.1
+    streams:
+      inputs: image_sizes
+      predictions: image_size_activations
+    globals:
+      input_size: image_size_encoder_input_size
+      prediction_size: image_size_encoder_output_size
+
+  # 3rd subpipeline: image.
+  # Image encoder.
+  image_encoder:
+    type: TorchVisionWrapper
+    priority: 3.1
+    streams:
+      inputs: images
+      predictions: image_activations
+    globals:
+      prediction_size: image_encoder_output_size
+  
+  # 4th subpipeline: concatenation + FF.
+  concat:
+    type: Concatenation
+    priority: 4.1
+    input_streams: [question_activations,image_size_activations,image_activations]
+    # Concatenation 
+    dim: 1 # default
+    input_dims: [[-1,100],[-1,10],[-1,100]]
+    output_dims: [-1,210]
+    streams:
+      outputs: concatenated_activations
+    globals:
+      output_size: output_size
+
+
+  classifier:
+    type: FeedForwardNetwork 
+    hidden_sizes: [100]
+    priority: 4.2
+    streams:
+      inputs: concatenated_activations
+    globals:
+      input_size: output_size
+      prediction_size: vocabulary_size_c2
+
+
+  #: pipeline
diff --git a/configs/vqa_med_2019/c2_classification/default_c2_classification.yml b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml
new file mode 100644
index 0000000..3df45b4
--- /dev/null
+++ b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml
@@ -0,0 +1,91 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+  problem:
+    categories: C2
+  sampler:
+    name: WeightedRandomSampler
+    weights: ~/data/vqa-med/answers.c2.weights.csv
+  dataloader:
+    num_workers: 4
+
+# Validation parameters:
+validation:
+  problem:
+    categories: C2
+  dataloader:
+    num_workers: 4
+
+
+pipeline:
+
+  # Answer encoding.
+  answer_indexer:
+    type: LabelIndexer
+    priority: 0.1
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c2.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_c2
+      word_mappings: word_mappings_c2
+
+
+  # Predictions decoder.
+  prediction_decoder:
+    type: WordDecoder
+    priority: 10.1
+    # Use the same word mappings as label indexer.
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: predictions
+      outputs: predicted_answers
+    globals:
+      vocabulary_size: vocabulary_size_c2
+      word_mappings: word_mappings_c2
+
+  # Loss
+  nllloss:
+    type: NLLLoss
+    priority: 10.2
+    targets_dim: 1
+    streams:
+      targets: answers_ids
+      loss: loss
+
+  # Statistics.
+  batch_size:
+    type: BatchSizeStatistics
+    priority: 100.1
+
+  #accuracy:
+  #  type: AccuracyStatistics
+  #  priority: 100.2
+  #  streams:
+  #    targets: answers_ids
+
+  precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 100.3
+    use_word_mappings: True
+    show_class_scores: True
+    show_confusion_matrix: True
+    streams:
+      targets: answers_ids
+    globals:
+      word_mappings: word_mappings_c2
+      num_classes: vocabulary_size_c2
+
+  # Viewers.
+  viewer:
+    type: StreamViewer
+    priority: 100.4
+    input_streams: questions,category_names,answers,predicted_answers
+
+#: pipeline

From a7a909779360c02a3fb17317b84dd5233961df32 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Wed, 17 Apr 2019 14:20:16 -0700
Subject: [PATCH 23/39] rename
 c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses

---
 ...binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename configs/vqa_med_2019/vf/{c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml => c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml} (99%)

diff --git a/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml
similarity index 99%
rename from configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml
rename to configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml
index efa6580..3b1d952 100644
--- a/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml
+++ b/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml
@@ -20,7 +20,7 @@ validation:
 
 
 pipeline:
-  name: c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses
+  name: c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses
   
   ################# PIPE 0: SHARED #################
 

From 4bc779cec6f2c36e71840193a45b58e981ea23d0 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Thu, 18 Apr 2019 06:17:18 -0700
Subject: [PATCH 24/39] microupdate of wikitext lm rnn config

---
 configs/wikitext/wikitext_language_modeling_rnn.yml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/configs/wikitext/wikitext_language_modeling_rnn.yml b/configs/wikitext/wikitext_language_modeling_rnn.yml
index 0d5a5c3..88274b9 100644
--- a/configs/wikitext/wikitext_language_modeling_rnn.yml
+++ b/configs/wikitext/wikitext_language_modeling_rnn.yml
@@ -72,7 +72,7 @@ pipeline:
   lstm:
     type: RecurrentNeuralNetwork
     priority: 3
-    #initial_state_trainable: False
+    initial_state_trainable: False
     streams:
       inputs: embedded_sources
     globals:
@@ -88,4 +88,10 @@ pipeline:
       targets: indexed_targets
       loss: loss
 
+  # Viewers.
+  viewer:
+    type: StreamViewer
+    priority: 100.1
+    input_streams: sources,indexed_targets,targets,predictions
+
 #: pipeline

From f2f10fbd2c5d138c9b4d5ce3928f5812dc9fe1a0 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Thu, 18 Apr 2019 06:41:06 -0700
Subject: [PATCH 25/39] 
 c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml

---
 ...n_shared_all_encoders_four_ffns_losses.yml | 542 ++++++++++++++++++
 1 file changed, 542 insertions(+)
 create mode 100644 configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml

diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml
new file mode 100644
index 0000000..065a14e
--- /dev/null
+++ b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml
@@ -0,0 +1,542 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+  problem:
+    categories: C1,C2,C3
+  sampler:
+    name: WeightedRandomSampler
+    weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+  dataloader:
+    num_workers: 4
+
+# Validation parameters:
+validation:
+  problem:
+    categories: C1,C2,C3
+  dataloader:
+    num_workers: 4
+
+
+pipeline:
+  name: c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses
+  
+  ################# PIPE 0: SHARED #################
+
+  # Add global variables.
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 0
+    # Publish encoder sizes and per-category word-to-index mappings to globals.
+    keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_c2_word_to_ix, category_c3_word_to_ix, category_binary_word_to_ix]
+    values: [100, 2, 10, 100, {"C1": 0}, {"C2": 0}, {"C3": 0}, {"BINARY": 0}]
+
+  # Statistics.
+  batch_size:
+    type: BatchSizeStatistics
+    priority: 0.1
+
+  # Questions encoding.
+  pipe1_question_tokenizer:
+    priority: 0.2
+    type: SentenceTokenizer
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  ################# PIPE 0: CATEGORY #################
+
+  # Model 1: question embeddings
+  pipe0_question_embeddings:
+    type: SentenceEmbeddings
+    priority: 0.3
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: question_embeddings
+    freeze: True
+    ###################
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: pipe0_embedded_questions      
+  
+  # Model 2: question RNN
+  pipe0_lstm:
+    priority: 0.4
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: lstm
+    freeze: True
+    ###################
+    prediction_mode: Last
+    initial_state_trainable: True
+    use_logsoftmax: False
+    dropout_rate: 0.5
+    streams:
+      inputs: pipe0_embedded_questions
+      predictions: pipe0_questions_activations
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_lstm_output_size
+
+  # Model 3: FFN question category
+  pipe0_classifier:
+    priority: 0.5
+    type: FeedForwardNetwork
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: classifier
+    freeze: True
+    ###################
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: pipe0_questions_activations
+      predictions: pipe0_predicted_question_categories_preds
+    globals:
+      input_size: question_lstm_output_size # Set by global publisher
+      prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK
+
+  pipe0_category_decoder:
+    priority: 0.6
+    type: WordDecoder
+    # Use the same word mappings as label indexer.
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: pipe0_predicted_question_categories_preds
+      outputs: pipe0_predicted_question_categories_names
+    globals:
+      vocabulary_size: num_categories
+      word_mappings: category_word_mappings
+
+  pipe0_category_accuracy:
+    type: AccuracyStatistics
+    priority: 0.7
+    streams:
+      targets: category_ids
+      predictions: pipe0_predicted_question_categories_preds
+    statistics:
+      accuracy: categorization_accuracy
+  
+
+
+  ################# PIPE 1: SHARED QUESTION ENCODER #################
+
+  # Model 1: question embeddings
+  pipe1_question_embeddings:
+    type: SentenceEmbeddings
+    priority: 1.1
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions      
+  
+  # Model 2: question RNN
+  pipe1_lstm:
+    priority: 1.2
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    prediction_mode: Last
+    initial_state_trainable: True
+    use_logsoftmax: False
+    dropout_rate: 0.5
+    streams:
+      inputs: embedded_questions
+      predictions: questions_activations
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_lstm_output_size
+
+  # Answer encoding
+  pipe1_all_answer_indexer:
+    type: LabelIndexer
+    priority: 1.3
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: all_answers_ids
+    globals:
+      #vocabulary_size: vocabulary_size_all_c1_c2_c3_binary
+      word_mappings: word_mappings_all_c1_c2_c3_binary
+
+  ################# PIPE 2: SHARED IMAGE ENCODER #################
+
+  # Image encoder.
+  image_encoder:
+    type: TorchVisionWrapper
+    priority: 2.1
+    streams:
+      inputs: images
+      predictions: image_activations
+    globals:
+      prediction_size: image_encoder_output_size
+
+  ################# PIPE 3: SHARED IMAGE SIZE ENCODER #################
+
+  # Model - image size classifier.
+  image_size_encoder:
+    type: FeedForwardNetwork 
+    priority: 3.1
+    streams:
+      inputs: image_sizes
+      predictions: image_size_activations
+    globals:
+      input_size: image_size_encoder_input_size
+      prediction_size: image_size_encoder_output_size
+
+  ################# PIPE 4: SHARED CONCAT #################
+
+  concat:
+    type: Concatenation
+    priority: 4.1
+    input_streams: [questions_activations,image_activations,image_size_activations]
+    # Concatenation 
+    dim: 1 # default
+    input_dims: [[-1,100],[-1,100],[-1,10]]
+    output_dims: [-1,210]
+    streams:
+      outputs: concatenated_activations
+    globals:
+      output_size: concatenated_activations_size
+
+
+  ################# PIPE 5: C1 question #################
+
+  # Answer encoding for PIPE 5.
+  pipe5_c1_answer_indexer:
+    type: LabelIndexer
+    priority: 5.1
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c1_without_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: pipe5_c1_answers_without_yn_ids
+    globals:
+      vocabulary_size: vocabulary_size_c1_without_yn
+      word_mappings: word_mappings_c1_without_yn
+
+  # Sample masking based on categories.
+  pipe5_c1_string_to_mask:
+    priority: 5.2
+    type: StringToMask
+    globals:
+      word_mappings: category_c1_word_to_ix
+    streams:
+      strings: pipe0_predicted_question_categories_names
+      string_indices: predicted_c1_question_categories_indices # NOT USED
+      masks: pipe5_c1_masks
+
+  # Model 4: FFN C1 answering
+  pipe5_c1_ffn:
+    priority: 5.3
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: concatenated_activations
+      predictions: pipe5_c1_predictions
+    globals:
+      input_size: concatenated_activations_size
+      prediction_size: vocabulary_size_c1_without_yn
+
+  pipe5_c1_nllloss:
+    type: NLLLoss
+    priority: 5.4
+    targets_dim: 1
+    use_masking: True
+    streams:
+      predictions: pipe5_c1_predictions
+      masks: pipe5_c1_masks
+      targets: pipe5_c1_answers_without_yn_ids
+      loss: pipe5_c1_loss
+
+  pipe5_c1_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 5.5
+    use_word_mappings: True
+    use_masking: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      masks: pipe5_c1_masks
+      predictions: pipe5_c1_predictions
+      targets: pipe5_c1_answers_without_yn_ids
+    globals:
+      word_mappings: word_mappings_c1_without_yn
+      #num_classes: vocabulary_size_c1_without_yn
+    statistics:
+      precision: pipe5_c1_precision
+      recall: pipe5_c1_recall
+      f1score: pipe5_c1_f1score
+
+  ################# PIPE 6: C2 question #################
+
+  # Answer encoding for PIPE 6.
+  pipe6_c2_answer_indexer:
+    type: LabelIndexer
+    priority: 6.1
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c2.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: pipe6_c2_answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_c2
+      word_mappings: word_mappings_c2
+
+  # Sample masking based on categories.
+  pipe6_c2_string_to_mask:
+    priority: 6.2
+    type: StringToMask
+    globals:
+      word_mappings: category_c2_word_to_ix
+    streams:
+      strings: pipe0_predicted_question_categories_names
+      string_indices: predicted_c2_question_categories_indices # NOT USED
+      masks: pipe6_c2_masks
+
+  # Model 4: FFN C2 answering
+  pipe6_c2_ffn:
+    priority: 6.3
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: concatenated_activations
+      predictions: pipe6_c2_predictions
+    globals:
+      input_size: concatenated_activations_size
+      prediction_size: vocabulary_size_c2
+
+  pipe6_c2_nllloss:
+    type: NLLLoss
+    priority: 6.4
+    targets_dim: 1
+    use_masking: True
+    streams:
+      predictions: pipe6_c2_predictions
+      masks: pipe6_c2_masks
+      targets: pipe6_c2_answers_ids
+      loss: pipe6_c2_loss
+
+  pipe6_c2_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 6.5
+    use_word_mappings: True
+    use_masking: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      masks: pipe6_c2_masks
+      predictions: pipe6_c2_predictions
+      targets: pipe6_c2_answers_ids
+    globals:
+      word_mappings: word_mappings_c2
+    statistics:
+      precision: pipe6_c2_precision
+      recall: pipe6_c2_recall
+      f1score: pipe6_c2_f1score
+
+  ################# PIPE 7: C3 question #################
+
+  # Answer encoding for PIPE 7.
+  pipe7_c3_answer_indexer:
+    type: LabelIndexer
+    priority: 7.1
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c3.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: pipe7_c3_answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_c3
+      word_mappings: word_mappings_c3
+
+  # Sample masking based on categories.
+  pipe7_c3_string_to_mask:
+    priority: 7.2
+    type: StringToMask
+    globals:
+      word_mappings: category_c3_word_to_ix
+    streams:
+      strings: pipe0_predicted_question_categories_names
+      string_indices: predicted_c3_question_categories_indices # NOT USED
+      masks: pipe7_c3_masks
+
+  # Model 4: FFN C3 answering
+  pipe7_c3_ffn:
+    priority: 7.3
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: concatenated_activations
+      predictions: pipe7_c3_predictions
+    globals:
+      input_size: concatenated_activations_size
+      prediction_size: vocabulary_size_c3
+
+  pipe7_c3_nllloss:
+    type: NLLLoss
+    priority: 7.4
+    targets_dim: 1
+    use_masking: True
+    streams:
+      predictions: pipe7_c3_predictions
+      masks: pipe7_c3_masks
+      targets: pipe7_c3_answers_ids
+      loss: pipe7_c3_loss
+
+  pipe7_c3_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 7.5
+    use_word_mappings: True
+    use_masking: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      masks: pipe7_c3_masks
+      predictions: pipe7_c3_predictions
+      targets: pipe7_c3_answers_ids
+    globals:
+      word_mappings: word_mappings_c3
+      #num_classes: vocabulary_size_c3
+    statistics:
+      precision: pipe7_c3_precision
+      recall: pipe7_c3_recall
+      f1score: pipe7_c3_f1score
+
+  ################# PIPE 8: BINARY question #################
+
+  # Answer encoding for pipe 8.
+  pipe8_binary_answer_indexer:
+    type: LabelIndexer
+    priority: 8.1
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.binary_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: pipe8_binary_answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_binary_yn
+      word_mappings: word_mappings_binary_yn
+
+  pipe8_binary_string_to_mask:
+    priority: 8.2
+    type: StringToMask
+    globals:
+      word_mappings: category_binary_word_to_ix
+    streams:
+      strings: pipe0_predicted_question_categories_names
+      string_indices: predicted_binary_question_categories_indices # NOT USED
+      masks: pipe8_binary_masks
+
+  # Model 4: FFN binary (Y/N) answering
+  pipe8_binary_ffn:
+    priority: 8.3
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: concatenated_activations
+      predictions: pipe8_binary_predictions
+    globals:
+      input_size: concatenated_activations_size
+      prediction_size: vocabulary_size_binary_yn
+
+  pipe8_binary_nllloss:
+    type: NLLLoss
+    priority: 8.4
+    targets_dim: 1
+    use_masking: True
+    streams:
+      predictions: pipe8_binary_predictions
+      masks: pipe8_binary_masks
+      targets: pipe8_binary_answers_ids
+      loss: pipe8_binary_loss
+
+  pipe8_binary_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 8.5
+    use_word_mappings: True
+    use_masking: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      masks: pipe8_binary_masks
+      predictions: pipe8_binary_predictions
+      targets: pipe8_binary_answers_ids
+    globals:
+      word_mappings: word_mappings_binary_yn
+      #num_classes: vocabulary_size_binary_yn
+    statistics:
+      precision: pipe8_binary_precision
+      recall: pipe8_binary_recall
+      f1score: pipe8_binary_f1score
+
+  ################# PIPE 9: MERGE ANSWERS #################
+
+  # Merge predictions.
+  pipe9_merged_predictions:
+    type: JoinMaskedPredictions
+    priority: 9.1
+    # Names of used input streams.
+    input_prediction_streams: [pipe5_c1_predictions, pipe6_c2_predictions, pipe7_c3_predictions, pipe8_binary_predictions]
+    input_mask_streams: [pipe5_c1_masks, pipe6_c2_masks, pipe7_c3_masks, pipe8_binary_masks]
+    input_word_mappings: [word_mappings_c1_without_yn, word_mappings_c2, word_mappings_c3, word_mappings_binary_yn]
+    globals:
+      output_word_mappings: word_mappings_all_c1_c2_c3_binary
+    streams:
+      output_strings: pipe9_merged_predictions
+      output_indices: pipe9_merged_pred_indices
+
+  # Statistics.
+  pipe9_merged_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 9.2
+    # Use prediction indices instead of distributions.
+    use_prediction_distributions: False
+    use_word_mappings: True
+    show_class_scores: True
+    show_confusion_matrix: True
+    globals:
+      word_mappings: word_mappings_all_c1_c2_c3_binary
+    streams:
+      targets: all_answers_ids
+      predictions: pipe9_merged_pred_indices
+    statistics:
+      precision: pipe9_merged_precision
+      recall: pipe9_merged_recall
+      f1score: pipe9_merged_f1score
+
+  # Viewers.
+  viewer:
+    type: StreamViewer
+    priority: 9.3
+    input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe7_c3_masks,pipe7_c3_answers_ids,pipe7_c3_predictions, pipe8_binary_masks,pipe8_binary_answers_ids,pipe8_binary_predictions, pipe9_merged_predictions
+
+
+#: pipeline

From 6cf7f216d83cf3b1b8efbb9a0511554f3edd7894 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Thu, 18 Apr 2019 06:44:51 -0700
Subject: [PATCH 26/39] added viewing of streams related to C2

---
 ...3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml
index 065a14e..56ab04b 100644
--- a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml
+++ b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml
@@ -536,7 +536,7 @@ pipeline:
   viewer:
     type: StreamViewer
     priority: 9.3
-    input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe7_c3_masks,pipe7_c3_answers_ids,pipe7_c3_predictions, pipe8_binary_masks,pipe8_binary_answers_ids,pipe8_binary_predictions, pipe9_merged_predictions
+    input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_c2_masks,pipe6_c2_answers_ids,pipe6_c2_predictions, pipe7_c3_masks,pipe7_c3_answers_ids,pipe7_c3_predictions, pipe8_binary_masks,pipe8_binary_answers_ids,pipe8_binary_predictions, pipe9_merged_predictions
 
 
 #: pipeline

From aa829fbf3825f5318b7e27d1363d7ed068840008 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Thu, 18 Apr 2019 06:54:44 -0700
Subject: [PATCH 27/39] one ffn

---
 ...t_rnn_shared_all_encoders_one_ffn_loss.yml | 298 ++++++++++++++++++
 1 file changed, 298 insertions(+)
 create mode 100644 configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml

diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml
new file mode 100644
index 0000000..3c47597
--- /dev/null
+++ b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml
@@ -0,0 +1,298 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+  problem:
+    categories: C1,C2,C3
+  sampler:
+    name: WeightedRandomSampler
+    weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+  dataloader:
+    num_workers: 4
+
+# Validation parameters:
+validation:
+  problem:
+    categories: C1,C2,C3
+  dataloader:
+    num_workers: 4
+
+
+pipeline:
+  name: c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss
+  
+  ################# PIPE 0: SHARED #################
+
+  # Add global variables.
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 0
+    # Publish encoder sizes and the joint category word-to-index mapping to globals.
+    keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_binary_yn_word_to_ix]
+    values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2, "BINARY": 3}]
+
+  # Statistics.
+  batch_size:
+    type: BatchSizeStatistics
+    priority: 0.1
+
+  # Questions encoding.
+  pipe1_question_tokenizer:
+    priority: 0.2
+    type: SentenceTokenizer
+    streams: 
+      inputs: questions
+      outputs: tokenized_questions
+
+  ################# PIPE 0: CATEGORY #################
+
+  # Model 1: question embeddings
+  pipe0_question_embeddings:
+    type: SentenceEmbeddings
+    priority: 0.3
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: question_embeddings
+    freeze: True
+    ###################
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: pipe0_embedded_questions      
+  
+  # Model 2: question RNN
+  pipe0_lstm:
+    priority: 0.4
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: lstm
+    freeze: True
+    ###################
+    prediction_mode: Last
+    initial_state_trainable: True
+    use_logsoftmax: False
+    dropout_rate: 0.5
+    streams:
+      inputs: pipe0_embedded_questions
+      predictions: pipe0_questions_activations
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_lstm_output_size
+
+  # Model 3: FFN question category
+  pipe0_classifier:
+    priority: 0.5
+    type: FeedForwardNetwork
+    # LOAD AND FREEZE #
+    load: 
+      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      model: classifier
+    freeze: True
+    ###################
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: pipe0_questions_activations
+      predictions: pipe0_predicted_question_categories_preds
+    globals:
+      input_size: question_lstm_output_size # Set by global publisher
+      prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK
+
+  pipe0_category_decoder:
+    priority: 0.6
+    type: WordDecoder
+    # Use the same word mappings as label indexer.
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: pipe0_predicted_question_categories_preds
+      outputs: pipe0_predicted_question_categories_names
+    globals:
+      vocabulary_size: num_categories
+      word_mappings: category_word_mappings
+
+  pipe0_category_accuracy:
+    type: AccuracyStatistics
+    priority: 0.7
+    streams:
+      targets: category_ids
+      predictions: pipe0_predicted_question_categories_preds
+    statistics:
+      accuracy: categorization_accuracy
+  
+
+
+  ################# PIPE 1: SHARED QUESTION ENCODER #################
+
+  # Model 1: question embeddings
+  pipe1_question_embeddings:
+    type: SentenceEmbeddings
+    priority: 1.1
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/vqa-med
+    word_mappings_file: questions.all.word.mappings.csv
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions      
+  
+  # Model 2: question RNN
+  pipe1_lstm:
+    priority: 1.2
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    prediction_mode: Last
+    initial_state_trainable: True
+    use_logsoftmax: False
+    dropout_rate: 0.5
+    streams:
+      inputs: embedded_questions
+      predictions: questions_activations
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_lstm_output_size
+
+  # Answer encoding
+  pipe1_all_answer_indexer:
+    type: LabelIndexer
+    priority: 1.3
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: all_answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_all_c1_c2_c3_binary
+      word_mappings: word_mappings_all_c1_c2_c3_binary
+
+  ################# PIPE 2: SHARED IMAGE ENCODER #################
+
+  # Image encoder.
+  image_encoder:
+    type: TorchVisionWrapper
+    priority: 2.1
+    streams:
+      inputs: images
+      predictions: image_activations
+    globals:
+      prediction_size: image_encoder_output_size
+
+  ################# PIPE 3: SHARED IMAGE SIZE ENCODER #################
+
+  # Model - image size classifier.
+  image_size_encoder:
+    type: FeedForwardNetwork 
+    priority: 3.1
+    streams:
+      inputs: image_sizes
+      predictions: image_size_activations
+    globals:
+      input_size: image_size_encoder_input_size
+      prediction_size: image_size_encoder_output_size
+
+  ################# PIPE 4: SHARED CONCAT #################
+
+  concat:
+    type: Concatenation
+    priority: 4.1
+    input_streams: [questions_activations,image_activations,image_size_activations]
+    # Concatenation 
+    dim: 1 # default
+    input_dims: [[-1,100],[-1,100],[-1,10]]
+    output_dims: [-1,210]
+    streams:
+      outputs: concatenated_activations
+    globals:
+      output_size: concatenated_activations_size
+
+
+  ################# PIPE 5: C1 + C2 + C3 + Binary Y/N question #################
+
+  # Answer encoding for PIPE 5.
+  pipe5_all_answer_indexer:
+    type: LabelIndexer
+    priority: 5.1
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: pipe5_all_answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_c1_c2_c3_binary
+      word_mappings: word_mappings_all_c1_c2_c3_binary
+
+  # Sample masking based on categories.
+  pipe5_all_string_to_mask:
+    priority: 5.2
+    type: StringToMask
+    globals:
+      word_mappings: category_c1_c2_c3_binary_yn_word_to_ix
+    streams:
+      strings: pipe0_predicted_question_categories_names
+      string_indices: predicted_c1_c2_c3_binary_by_question_categories_indices # NOT USED
+      masks: pipe5_all_masks
+
+  # Model 4: FFN C1 + C2 + C3 + Binary Y/N answering
+  pipe5_all_ffn:
+    priority: 5.3
+    type: FeedForwardNetwork
+    hidden: [50]
+    dropout_rate: 0.5
+    streams:
+      inputs: concatenated_activations
+      predictions: pipe5_all_predictions
+    globals:
+      input_size: concatenated_activations_size
+      prediction_size: vocabulary_size_c1_c2_c3_binary
+
+  pipe5_all_nllloss:
+    type: NLLLoss
+    priority: 5.4
+    targets_dim: 1
+    use_masking: True
+    streams:
+      predictions: pipe5_all_predictions
+      masks: pipe5_all_masks
+      targets: pipe5_all_answers_ids
+      loss: pipe5_all_loss
+
+  pipe5_all_precision_recall:
+    type: PrecisionRecallStatistics
+    priority: 5.5
+    use_word_mappings: True
+    use_masking: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      masks: pipe5_all_masks
+      predictions: pipe5_all_predictions
+      targets: pipe5_all_answers_ids
+    globals:
+      word_mappings: word_mappings_all_c1_c2_c3_binary
+    statistics:
+      precision: pipe5_all_precision
+      recall: pipe5_all_recall
+      f1score: pipe5_all_f1score
+
+
+  ################# PIPE 9: MERGE ANSWERS #################
+
+  # Viewers.
+  viewer:
+    type: StreamViewer
+    priority: 9.3
+    input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_all_masks,pipe5_all_answers_without_yn_ids,pipe5_all_predictions
+
+
+#: pipeline

From 48c49647ae042fda70cb1b539c6c496aeebff718 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Thu, 18 Apr 2019 06:55:18 -0700
Subject: [PATCH 28/39] rename
 c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss

---
 ...1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename configs/vqa_med_2019/vf/{c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml => c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml} (99%)

diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml b/configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml
similarity index 99%
rename from configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml
rename to configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml
index 3c47597..ef8f535 100644
--- a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml
+++ b/configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml
@@ -20,7 +20,7 @@ validation:
 
 
 pipeline:
-  name: c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss
+  name: c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss
   
   ################# PIPE 0: SHARED #################
 

From a4527749c300e24e0283fa83e325dae89bd3a1ca Mon Sep 17 00:00:00 2001
From: Tomasz Kornuta <43558388+tkornut@users.noreply.github.com>
Date: Fri, 19 Apr 2019 09:36:44 -0700
Subject: [PATCH 29/39] Update README.md

---
 README.md | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index f13e0a7..ee9cd50 100644
--- a/README.md
+++ b/README.md
@@ -11,14 +11,15 @@
 
 ## Description
 
-PyTorchPipe (PTP) aims at _accelerating reproducible Machine Learning Research_ by fostering the development of computational _pipelines_ and comparison of diverse neural network-based models. 
-
-In its core, to _accelerate the computations_ on their own, PTP relies on PyTorch and extensively uses its mechanisms for distribution of computations on CPUs/GPUs.
+PyTorchPipe (PTP) fosters the development of computational _pipelines_ and comparison of diverse neural network-based models. 
 
 PTP frames training and testing procedures as _pipelines_ consisting of many components communicating through data streams.
 Each such a stream can consist of several components, including one problem instance (providing batches of data), (zero-or-more) trainable models and (any number of) additional components providing required transformations and computations.
 
-As a result, the training & testing mechanisms are no longer pinned to a specific model or problem, and built-in mechanisms for compatibility checking (handshaking), configuration management & statistics collection facilitate running diverse experiments.
+As a result, the training & testing procedures are no longer pinned to a specific problem or model, and built-in mechanisms for compatibility checking (handshaking), configuration management & statistics collection facilitate running diverse experiments.
+
+In its core, to _accelerate the computations_ on their own, PTP relies on PyTorch and extensively uses its mechanisms for distribution of computations on CPUs/GPUs.
+
 
 ## Installation
 

From 1d4b93ad00edb49bb66691e33c7569cfba81e099 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Fri, 19 Apr 2019 10:56:16 -0700
Subject: [PATCH 30/39] first version of component calculating BLEU score

---
 .../components/publishers/bleu_statistics.yml |  50 ++++
 .../wikitext_language_modeling_rnn.yml        |  23 +-
 ptp/components/publishers/__init__.py         |   2 +
 ptp/components/publishers/bleu_statistics.py  | 223 ++++++++++++++++++
 ptp/components/text/sentence_indexer.py       |   2 +-
 5 files changed, 296 insertions(+), 4 deletions(-)
 create mode 100644 configs/default/components/publishers/bleu_statistics.yml
 create mode 100644 ptp/components/publishers/bleu_statistics.py

diff --git a/configs/default/components/publishers/bleu_statistics.yml b/configs/default/components/publishers/bleu_statistics.yml
new file mode 100644
index 0000000..2a52fa3
--- /dev/null
+++ b/configs/default/components/publishers/bleu_statistics.yml
@@ -0,0 +1,50 @@
+# This file defines the default values for the BLEU statistics.
+
+####################################################################
+# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
+####################################################################
+
+# Flag indicating whether prediction are represented as distributions or indices (LOADED)
+# Options: True (expects distribution for each prediction)
+#          False (expects indices (max args))
+use_prediction_distributions: True
+
+# When set to True, performs masking of selected samples from batch (LOADED)
+# TODO!
+#use_masking: False
+
+streams: 
+  ####################################################################
+  # 2. Keymappings associated with INPUT and OUTPUT streams.
+  ####################################################################
+
+  # Stream containing targets (label ids) (INPUT)
+  targets: targets
+
+  # Stream containing batch of predictions (INPUT)
+  predictions: predictions
+
+  # Stream containing masks used for masking of selected samples from batch (INPUT)
+  #masks: masks
+
+globals:
+  ####################################################################
+  # 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
+  ####################################################################
+
+  # Word mappings used for mapping predictions/targets into lists of words (RETRIEVED)
+  word_mappings: word_mappings
+
+  ####################################################################
+  # 4. Keymappings associated with GLOBAL variables that will be SET.
+  ####################################################################
+
+statistics:
+  ####################################################################
+  # 5. Keymappings associated with statistics that will be ADDED.
+  ####################################################################
+
+  # Name used for collected statistics (ADDED).
+  bleu: bleu
+
+
diff --git a/configs/wikitext/wikitext_language_modeling_rnn.yml b/configs/wikitext/wikitext_language_modeling_rnn.yml
index 88274b9..d2806d5 100644
--- a/configs/wikitext/wikitext_language_modeling_rnn.yml
+++ b/configs/wikitext/wikitext_language_modeling_rnn.yml
@@ -86,12 +86,29 @@ pipeline:
     num_targets_dims: 2
     streams:
       targets: indexed_targets
-      loss: loss
 
+  # Statistics.
+  batch_size:
+    type: BatchSizeStatistics
+    priority: 100.0
+
+  #accuracy:
+  #  type: AccuracyStatistics
+  #  priority: 100.1
+  #  streams:
+  #    targets: indexed_targets
+
+  bleu:
+    type: BLEUStatistics
+    priority: 100.2
+    streams:
+      targets: indexed_targets
+
+      
   # Viewers.
   viewer:
     type: StreamViewer
-    priority: 100.1
-    input_streams: sources,indexed_targets,targets,predictions
+    priority: 100.3
+    input_streams: sources,targets,indexed_targets,predictions
 
 #: pipeline
diff --git a/ptp/components/publishers/__init__.py b/ptp/components/publishers/__init__.py
index a412f6f..1db7f75 100644
--- a/ptp/components/publishers/__init__.py
+++ b/ptp/components/publishers/__init__.py
@@ -1,11 +1,13 @@
 from .accuracy_statistics import AccuracyStatistics
 from .batch_size_statistics import BatchSizeStatistics
+from .bleu_statistics import BLEUStatistics
 from .global_variable_publisher import GlobalVariablePublisher
 from .precision_recall_statistics import PrecisionRecallStatistics
 
 __all__ = [
     'AccuracyStatistics',
     'BatchSizeStatistics',
+    'BLEUStatistics',
     'GlobalVariablePublisher',
     'PrecisionRecallStatistics',
     ]
diff --git a/ptp/components/publishers/bleu_statistics.py b/ptp/components/publishers/bleu_statistics.py
new file mode 100644
index 0000000..eafb7ea
--- /dev/null
+++ b/ptp/components/publishers/bleu_statistics.py
@@ -0,0 +1,223 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) tkornuta, IBM Corporation 2019
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__author__ = "Tomasz Kornuta"
+
+import torch
+import math
+import numpy as np
+from nltk.translate.bleu_score import sentence_bleu
+
+from ptp.components.component import Component
+from ptp.data_types.data_definition import DataDefinition
+
+
+class BLEUStatistics(Component):
+    """
+    Class collecting statistics: BLEU (Bilingual Evaluation Understudy Score).
+
+    It accepts targets and predictions represented as indices of words and uses the provided word mappings to change those into words used finally for calculation of BLEU similarity.
+
+    """
+
+    def __init__(self, name, config):
+        """
+        Initializes object.
+
+        :param name: Component name (read from configuration file).
+        :type name: str
+
+        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
+        :type config: :py:class:`ptp.configuration.ConfigInterface`
+
+        """
+        # Call constructors of parent classes.
+        Component.__init__(self, name, BLEUStatistics, config)
+
+        # Get stream key mappings.
+        self.key_targets = self.stream_keys["targets"]
+        self.key_predictions = self.stream_keys["predictions"]
+        self.key_masks = self.stream_keys["masks"]
+
+        # Get prediction distributions/indices flag.
+        self.use_prediction_distributions = self.config["use_prediction_distributions"]
+
+        # Get masking flag.
+        #self.use_masking = self.config["use_masking"]
+
+        # Retrieve word mappings from globals.
+        word_to_ix = self.globals["word_mappings"]
+        # Construct reverse mapping for faster processing.
+        self.ix_to_word = dict((v,k) for k,v in word_to_ix.items())
+
+
+        # Get statistics key mappings.
+        self.key_bleu = self.statistics_keys["bleu"]
+
+
+    def input_data_definitions(self):
+        """ 
+        Function returns a dictionary with definitions of input data that are required by the component.
+
+        :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
+        """
+        # Add targets.
+        input_defs = {
+            self.key_targets: DataDefinition([-1, -1], [torch.Tensor], "Batch of sentences represented as a single tensor of indices of particular words  [BATCH_SIZE x SEQ_LENGTH]"),
+            }
+        # Add predictions.
+        if self.use_prediction_distributions:
+            input_defs[self.key_predictions] = DataDefinition([-1, -1, -1], [torch.Tensor], "Batch of predictions, represented as tensor with sequences of probability distributions over classes [BATCH_SIZE x SEQ_LENGTH x NUM_CLASSES]")
+        else: 
+            input_defs[self.key_predictions] = DataDefinition([-1, -1], [torch.Tensor], "Batch of predictions, represented as tensor with sequences of indices of predicted answers [BATCH_SIZE x SEQ_LENGTH]")
+        # Add masks.
+        #if self.use_masking:
+        #    input_defs[self.key_masks] = DataDefinition([-1, -1], [torch.Tensor], "Batch of masks (separate mask for each sequence in the batch) [BATCH_SIZE x SEQ_LENGTH]")
+        return input_defs
+
+
+    def output_data_definitions(self):
+        """ 
+        Function returns an empty dictionary with definitions of output data produced by the component.
+
+        :return: Empty dictionary.
+        """
+        return {}
+
+
+    def __call__(self, data_dict):
+        """
+        Call method - empty for all statistics.
+        """
+        pass
+
+
+    def calculate_BLEU(self, data_dict):
+        """
+        Calculates BLEU for predictions of a given batch.
+
+        :param data_dict: DataDict containing the targets and predictions (and optionally masks).
+        :type data_dict: DataDict
+
+        :return: BLEU score averaged over the batch.
+
+        """
+        # Get targets.
+        targets = data_dict[self.key_targets].data.cpu().numpy().tolist()
+
+        if self.use_prediction_distributions:
+            # Get indices of the max log-probability.
+            preds = data_dict[self.key_predictions].max(1)[1].data.cpu().numpy().tolist()
+        else: 
+            preds = data_dict[self.key_predictions].data.cpu().numpy().tolist()
+
+        #if self.use_masking:
+        #    # Get masks from inputs.
+        #    masks = data_dict[self.key_masks].data.cpu().numpy().tolist()
+        #else:
+        #    batch_size = preds.shape[0]       
+        
+        # Calculate the correct predictinos.
+        scores = []
+
+        for target_indices, pred_indices in zip(targets, preds):
+            # Change target indices to words.
+            target_words = []
+            for t_ind in target_indices:
+                if t_ind in self.ix_to_word.keys():
+                    target_words.append(self.ix_to_word[t_ind])
+            # Change prediction indices to words.
+            pred_words = []
+            for p_ind in pred_indices:
+                if p_ind in self.ix_to_word.keys():
+                    pred_words.append(self.ix_to_word[p_ind])
+            # Calculate BLEU.
+            scores.append(sentence_bleu(target_words, pred_words))
+
+        # Get batch size.
+        batch_size = len(targets)
+
+        # Normalize by batch size.
+        if batch_size > 0:
+            score = sum(scores) / batch_size
+        else:
+            score = 0
+
+        return score
+
+
+    def add_statistics(self, stat_col):
+        """
+        Adds 'bleu' statistics to ``StatisticsCollector``.
+
+        :param stat_col: ``StatisticsCollector``.
+
+        """
+        stat_col.add_statistics(self.key_bleu, '{:6.4f}')
+
+    def collect_statistics(self, stat_col, data_dict):
+        """
+        Collects statistics (BLEU score) for given episode.
+
+        :param stat_col: ``StatisticsCollector``.
+
+        """
+        stat_col[self.key_bleu] = self.calculate_BLEU(data_dict)
+
+    def add_aggregators(self, stat_agg):
+        """
+        Adds aggregators for the BLEU score (average and standard deviation) over all collected batches.
+
+        :param stat_agg: ``StatisticsAggregator``.
+
+        """
+        stat_agg.add_aggregator(self.key_bleu, '{:7.5f}')  # represents the average BLEU score
+        #stat_agg.add_aggregator(self.key_bleu+'_min', '{:7.5f}')
+        #stat_agg.add_aggregator(self.key_bleu+'_max', '{:7.5f}')
+        stat_agg.add_aggregator(self.key_bleu+'_std', '{:7.5f}')
+
+
+    def aggregate_statistics(self, stat_col, stat_agg):
+        """
+        Aggregates samples from all collected batches.
+
+        :param stat_col: ``StatisticsCollector``
+
+        :param stat_agg: ``StatisticsAggregator``
+
+        """
+        scores = stat_col[self.key_bleu]
+
+        # Check if batch size was collected.
+        if "batch_size" in stat_col.keys():
+            batch_sizes = stat_col['batch_size']
+
+            # Calculate weighted average and variance of BLEU scores.
+            scores_avg = np.average(scores, weights=batch_sizes)
+            scores_var = np.average((scores-scores_avg)**2, weights=batch_sizes)
+
+            stat_agg[self.key_bleu] = scores_avg
+            #stat_agg[self.key_bleu+'_min'] = np.min(scores)
+            #stat_agg[self.key_bleu+'_max'] = np.max(scores)
+            stat_agg[self.key_bleu+'_std'] = math.sqrt(scores_var)
+        else:
+            # Else: use simple mean.
+            stat_agg[self.key_bleu] = np.mean(scores)
+            #stat_agg[self.key_bleu+'_min'] = np.min(scores)
+            #stat_agg[self.key_bleu+'_max'] = np.max(scores)
+            stat_agg[self.key_bleu+'_std'] = np.std(scores)
+            # But inform user about that!
+            self.logger.warning("Aggregated statistics might contain errors due to the lack of information about sizes of aggregated batches")
diff --git a/ptp/components/text/sentence_indexer.py b/ptp/components/text/sentence_indexer.py
index 18394f9..abaf94a 100644
--- a/ptp/components/text/sentence_indexer.py
+++ b/ptp/components/text/sentence_indexer.py
@@ -61,7 +61,7 @@ def output_data_definitions(self):
         :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
         """
         return {
-            self.key_outputs: DataDefinition([-1, -1], [torch.Tensor], "Batch of sentences represented as a single tensor of indices [BATCH_SIZE x SEQ_LENGTH]"),
+            self.key_outputs: DataDefinition([-1, -1], [torch.Tensor], "Batch of sentences represented as a single tensor of indices of particular words  [BATCH_SIZE x SEQ_LENGTH]"),
             }
 
     def __call__(self, data_dict):

From 9b3f977eb928399d2ba50782a767e6b0671ac291 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Fri, 19 Apr 2019 10:57:27 -0700
Subject: [PATCH 31/39] first version of component calculating BLEU score

---
 ptp/components/publishers/bleu_statistics.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ptp/components/publishers/bleu_statistics.py b/ptp/components/publishers/bleu_statistics.py
index eafb7ea..4c19564 100644
--- a/ptp/components/publishers/bleu_statistics.py
+++ b/ptp/components/publishers/bleu_statistics.py
@@ -146,6 +146,9 @@ def calculate_BLEU(self, data_dict):
                     pred_words.append(self.ix_to_word[p_ind])
             # Calculate BLEU.
             scores.append(sentence_bleu(target_words, pred_words))
+            print("TARGET: {}\n".format(target_words))
+            print("PREDICTION: {}\n".format(pred_words))
+            print("BLEU: {}\n".format(scores[-1]))
 
         # Get batch size.
         batch_size = len(targets)

From 806e00811b7c4e9f701616545a14250a0c1e69f2 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Fri, 19 Apr 2019 14:13:28 -0700
Subject: [PATCH 32/39] bleu with weights, fixed bug with max along item axis

---
 .../components/publishers/bleu_statistics.yml   |  3 +++
 ptp/components/publishers/bleu_statistics.py    | 17 ++++++++++++-----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/configs/default/components/publishers/bleu_statistics.yml b/configs/default/components/publishers/bleu_statistics.yml
index 2a52fa3..a79a245 100644
--- a/configs/default/components/publishers/bleu_statistics.yml
+++ b/configs/default/components/publishers/bleu_statistics.yml
@@ -13,6 +13,9 @@ use_prediction_distributions: True
 # TODO!
 #use_masking: False
 
+# Weights of n-grams used when calculating the score.
+weights: [0.25, 0.25, 0.25, 0.25]
+
 streams: 
   ####################################################################
   # 2. Keymappings associated with INPUT and OUTPUT streams.
diff --git a/ptp/components/publishers/bleu_statistics.py b/ptp/components/publishers/bleu_statistics.py
index 4c19564..b303ea9 100644
--- a/ptp/components/publishers/bleu_statistics.py
+++ b/ptp/components/publishers/bleu_statistics.py
@@ -63,6 +63,9 @@ def __init__(self, name, config):
         # Construct reverse mapping for faster processing.
         self.ix_to_word = dict((v,k) for k,v in word_to_ix.items())
 
+        # Get weights of n-grams used when calculating the score.
+        self.weights = self.config["weights"]
+
 
         # Get statistics key mappings.
         self.key_bleu = self.statistics_keys["bleu"]
@@ -120,7 +123,7 @@ def calculate_BLEU(self, data_dict):
 
         if self.use_prediction_distributions:
             # Get indices of the max log-probability.
-            preds = data_dict[self.key_predictions].max(1)[1].data.cpu().numpy().tolist()
+            preds = data_dict[self.key_predictions].max(-1)[1].data.cpu().numpy().tolist()
         else: 
             preds = data_dict[self.key_predictions].data.cpu().numpy().tolist()
 
@@ -133,6 +136,9 @@ def calculate_BLEU(self, data_dict):
         # Calculate the correct predictinos.
         scores = []
 
+        #print("targets ({}): {}\n".format(len(targets), targets[0]))
+        #print("preds ({}): {}\n".format(len(preds), preds[0]))
+
         for target_indices, pred_indices in zip(targets, preds):
             # Change target indices to words.
             target_words = []
@@ -145,11 +151,12 @@ def calculate_BLEU(self, data_dict):
                 if p_ind in self.ix_to_word.keys():
                     pred_words.append(self.ix_to_word[p_ind])
             # Calculate BLEU.
-            scores.append(sentence_bleu(target_words, pred_words))
-            print("TARGET: {}\n".format(target_words))
-            print("PREDICTION: {}\n".format(pred_words))
-            print("BLEU: {}\n".format(scores[-1]))
+            scores.append(sentence_bleu([target_words], pred_words, self.weights))
+            #print("TARGET: {}\n".format(target_words))
+            #print("PREDICTION: {}\n".format(pred_words))
+            #print("BLEU: {}\n".format(scores[-1]))
 
+            
         # Get batch size.
         batch_size = len(targets)
 

From 4d133f41b0ed255934dc0328abbab68f998a9222 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Fri, 19 Apr 2019 14:36:49 -0700
Subject: [PATCH 33/39] Fixed mutltiple inheritance issue with mixin
 WordEmbeddings  class

---
 .../components/text/sentence_indexer.yml      |  3 +++
 .../wikitext_language_modeling_rnn.yml        |  4 +--
 ptp/components/mixins/word_mappings.py        | 25 ++++++++-----------
 ptp/components/models/sentence_embeddings.py  |  2 +-
 ptp/components/text/label_indexer.py          |  8 +++---
 ptp/components/text/sentence_indexer.py       | 14 ++++++++---
 .../text/sentence_one_hot_encoder.py          |  8 +++---
 ptp/components/text/word_decoder.py           |  8 +++---
 8 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/configs/default/components/text/sentence_indexer.yml b/configs/default/components/text/sentence_indexer.yml
index 65d5d03..25c2f5e 100644
--- a/configs/default/components/text/sentence_indexer.yml
+++ b/configs/default/components/text/sentence_indexer.yml
@@ -25,6 +25,9 @@ import_word_mappings_from_globals: False
 # Flag informing whether word mappings will be exported to globals (LOADED)
 export_word_mappings_to_globals: False
 
+# Operation mode. If 'reverse' is True, then it will change indices into words (LOADED)
+reverse: False
+
 streams: 
   ####################################################################
   # 2. Keymappings associated with INPUT and OUTPUT streams.
diff --git a/configs/wikitext/wikitext_language_modeling_rnn.yml b/configs/wikitext/wikitext_language_modeling_rnn.yml
index d2806d5..3e87643 100644
--- a/configs/wikitext/wikitext_language_modeling_rnn.yml
+++ b/configs/wikitext/wikitext_language_modeling_rnn.yml
@@ -5,7 +5,7 @@ training:
     data_folder: &data_folder ~/data/language_modeling/wikitext-2
     dataset: &dataset wikitext-2
     subset: train
-    sentence_length: 50
+    sentence_length: 10
     batch_size:  64
 
   # optimizer parameters:
@@ -27,7 +27,7 @@ validation:
     data_folder: *data_folder
     dataset: *dataset
     subset: valid
-    sentence_length: 50
+    sentence_length: 20
     batch_size:  64
 
 # Testing parameters:
diff --git a/ptp/components/mixins/word_mappings.py b/ptp/components/mixins/word_mappings.py
index 53bcf0c..1920574 100644
--- a/ptp/components/mixins/word_mappings.py
+++ b/ptp/components/mixins/word_mappings.py
@@ -17,31 +17,26 @@
 import os
 
 import ptp.components.utils.word_mappings as wm
-from ptp.components.component import Component
 
 
-class WordMappings(Component):
+class WordMappings(object):
     """
     Mixin class that handles the initialization of (word:index) mappings.
+    Assumes that it is mixed-in into class that is derived from the component.
+    .. warning::
+        Constructor (__init__) of the Component class has to be called before the constructor of the WordMappings mixin class.
+
     """
-    def __init__(self, name, class_type, config):
+    def __init__(self): #, name, class_type, config):
         """
         Initializes the (word:index) mappings.
 
-        Loads parameters from configuration, 
-
-        :param name: Component name (read from configuration file).
-        :type name: str
-
-        :param class_type: Class type of the component (derrived from this class).
-
-        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
-        :type config: :py:class:`ptp.configuration.ConfigInterface`
+        Assumes that Component was initialized in advance, which means that the self object possesses the following objects:
+            - self.config
+            - self.globals
+            - self.logger
 
         """
-        # Call constructors of parent classes.
-        Component.__init__(self, name, class_type, config)
-
         # Read the actual configuration.
         self.data_folder = os.path.expanduser(self.config['data_folder'])
 
diff --git a/ptp/components/models/sentence_embeddings.py b/ptp/components/models/sentence_embeddings.py
index 5d44bd4..6004e2a 100644
--- a/ptp/components/models/sentence_embeddings.py
+++ b/ptp/components/models/sentence_embeddings.py
@@ -50,7 +50,7 @@ def __init__(self, name, config):
         """
         # Call base class constructors.
         Model.__init__(self, name, SentenceEmbeddings, config)
-        WordMappings.__init__(self, name, SentenceEmbeddings, config)
+        WordMappings.__init__(self)
 
         # Set key mappings.
         self.key_inputs = self.stream_keys["inputs"]
diff --git a/ptp/components/text/label_indexer.py b/ptp/components/text/label_indexer.py
index 410aa46..c3090cd 100644
--- a/ptp/components/text/label_indexer.py
+++ b/ptp/components/text/label_indexer.py
@@ -16,11 +16,12 @@
 
 import torch
 
+from ptp.components.component import Component
 from ptp.components.mixins.word_mappings import WordMappings
 from ptp.data_types.data_definition import DataDefinition
 
 
-class LabelIndexer(WordMappings):
+class LabelIndexer(Component, WordMappings):
     """
     Class responsible for changing of samples consisting of single words/labels into indices (that e.g. can be latter used for loss calculation, PyTorch-style).
     """
@@ -35,8 +36,9 @@ def __init__(self, name, config):
         :type config: :py:class:`ptp.configuration.ConfigInterface`
 
         """
-        # Call constructor(s) of parent class(es).
-        WordMappings.__init__(self, name, LabelIndexer, config)
+        # Call constructor(s) of parent class(es) - in the right order!
+        Component.__init__(self, name, LabelIndexer, config)
+        WordMappings.__init__(self)
 
         # Set key mappings.
         self.key_inputs = self.stream_keys["inputs"]
diff --git a/ptp/components/text/sentence_indexer.py b/ptp/components/text/sentence_indexer.py
index abaf94a..b21e0f4 100644
--- a/ptp/components/text/sentence_indexer.py
+++ b/ptp/components/text/sentence_indexer.py
@@ -16,11 +16,12 @@
 
 import torch
 
+from ptp.components.component import Component
 from ptp.components.mixins.word_mappings import WordMappings
 from ptp.data_types.data_definition import DataDefinition
 
 
-class SentenceIndexer(WordMappings):
+class SentenceIndexer(Component, WordMappings):
     """
     Class responsible for encoding of sequences of words into list of indices.
     Those can be letter embedded, encoded with 1-hot encoding or else.
@@ -36,13 +37,18 @@ def __init__(self, name, config):
         :type config: :py:class:`ptp.configuration.ConfigInterface`
 
         """
-        # Call constructor(s) of parent class(es).
-        WordMappings.__init__(self, name, SentenceIndexer, config)
+        # Call constructor(s) of parent class(es) - in the right order!
+        Component.__init__(self, name, SentenceIndexer, config)
+        WordMappings.__init__(self)
 
         # Set key mappings.
         self.key_inputs = self.stream_keys["inputs"]
         self.key_outputs = self.stream_keys["outputs"]
-        
+
+        # Read mode from the configuration.
+        self.mode_reverse = self.config['reverse']
+
+
 
     def input_data_definitions(self):
         """ 
diff --git a/ptp/components/text/sentence_one_hot_encoder.py b/ptp/components/text/sentence_one_hot_encoder.py
index c25100f..b25a3e8 100644
--- a/ptp/components/text/sentence_one_hot_encoder.py
+++ b/ptp/components/text/sentence_one_hot_encoder.py
@@ -16,11 +16,12 @@
 
 import torch
 
+from ptp.components.component import Component
 from ptp.components.mixins.word_mappings import WordMappings
 from ptp.data_types.data_definition import DataDefinition
 
 
-class SentenceOneHotEncoder(WordMappings):
+class SentenceOneHotEncoder(Component, WordMappings):
     """
     Class responsible for encoding of samples being sequences of words using 1-hot encoding.
     """
@@ -35,8 +36,9 @@ def __init__(self, name, config):
         :type config: :py:class:`ptp.configuration.ConfigInterface`
 
         """
-        # Call constructor(s) of parent class(es).
-        WordMappings.__init__(self, name, SentenceOneHotEncoder, config)
+        # Call constructor(s) of parent class(es) - in the right order!
+        Component.__init__(self, name, SentenceOneHotEncoder, config)
+        WordMappings.__init__(self)
 
         # Set key mappings.
         self.key_inputs = self.stream_keys["inputs"]
diff --git a/ptp/components/text/word_decoder.py b/ptp/components/text/word_decoder.py
index 0e5a052..e75dd15 100644
--- a/ptp/components/text/word_decoder.py
+++ b/ptp/components/text/word_decoder.py
@@ -16,11 +16,12 @@
 
 import torch
 
+from ptp.components.component import Component
 from ptp.components.mixins.word_mappings import WordMappings
 from ptp.data_types.data_definition import DataDefinition
 
 
-class WordDecoder(WordMappings):
+class WordDecoder(Component, WordMappings):
     """
     Class responsible for decoding of samples encoded in the form of vectors ("probability distributions").
     """
@@ -35,8 +36,9 @@ def __init__(self, name, config):
         :type config: :py:class:`ptp.configuration.ConfigInterface`
 
         """
-        # Call constructor(s) of parent class(es).
-        WordMappings.__init__(self, name, WordDecoder, config)
+        # Call constructor(s) of parent class(es) - in the right order!
+        Component.__init__(self, name, WordDecoder, config)
+        WordMappings.__init__(self)
 
         # Construct reverse mapping for faster processing.
         self.ix_to_word = dict((v,k) for k,v in self.word_to_ix.items())

From 8585face33b486703e412902dd9d535d135ac668 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Fri, 19 Apr 2019 15:14:13 -0700
Subject: [PATCH 34/39] Deindexing mode added to sentence indexer

---
 .../components/text/sentence_indexer.yml      |   5 +
 .../wikitext_language_modeling_rnn.yml        |  21 ++-
 ptp/components/text/sentence_indexer.py       | 131 ++++++++++++++++--
 3 files changed, 142 insertions(+), 15 deletions(-)

diff --git a/configs/default/components/text/sentence_indexer.yml b/configs/default/components/text/sentence_indexer.yml
index 25c2f5e..0921bc7 100644
--- a/configs/default/components/text/sentence_indexer.yml
+++ b/configs/default/components/text/sentence_indexer.yml
@@ -28,6 +28,11 @@ export_word_mappings_to_globals: False
 # Operation mode. If 'reverse' is True, then it will change indices into words (LOADED)
 reverse: False
 
+# Flag indicating whether inputs are represented as distributions or indices (LOADED)
+# Options: True (expects distribution for each input item in sequence)
+#          False (expects indices (max args))
+use_input_distributions: False
+
 streams: 
   ####################################################################
   # 2. Keymappings associated with INPUT and OUTPUT streams.
diff --git a/configs/wikitext/wikitext_language_modeling_rnn.yml b/configs/wikitext/wikitext_language_modeling_rnn.yml
index 3e87643..811dbb5 100644
--- a/configs/wikitext/wikitext_language_modeling_rnn.yml
+++ b/configs/wikitext/wikitext_language_modeling_rnn.yml
@@ -46,7 +46,7 @@ pipeline:
   # Source encoding - model 1.
   source_sentence_embedding:
     type: SentenceEmbeddings
-    priority: 1.1
+    priority: 1
     embeddings_size: 50
     pretrained_embeddings: glove.6B.50d.txt
     data_folder: *data_folder
@@ -61,7 +61,7 @@ pipeline:
   # Target encoding.
   target_indexer:
     type: SentenceIndexer
-    priority: 2.1
+    priority: 2
     data_folder: *data_folder
     import_word_mappings_from_globals: True
     streams:
@@ -87,6 +87,21 @@ pipeline:
     streams:
       targets: indexed_targets
 
+  # Prediction decoding.
+  prediction_decoder:
+    type: SentenceIndexer
+    priority: 10
+    # Reverse mode.
+    reverse: True
+    # Use distributions as inputs.
+    use_input_distributions: True
+    data_folder: *data_folder
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: predictions
+      outputs: prediction_sentences
+
+
   # Statistics.
   batch_size:
     type: BatchSizeStatistics
@@ -109,6 +124,6 @@ pipeline:
   viewer:
     type: StreamViewer
     priority: 100.3
-    input_streams: sources,targets,indexed_targets,predictions
+    input_streams: sources,targets,indexed_targets,prediction_sentences
 
 #: pipeline
diff --git a/ptp/components/text/sentence_indexer.py b/ptp/components/text/sentence_indexer.py
index b21e0f4..7cb0ece 100644
--- a/ptp/components/text/sentence_indexer.py
+++ b/ptp/components/text/sentence_indexer.py
@@ -25,6 +25,8 @@ class SentenceIndexer(Component, WordMappings):
     """
     Class responsible for encoding of sequences of words into list of indices.
     Those can be letter embedded, encoded with 1-hot encoding or else.
+
+    Additionally, when 'reverse' mode is on, it works in the opposite direction, i.e. changing a tensor with indices into a list of words.
     """
     def __init__(self, name, config):
         """
@@ -48,6 +50,13 @@ def __init__(self, name, config):
         # Read mode from the configuration.
         self.mode_reverse = self.config['reverse']
 
+        if self.mode_reverse:
+            # We will need reverse (index:word) mapping.
+            self.ix_to_word = dict((v,k) for k,v in self.word_to_ix.items())
+
+        # Get inputs distributions/indices flag.
+        self.use_input_distributions = self.config["use_input_distributions"]
+
 
 
     def input_data_definitions(self):
@@ -56,9 +65,19 @@ def input_data_definitions(self):
 
         :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
         """
-        return {
-            self.key_inputs: DataDefinition([-1, -1, 1], [list, list, str], "Batch of sentences, each represented as a list of words [BATCH_SIZE] x [SEQ_LENGTH] x [string]"),
-            }
+        if self.mode_reverse:
+            if self.use_input_distributions:
+                return {
+                    self.key_inputs: DataDefinition([-1, -1, -1], [torch.Tensor], "Batch of sentences represented as a single tensor with batch of probability distributions [BATCH_SIZE x SEQ_LENGTH x ITEM_SIZE]"),
+                    }
+            else: 
+                return {
+                    self.key_inputs: DataDefinition([-1, -1], [torch.Tensor], "Batch of sentences represented as a single tensor of indices of particular words [BATCH_SIZE x SEQ_LENGTH]"),
+                    }
+        else: 
+            return {
+                self.key_inputs: DataDefinition([-1, -1, 1], [list, list, str], "Batch of sentences, each represented as a list of words [BATCH_SIZE] x [SEQ_LENGTH] x [string]"),
+                }
 
     def output_data_definitions(self):
         """ 
@@ -66,25 +85,50 @@ def output_data_definitions(self):
 
         :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
         """
-        return {
-            self.key_outputs: DataDefinition([-1, -1], [torch.Tensor], "Batch of sentences represented as a single tensor of indices of particular words  [BATCH_SIZE x SEQ_LENGTH]"),
-            }
+        if self.mode_reverse:
+            return {
+                self.key_outputs: DataDefinition([-1, -1, 1], [list, list, str], "Batch of sentences, each represented as a list of words [BATCH_SIZE] x [SEQ_LENGTH] x [string]"),
+                }
+        else: 
+            return {
+                self.key_outputs: DataDefinition([-1, -1], [torch.Tensor], "Batch of sentences represented as a single tensor of indices of particular words  [BATCH_SIZE x SEQ_LENGTH]"),
+                }
+
 
     def __call__(self, data_dict):
         """
-        Encodes "inputs" in the format of list of tokens (for a single sample)
-        Stores result in "encoded_inputs" field of in data_dict.
+        Encodes inputs into outputs.
+        Depending on the mode (set by 'reverse' config param) calls sentences_to_tensor() (when False) or, when True, tensor_distributions_to_sentences() / tensor_indices_to_sentences() (selected by 'use_input_distributions').
 
-        :param data_dict: :py:class:`ptp.utils.DataDict` object containing (among others):
+        :param data_dict: :py:class:`ptp.datatypes.DataDict` object.
+        """
+        if self.mode_reverse:
+            if self.use_input_distributions:
+                # Produce list of words.
+                self.tensor_distributions_to_sentences(data_dict)
+            else:
+                # Produce list of words.
+                self.tensor_indices_to_sentences(data_dict)
+        else:
+            # Produce indices.
+            self.sentences_to_tensor(data_dict)
+
+
+    def sentences_to_tensor(self, data_dict):
+        """
+        Encodes "inputs" in the format of batch of list of words into a single tensor with corresponding indices.
 
-            - "inputs": expected input field containing list of words [BATCH_SIZE] x [SEQ_SIZE] x [string]
+        :param data_dict: :py:class:`ptp.datatypes.DataDict` object containing (among others):
 
-            - "encoded_targets": added output field containing list of indices [BATCH_SIZE x SEQ_SIZE] 
+            - "inputs": expected input field containing list of lists of words [BATCH_SIZE] x [SEQ_SIZE] x [string]
+
+            - "outputs": added output field containing tensor with indices [BATCH_SIZE x SEQ_SIZE] 
         """
         # Get inputs to be encoded.
         inputs = data_dict[self.key_inputs]
+
         outputs_list = []
-        # Process samples 1 by one.
+        # Process sentences 1 by 1.
         for sample in inputs:
             assert isinstance(sample, (list,)), 'This encoder requires input sample to contain a list of words'
             # Process list.
@@ -102,3 +146,66 @@ def __call__(self, data_dict):
         output = self.app_state.LongTensor(outputs_list)
         # Create the returned dict.
         data_dict.extend({self.key_outputs: output})
+
+    def tensor_indices_to_sentences(self, data_dict):
+        """
+        Encodes "inputs" in the format of tensor with indices into a batch of list of words.
+
+        :param data_dict: :py:class:`ptp.datatypes.DataDict` object containing (among others):
+
+            - "inputs": expected input field containing tensor with indices [BATCH_SIZE x SEQ_SIZE]
+
+            - "outputs": added output field containing list of lists of words [BATCH_SIZE] x [SEQ_SIZE] x [string]
+
+        """
+        # Get inputs to be changed to words.
+        inputs = data_dict[self.key_inputs].data.cpu().numpy().tolist()
+
+        outputs_list = []
+        # Process samples 1 by 1.
+        for sample in inputs:
+            # Process list.
+            output_sample = []
+            # "Decode" sample (list of indices).
+            for token in sample:
+                # Get word.
+                output_word = self.ix_to_word[token]
+                # Add word to outputs.
+                output_sample.append( output_word )
+            # Add sentence to batch.
+            outputs_list.append(output_sample)
+
+        # Create the returned dict.
+        data_dict.extend({self.key_outputs: outputs_list})
+
+    def tensor_distributions_to_sentences(self, data_dict):
+        """
+        Encodes "inputs" in the format of tensor with probability distributions into a batch of list of words.
+
+        :param data_dict: :py:class:`ptp.datatypes.DataDict` object containing (among others):
+
+            - "inputs": expected input field containing tensor with probability distributions [BATCH_SIZE x SEQ_SIZE x ITEM_SIZE]
+
+            - "outputs": added output field containing list of lists of words [BATCH_SIZE] x [SEQ_SIZE] x [string]
+
+        """
+        # Get inputs to be changed to words.
+        inputs = data_dict[self.key_inputs].max(2)[1].data.cpu().numpy().tolist()
+
+        outputs_list = []
+        # Process samples 1 by 1.
+        for sample in inputs:
+            # Process list.
+            output_sample = []
+            # "Decode" sample (list of indices).
+            for token in sample:
+
+                # Get word.
+                output_word = self.ix_to_word[token]
+                # Add word to outputs.
+                output_sample.append( output_word )
+            # Add sentence to batch.
+            outputs_list.append(output_sample)
+
+        # Create the returned dict.
+        data_dict.extend({self.key_outputs: outputs_list})

From 3142b7cddf4ad9042898088617a8d2a375a5b954 Mon Sep 17 00:00:00 2001
From: Alexis Asseman <33075224+aasseman@users.noreply.github.com>
Date: Mon, 22 Apr 2019 17:36:21 -0700
Subject: [PATCH 35/39] Added broadcast functionality to FFN

---
 .../models/feed_forward_network.yml           |  7 ++++++
 ptp/components/models/feed_forward_network.py | 22 +++++++++++++++----
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/configs/default/components/models/feed_forward_network.yml b/configs/default/components/models/feed_forward_network.yml
index 55a43b1..b9b80c0 100644
--- a/configs/default/components/models/feed_forward_network.yml
+++ b/configs/default/components/models/feed_forward_network.yml
@@ -15,6 +15,13 @@ dropout_rate: 0
 # If true, output of the last layer will be additionally processed with Log Softmax (LOADED)
 use_logsoftmax: True
 
+# Number of dimensions, where:
+#   - 2 means [Batch size, Input size]
+#   - n means [Batch size, dim 1, ..., dim n-2, Input size]
+# The FFN is applied along the last (Input size) dimension and broadcast over all the other dimensions.
+# Consequently, all dimension sizes but the last one are preserved in the output.
+dimensions: 2
+
 streams: 
   ####################################################################
   # 2. Keymappings associated with INPUT and OUTPUT streams.
diff --git a/ptp/components/models/feed_forward_network.py b/ptp/components/models/feed_forward_network.py
index adbc757..5d4dbd0 100644
--- a/ptp/components/models/feed_forward_network.py
+++ b/ptp/components/models/feed_forward_network.py
@@ -40,6 +40,8 @@ def __init__(self, name, config):
         self.key_inputs = self.stream_keys["inputs"]
         self.key_predictions = self.stream_keys["predictions"]
 
+        self.dimensions = self.config["dimensions"]
+
         # Retrieve input size from global variables.
         self.input_size = self.globals["input_size"]
         if type(self.input_size) == list:
@@ -106,7 +108,7 @@ def input_data_definitions(self):
         :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
         """
         return {
-            self.key_inputs: DataDefinition([-1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x INPUT_SIZE]"),
+            self.key_inputs: DataDefinition(([-1] * (self.dimensions -1)) + [self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x ... x INPUT_SIZE]"),
             }
 
 
@@ -117,7 +119,7 @@ def output_data_definitions(self):
         :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
         """
         return {
-            self.key_predictions: DataDefinition([-1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x PREDICTION_SIZE]")
+            self.key_predictions: DataDefinition(([-1] * (self.dimensions -1)) + [self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x ... x PREDICTION_SIZE]")
             }
 
     def forward(self, data_dict):
@@ -126,13 +128,22 @@ def forward(self, data_dict):
 
         :param data_dict: DataDict({'inputs', 'predictions ...}), where:
 
-            - inputs: expected inputs [BATCH_SIZE x INPUT_SIZE],
-            - predictions: returned output with predictions (log_probs) [BATCH_SIZE x NUM_CLASSES]
+            - inputs: expected inputs [BATCH_SIZE x ... x INPUT_SIZE],
+            - predictions: returned output with predictions (log_probs) [BATCH_SIZE x ... x NUM_CLASSES]
         """
 
         # Get inputs.
         x = data_dict[self.key_inputs]
 
+        # Check that the input has the number of dimensions that we expect
+        assert len(x.shape) == self.dimensions, \
+            "Expected " + str(self.dimensions) + " dimensions for input, got " + str(len(x.shape))\
+                 + " instead. Check number of dimensions in the config."
+
+        # Reshape such that we do a broadcast over the last dimension
+        origin_shape = x.shape
+        x = x.contiguous().view(-1, origin_shape[-1])
+
         # Propagate inputs through all but last layer.
         for layer in self.layers[:-1]:
             x = layer(x)
@@ -147,5 +158,8 @@ def forward(self, data_dict):
         if self.use_logsoftmax:
             x = self.log_softmax(x)
 
+        # Restore the input dimensions but the last one (as it's been resized by the FFN)
+        x = x.view(*origin_shape[0:self.dimensions-1], -1)
+
         # Add predictions to datadict.
         data_dict.extend({self.key_predictions: x})

From 0e4f39f58a3b75320207ffb6dbefcd70aa0ebe42 Mon Sep 17 00:00:00 2001
From: Alexis Asseman <33075224+aasseman@users.noreply.github.com>
Date: Tue, 23 Apr 2019 09:43:14 -0700
Subject: [PATCH 36/39] Add simple, all-in-one seq2seq RNN component

---
 .../default/components/models/seq2seq_rnn.yml |  81 +++++++
 ...itext_language_modeling_seq2seq_simple.yml | 167 ++++++++++++++
 ptp/components/models/__init__.py             |   2 +
 ptp/components/models/seq2seq_rnn.py          | 214 ++++++++++++++++++
 4 files changed, 464 insertions(+)
 create mode 100644 configs/default/components/models/seq2seq_rnn.yml
 create mode 100644 configs/wikitext/wikitext_language_modeling_seq2seq_simple.yml
 create mode 100644 ptp/components/models/seq2seq_rnn.py

diff --git a/configs/default/components/models/seq2seq_rnn.yml b/configs/default/components/models/seq2seq_rnn.yml
new file mode 100644
index 0000000..9d9350e
--- /dev/null
+++ b/configs/default/components/models/seq2seq_rnn.yml
@@ -0,0 +1,81 @@
+# This file defines the default values for the seq2seq RNN model.
+
+####################################################################
+# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
+####################################################################
+
+# Size of the hidden state (LOADED)
+hidden_size: 100
+
+# Flag informing the model to learn the initial state (h0/c0) (LOADED)
+# When false, (h0/c0) will be initialized as zeros.
+
+# Initial state type:
+#   * Zero (null vector)
+#   * Trainable (xavier initialization, trainable)
+#   * Input (the initial hidden state comes from an input stream)
+initial_state: Trainable
+
+# Whether to include the last hidden state in the outputs
+output_last_state: False
+
+# Type of recurrent cell (LOADED)
+# Options: LSTM | GRU | RNN_TANH | RNN_RELU
+cell_type: LSTM
+
+# Number of "stacked" layers (LOADED)
+num_layers: 1
+
+# Dropout rate (LOADED)
+# Default: 0 (means that it is turned off)
+dropout_rate: 0
+
+# Prediction mode (LOADED)
+# Options: 
+#   * Dense (passes every activation through output layer) |
+#   * Last (passes only the last activation through output layer) |
+#   * None (all outputs are discarded)
+prediction_mode: Dense
+
+# Input mode
+# Options:
+#   * Dense (every iteration expects an input)
+#   * Autoregression_First (Autoregression, expects an input for the first iteration)
+#   * Autoregression_None (Autoregression, first input will be a null vector)
+input_mode: Dense
+
+autoregression_length: 50
+
+# If true, output of the last layer will be additionally processed with Log Softmax (LOADED)
+use_logsoftmax: True
+
+streams: 
+  ####################################################################
+  # 2. Keymappings associated with INPUT and OUTPUT streams.
+  ####################################################################
+
+  # Stream containing batch of input sequences (INPUT)
+  inputs: inputs
+
+  # Stream containing predictions (OUTPUT)
+  predictions: predictions
+
+globals:
+  ####################################################################
+  # 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
+  ####################################################################
+
+  # Size of the input (RETRIEVED)
+  input_size: input_size
+
+  # Size of the prediction (RETRIEVED)
+  prediction_size: prediction_size
+
+  ####################################################################
+  # 4. Keymappings associated with GLOBAL variables that will be SET.
+  ####################################################################
+
+  ####################################################################
+  # 5. Keymappings associated with statistics that will be ADDED.
+  ####################################################################
+
diff --git a/configs/wikitext/wikitext_language_modeling_seq2seq_simple.yml b/configs/wikitext/wikitext_language_modeling_seq2seq_simple.yml
new file mode 100644
index 0000000..731d590
--- /dev/null
+++ b/configs/wikitext/wikitext_language_modeling_seq2seq_simple.yml
@@ -0,0 +1,167 @@
+# This pipeline applies seq2seq on wikitext-2 to make word-level prediction.
+# It's been made for test purposes only, as it is doing:
+# [word 0 , ... , word 49] -> [word 1 , ... , word 50] (basically copying most of the input)
+#
+# The seq2seq here is implemented through the use of a simplified seq2seq component `Seq2Seq_RNN`
+
+# Training parameters:
+training:
+  problem:
+    type: &p_type WikiTextLanguageModeling
+    data_folder: &data_folder ~/data/language_modeling/wikitext-2
+    dataset: &dataset wikitext-2
+    subset: train
+    sentence_length: 50
+    batch_size:  64
+
+  # optimizer parameters:
+  optimizer:
+    name: Adam
+    lr: 1.0e-3
+
+  # settings parameters
+  terminal_conditions:
+    loss_stop: 1.0e-2
+    episode_limit: 1000000
+    epoch_limit: 100
+
+# Validation parameters:
+validation:
+  partial_validation_interval: 100
+  problem:
+    type: *p_type
+    data_folder: *data_folder
+    dataset: *dataset
+    subset: valid
+    sentence_length: 50
+    batch_size:  64
+
+# Testing parameters:
+testing:
+  problem:
+    type: *p_type 
+    data_folder: *data_folder
+    dataset: *dataset
+    subset: test
+    sentence_length: 50
+    batch_size: 64
+
+pipeline:
+  name: wikitext_language_modeling_rnn
+
+  # Source encoding - model 1.
+  source_sentence_embedding:
+    type: SentenceEmbeddings
+    priority: 1.1
+    embeddings_size: 50
+    pretrained_embeddings: glove.6B.50d.txt
+    data_folder: *data_folder
+    source_vocabulary_files: wiki.train.tokens,wiki.valid.tokens,wiki.test.tokens
+    vocabulary_mappings_file: wiki.all.tokenized_words
+    additional_tokens: <eos>
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: sources
+      outputs: embedded_sources
+        
+  # Target encoding.
+  target_indexer:
+    type: SentenceIndexer
+    priority: 2.1
+    data_folder: *data_folder
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: targets
+      outputs: indexed_targets
+
+  # Publish the hidden size of the seq2seq
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 1
+    # Add s2s_hidden_size to globals, so the components below can use it.
+    keys: s2s_hidden_size
+    values: 300
+
+  # FF, to resize the embeddings to whatever the hidden size of the seq2seq is.
+  ff_resize_s2s_input:
+    type: FeedForwardNetwork 
+    priority: 2.5
+    s2s_hidden_size: 300
+    use_logsoftmax: False
+    dimensions: 3
+    streams:
+      inputs: embedded_sources
+      predictions: embedded_sources_resized
+    globals:
+      input_size: embeddings_size
+      prediction_size: s2s_hidden_size
+  
+  # LSTM seq2seq
+  lstm_encoder:
+    type: Seq2Seq_RNN
+    priority: 3
+    initial_state: Trainable
+    hidden_size: 300
+    num_layers: 3
+    use_logsoftmax: False
+    streams:
+      inputs: embedded_sources_resized
+      predictions: s2s_output
+    globals:
+      input_size: s2s_hidden_size
+      prediction_size: s2s_hidden_size 
+
+  # FF, to resize from the hidden size of the seq2seq to the size of the target vector
+  ff_resize_s2s_output:
+    type: FeedForwardNetwork 
+    use_logsoftmax: True
+    dimensions: 3
+    priority: 5
+    streams:
+      inputs: s2s_output
+    globals:
+      input_size: s2s_hidden_size
+      prediction_size: vocabulary_size
+
+  # Loss
+  nllloss:
+    type: NLLLoss
+    priority: 6
+    num_targets_dims: 2
+    streams:
+      targets: indexed_targets
+      loss: loss
+
+  # Prediction decoding.
+  prediction_decoder:
+    type: SentenceIndexer
+    priority: 10
+    # Reverse mode.
+    reverse: True
+    # Use distributions as inputs.
+    use_input_distributions: True
+    data_folder: *data_folder
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: predictions
+      outputs: prediction_sentences
+
+  # Statistics.
+  batch_size:
+    type: BatchSizeStatistics
+    priority: 100.0
+
+  bleu:
+    type: BLEUStatistics
+    priority: 100.2
+    streams:
+      targets: indexed_targets
+
+      
+  # Viewers.
+  viewer:
+    type: StreamViewer
+    priority: 100.3
+    input_streams: sources,targets,indexed_targets,prediction_sentences
+
+#: pipeline
diff --git a/ptp/components/models/__init__.py b/ptp/components/models/__init__.py
index 32e95b0..b1d8d64 100644
--- a/ptp/components/models/__init__.py
+++ b/ptp/components/models/__init__.py
@@ -6,6 +6,7 @@
 from .model import Model
 from .recurrent_neural_network import RecurrentNeuralNetwork
 from .sentence_embeddings import SentenceEmbeddings
+from .seq2seq_rnn import Seq2Seq_RNN
 
 __all__ = [
     'ConvNetEncoder',
@@ -16,4 +17,5 @@
     'Model',
     'RecurrentNeuralNetwork',
     'SentenceEmbeddings',
+    'Seq2Seq_RNN'
     ]
diff --git a/ptp/components/models/seq2seq_rnn.py b/ptp/components/models/seq2seq_rnn.py
new file mode 100644
index 0000000..c8d1bdb
--- /dev/null
+++ b/ptp/components/models/seq2seq_rnn.py
@@ -0,0 +1,214 @@
+# Copyright (C) tkornuta, IBM Corporation 2019
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__author__ = "Alexis Asseman"
+
+import torch
+
+from ptp.configuration.configuration_error import ConfigurationError
+from ptp.components.models.model import Model
+from ptp.data_types.data_definition import DataDefinition
+
+
+class Seq2Seq_RNN(Model): 
+    """
+    Simple seq2seq model consisting of an encoder RNN and an autoregressive decoder RNN, followed by a fully connected output layer with optional log softmax non-linearity.
+    """
+    def __init__(self, name, config):
+        """
+        Initializes the model.
+
+        :param config: Dictionary of parameters (read from configuration ``.yaml`` file).
+        :type config: ``ptp.configuration.ConfigInterface``
+        """
+        # Call constructors of parent classes.
+        Model.__init__(self, name, Seq2Seq_RNN, config)
+
+        # Get input/output mode
+        self.input_mode = self.config["input_mode"]
+
+        self.autoregression_length = self.config["autoregression_length"]
+        
+        # Check if initial state (h0/c0) is zero, trainable, or coming from input stream.
+        self.initial_state = self.config["initial_state"]
+
+        # Get number of layers from config.
+        self.num_layers = self.config["num_layers"]
+
+        # Retrieve input size from global variables.
+        self.key_input_size = self.global_keys["input_size"]
+        self.input_size = self.globals["input_size"]
+        if type(self.input_size) == list:
+            if len(self.input_size) == 1:
+                self.input_size = self.input_size[0]
+            else:
+                raise ConfigurationError("RNN input size '{}' must be a single dimension (current {})".format(self.key_input_size, self.input_size))
+
+        # Retrieve output (prediction) size from global params.
+        self.prediction_size = self.globals["prediction_size"]
+        if type(self.prediction_size) == list:
+            if len(self.prediction_size) == 1:
+                self.prediction_size = self.prediction_size[0]
+            else:
+                raise ConfigurationError("RNN prediction size '{}' must be a single dimension (current {})".format(self.key_prediction_size, self.prediction_size))
+        
+        # Retrieve hidden size from configuration.
+        self.hidden_size = self.config["hidden_size"]
+        if type(self.hidden_size) == list:
+            if len(self.hidden_size) == 1:
+                self.hidden_size = self.hidden_size[0]
+            else:
+                raise ConfigurationError("RNN hidden_size must be a single dimension (current {})".format(self.hidden_size))
+
+        # Create RNN depending on the configuration
+        self.cell_type = self.config["cell_type"]
+        if self.cell_type in ['LSTM', 'GRU']:
+            # Create rnn cell.
+            self.rnn_cell_enc = getattr(torch.nn, self.cell_type)(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
+            self.rnn_cell_dec = getattr(torch.nn, self.cell_type)(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
+        else:
+            try:
+                # Retrieve the non-linearity.
+                nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[self.cell_type]
+                # Create rnn cell.
+                self.rnn_cell_enc = torch.nn.RNN(self.input_size, self.hidden_size, self.num_layers, nonlinearity=nonlinearity, batch_first=True)
+                self.rnn_cell_dec = torch.nn.RNN(self.input_size, self.hidden_size, self.num_layers, nonlinearity=nonlinearity, batch_first=True)
+            except KeyError:
+                raise ConfigurationError( "Invalid RNN type, available options for 'cell_type' are ['LSTM', 'GRU', 'RNN_TANH', 'RNN_RELU'] (currently '{}')".format(self.cell_type))
+        
+        # Parameters - for a single sample.
+        h0 = torch.zeros(self.num_layers, 1, self.hidden_size)
+        c0 = torch.zeros(self.num_layers, 1, self.hidden_size)
+
+        self.init_hidden = None
+
+        if self.initial_state == "Trainable":
+            self.logger.info("Using trainable initial (h0/c0) state")
+            # Initialize a single vector used as hidden state.
+            # Initialize it using xavier initialization.
+            torch.nn.init.xavier_uniform(h0)
+            # It will be trainable, i.e. the system will learn what should be the right initialization state.
+            self.init_hidden = torch.nn.Parameter(h0, requires_grad=True)
+            # Initialize memory cell in a similar way.
+            if self.cell_type == 'LSTM':
+                torch.nn.init.xavier_uniform(c0)
+                self.init_memory = torch.nn.Parameter(c0, requires_grad=True)
+        elif self.initial_state == "Zero":
+            self.logger.info("Using zero initial (h0/c0) state")
+            # We will still embed it into a parameter to enable storing/loading of both types of models by each other.
+            self.init_hidden = torch.nn.Parameter(h0, requires_grad=False)
+            if self.cell_type == 'LSTM':
+                self.init_memory = torch.nn.Parameter(c0, requires_grad=False)
+
+        # Get key mappings.
+        self.key_inputs = self.stream_keys["inputs"]
+        self.key_predictions = self.stream_keys["predictions"]
+        
+        self.logger.info("Initializing RNN with input size = {}, hidden size = {} and prediction size = {}".format(self.input_size, self.hidden_size, self.prediction_size))
+
+        # Create the output layer.
+        self.activation2output = torch.nn.Linear(self.hidden_size, self.prediction_size)
+        
+        # Create the final non-linearity.
+        self.use_logsoftmax = self.config["use_logsoftmax"]
+        if self.use_logsoftmax:
+            # Used when returning dense predictions, i.e. every output of the unfolded model.
+            self.log_softmax = torch.nn.LogSoftmax(dim=2)
+
+    def initialize_hiddens_state(self, batch_size):
+
+        if self.cell_type == 'LSTM':
+            # Return tuple (hidden_state, memory_cell).
+            return (self.init_hidden.expand(self.num_layers, batch_size, self.hidden_size).contiguous(),
+                self.init_memory.expand(self.num_layers, batch_size, self.hidden_size).contiguous() )
+
+        else:
+            # Return hidden_state.
+            return self.init_hidden.expand(self.num_layers, batch_size, self.hidden_size).contiguous()
+
+
+    def input_data_definitions(self):
+        """ 
+        Function returns a dictionary with definitions of input data that are required by the component.
+
+        :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
+        """
+        d = {}
+
+        d[self.key_inputs] = DataDefinition([-1, -1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]")
+
+        return d
+
+    def output_data_definitions(self):
+        """ 
+        Function returns a dictionary with definitions of output data produced by the component.
+
+        :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
+        """
+        d = {}
+    
+        d[self.key_predictions] = DataDefinition([-1, -1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]")
+
+        return d
+
+    def forward(self, data_dict):
+        """
+        Forward pass of the model.
+
+        :param data_dict: DataDict({'inputs', 'predictions ...}), where:
+
+            - inputs: expected inputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE],
+            - predictions: returned output with predictions (log_probs) [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]
+        """
+        
+        # Get inputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]
+        inputs = data_dict[self.key_inputs]
+        if inputs.dim() == 2:
+            inputs = inputs.unsqueeze(1)
+        batch_size = inputs.shape[0]
+
+
+        # Initialize hidden state.
+        hidden = self.initialize_hiddens_state(batch_size)
+
+
+        # Encoder
+        activations, hidden = self.rnn_cell_enc(inputs, hidden)
+
+        # Propagate inputs through rnn cell.
+        activations_partial, hidden = self.rnn_cell_dec(activations[:, -1, :].unsqueeze(1), hidden)
+        activations = []
+        activations += [activations_partial]
+        for i in range(self.autoregression_length - 1):
+            activations_partial, hidden = self.rnn_cell_dec(activations_partial, hidden)
+            activations += [activations_partial]
+        activations = torch.stack(activations, 1)
+
+        # Pass every activation through the output layer.
+        # Reshape to 2D tensor [BATCH_SIZE * SEQ_LEN x HIDDEN_SIZE]
+        outputs = activations.contiguous().view(-1, self.hidden_size)
+
+        # Propagate data through the output layer [BATCH_SIZE * SEQ_LEN x PREDICTION_SIZE]
+        outputs = self.activation2output(outputs)
+
+        # Reshape back to 3D tensor [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]
+        outputs = outputs.view(activations.size(0), activations.size(1), outputs.size(1))
+
+        # Log softmax - along PREDICTION dim.
+        if self.use_logsoftmax:
+            outputs = self.log_softmax(outputs)
+
+        # Add predictions to datadict.
+        data_dict.extend({self.key_predictions: outputs})
+

From 2794e2b8dd27d6dcb9dc40caca5c376b1e5ee640 Mon Sep 17 00:00:00 2001
From: Alexis Asseman <33075224+aasseman@users.noreply.github.com>
Date: Tue, 23 Apr 2019 09:46:01 -0700
Subject: [PATCH 37/39] Modified RecurrentNeuralNetwork, such that it can
 input/output hidden state stream, and do autoregression.

---
 .../models/recurrent_neural_network.yml       |   8 +
 .../wikitext_language_modeling_seq2seq.yml    | 196 ++++++++++++++++++
 .../models/recurrent_neural_network.py        |  42 +++-
 3 files changed, 235 insertions(+), 11 deletions(-)
 create mode 100644 configs/wikitext/wikitext_language_modeling_seq2seq.yml

diff --git a/configs/default/components/models/recurrent_neural_network.yml b/configs/default/components/models/recurrent_neural_network.yml
index f43a5bf..a0e6f5e 100644
--- a/configs/default/components/models/recurrent_neural_network.yml
+++ b/configs/default/components/models/recurrent_neural_network.yml
@@ -57,9 +57,17 @@ streams:
   # Stream containing batch of images (INPUT)
   inputs: inputs
 
+  # Stream containing the initial state of the RNN (INPUT)
+  # The stream will actually be created only if `initial_state: Input`
+  input_state: input_state
+
   # Stream containing predictions (OUTPUT)
   predictions: predictions
 
+  # Stream containing the final output state of the RNN (OUTPUT)
+  # The stream will actually be created only if `output_last_state: True`
+  output_state: output_state
+
 globals:
   ####################################################################
   # 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
diff --git a/configs/wikitext/wikitext_language_modeling_seq2seq.yml b/configs/wikitext/wikitext_language_modeling_seq2seq.yml
new file mode 100644
index 0000000..84bbeaf
--- /dev/null
+++ b/configs/wikitext/wikitext_language_modeling_seq2seq.yml
@@ -0,0 +1,196 @@
+# This pipeline applies seq2seq on wikitext-2 to make word-level prediction.
+# It's been made for test purposes only, as it is doing:
+# [word 0 , ... , word 49] -> [word 1 , ... , word 50] (basically copying most of the input)
+#
+# The seq2seq here is implemented through the use of two `RecurrentNeuralNetwork` components
+
+# Training parameters:
+training:
+  problem:
+    type: &p_type WikiTextLanguageModeling
+    data_folder: &data_folder ~/data/language_modeling/wikitext-2
+    dataset: &dataset wikitext-2
+    subset: train
+    sentence_length: 50
+    batch_size:  64
+
+  # optimizer parameters:
+  optimizer:
+    name: Adam
+    lr: 1.0e-3
+
+  # settings parameters
+  terminal_conditions:
+    loss_stop: 1.0e-2
+    episode_limit: 1000000
+    epoch_limit: 100
+
+# Validation parameters:
+validation:
+  partial_validation_interval: 100
+  problem:
+    type: *p_type
+    data_folder: *data_folder
+    dataset: *dataset
+    subset: valid
+    sentence_length: 50
+    batch_size:  64
+
+# Testing parameters:
+testing:
+  problem:
+    type: *p_type 
+    data_folder: *data_folder
+    dataset: *dataset
+    subset: test
+    sentence_length: 50
+    batch_size: 64
+
+pipeline:
+  name: wikitext_language_modeling_seq2seq
+
+  # Source encoding - model 1.
+  source_sentence_embedding:
+    type: SentenceEmbeddings
+    priority: 1.1
+    embeddings_size: 50
+    pretrained_embeddings: glove.6B.50d.txt
+    data_folder: *data_folder
+    source_vocabulary_files: wiki.train.tokens,wiki.valid.tokens,wiki.test.tokens
+    vocabulary_mappings_file: wiki.all.tokenized_words
+    additional_tokens: <eos>
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: sources
+      outputs: embedded_sources
+        
+  # Target encoding.
+  target_indexer:
+    type: SentenceIndexer
+    priority: 2.1
+    data_folder: *data_folder
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: targets
+      outputs: indexed_targets
+
+  # Publish the hidden size of the seq2seq
+  global_publisher:
+    type: GlobalVariablePublisher
+    priority: 1
+    # Add s2s_hidden_size to globals, so the downstream components can use it.
+    keys: s2s_hidden_size
+    values: 300
+
+  # FF, to resize the embeddings to whatever the hidden size of the seq2seq is.
+  ff_resize_s2s_input:
+    type: FeedForwardNetwork 
+    priority: 2.5
+    s2s_hidden_size: 300
+    use_logsoftmax: False
+    dimensions: 3
+    streams:
+      inputs: embedded_sources
+      predictions: embedded_sources_resized
+    globals:
+      input_size: embeddings_size
+      prediction_size: s2s_hidden_size
+  
+  # LSTM Encoder
+  lstm_encoder:
+    type: RecurrentNeuralNetwork
+    priority: 3
+    initial_state: Trainable
+    hidden_size: 300
+    num_layers: 3
+    use_logsoftmax: False
+    output_last_state: True
+    prediction_mode: Last
+    streams:
+      inputs: embedded_sources_resized
+      predictions: s2s_encoder_output
+      output_state: s2s_state_output
+    globals:
+      input_size: s2s_hidden_size
+      prediction_size: s2s_hidden_size 
+
+  # LSTM Decoder
+  lstm_decoder:
+    type: RecurrentNeuralNetwork
+    priority: 4
+    initial_state: Input
+    hidden_size: 300
+    num_layers: 3
+    use_logsoftmax: False
+    input_mode: Autoregression_First
+    autoregression_length: 50
+    prediction_mode: Dense
+    streams:
+      inputs: s2s_encoder_output
+      predictions: s2s_decoder_output
+      input_state: s2s_state_output
+    globals:
+      input_size: s2s_hidden_size
+      prediction_size: s2s_hidden_size 
+
+  # FF, to resize from the hidden size of the seq2seq to the size of the target vector
+  ff_resize_s2s_output:
+    type: FeedForwardNetwork 
+    use_logsoftmax: True
+    dimensions: 3
+    priority: 5
+    streams:
+      inputs: s2s_decoder_output
+    globals:
+      input_size: s2s_hidden_size
+      prediction_size: vocabulary_size
+
+  # Loss
+  nllloss:
+    type: NLLLoss
+    priority: 6
+    num_targets_dims: 2
+    streams:
+      targets: indexed_targets
+      loss: loss
+
+  # Prediction decoding.
+  prediction_decoder:
+    type: SentenceIndexer
+    priority: 10
+    # Reverse mode.
+    reverse: True
+    # Use distributions as inputs.
+    use_input_distributions: True
+    data_folder: *data_folder
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: predictions
+      outputs: prediction_sentences
+
+
+  # Statistics.
+  batch_size:
+    type: BatchSizeStatistics
+    priority: 100.0
+
+  #accuracy:
+  #  type: AccuracyStatistics
+  #  priority: 100.1
+  #  streams:
+  #    targets: indexed_targets
+
+  bleu:
+    type: BLEUStatistics
+    priority: 100.2
+    streams:
+      targets: indexed_targets
+
+      
+  # Viewers.
+  viewer:
+    type: StreamViewer
+    priority: 100.3
+    input_streams: sources,targets,indexed_targets,prediction_sentences
+
+#: pipeline
diff --git a/ptp/components/models/recurrent_neural_network.py b/ptp/components/models/recurrent_neural_network.py
index d06a48a..75a7bd4 100644
--- a/ptp/components/models/recurrent_neural_network.py
+++ b/ptp/components/models/recurrent_neural_network.py
@@ -101,6 +101,8 @@ def __init__(self, name, config):
         h0 = torch.zeros(self.num_layers, 1, self.hidden_size)
         c0 = torch.zeros(self.num_layers, 1, self.hidden_size)
 
+        self.init_hidden = None
+
         if self.initial_state == "Trainable":
             self.logger.info("Using trainable initial (h0/c0) state")
             # Initialize a single vector used as hidden state.
@@ -112,7 +114,7 @@ def __init__(self, name, config):
             if self.cell_type == 'LSTM':
                 torch.nn.init.xavier_uniform(c0)
                 self.init_memory = torch.nn.Parameter(c0, requires_grad=True)
-        elif self.initial_state == "Zero":
+        elif self.initial_state in ["Zero", "Input"]:
             self.logger.info("Using zero initial (h0/c0) state")
             # We will still embedd it into parameter to enable storing/loading of both types of models by each other.
             self.init_hidden = torch.nn.Parameter(h0, requires_grad=False)
@@ -167,10 +169,11 @@ def input_data_definitions(self):
         if self.input_mode == "Dense":
             d[self.key_inputs] = DataDefinition([-1, -1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]")
         elif self.input_mode == "Autoregression_First":
-            d[self.key_inputs] = DataDefinition([-1, 1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]")
+            d[self.key_inputs] = DataDefinition([-1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]")
 
-        if "Autoregression" in self.input_mode:
-            d[self.key_input_state] = DataDefinition([-1, 2 if self.cell_type == 'LSTM' else 1, self.input_size, 1, self.hidden_size])
+        # Input hidden state
+        if self.initial_state == "Input":
+            d[self.key_input_state] = DataDefinition([-1, 2 if self.cell_type == 'LSTM' else 1, self.input_size, 1, self.hidden_size], [torch.tensor], "Batch of RNN last states")
 
         return d
 
@@ -188,8 +191,9 @@ def output_data_definitions(self):
             # Only last prediction.
             d[self.key_predictions] = DataDefinition([-1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]")
 
+        # Output hidden state stream
         if self.output_last_state:
-            d[self.key_output_state] = DataDefinition([-1, 2 if self.cell_type == 'LSTM' else 1, self.input_size, 1, self.hidden_size])
+            d[self.key_output_state] = DataDefinition([-1, 2 if self.cell_type == 'LSTM' else 1, self.input_size, 1, self.hidden_size], [torch.tensor], "Batch of RNN last states")
         
         return d
 
@@ -203,31 +207,47 @@ def forward(self, data_dict):
             - predictions: returned output with predictions (log_probs) [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]
         """
         
+        inputs = None
+        batch_size = None
+
         # Get inputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]
         if "None" in self.input_mode:
-            batch_size = data_dict[self.key_input_state].shape[0]
-            inputs = torch.zeros(1, self.input_size, self.hidden_size)
+            batch_size = data_dict[self.key_input_state][0].shape[1]
+            inputs = torch.zeros(batch_size, 1, self.hidden_size)
+            if next(self.parameters()).is_cuda:
+                inputs = inputs.cuda() 
+
         else:
-            batch_size = inputs.shape[0]
             inputs = data_dict[self.key_inputs]
-
+            if inputs.dim() == 2:
+                inputs = inputs.unsqueeze(1)
+            batch_size = inputs.shape[0]
 
 
         # Initialize hidden state.
-        hidden = self.initialize_hiddens_state(batch_size)
+        if self.initial_state == "Input":
+            hidden = data_dict[self.key_input_state]
+        else:
+            hidden = self.initialize_hiddens_state(batch_size)
+
+        activations = []
 
-        # Propagate inputs through rnn cell.
+        # Autoregressive mode - feed back outputs in the input
         if "Autoregression" in self.input_mode:
             activations_partial, hidden = self.rnn_cell(inputs, hidden)
             activations += [activations_partial]
+            # Feed back the outputs iteratively
             for i in range(self.autoregression_length - 1):
                 activations_partial, hidden = self.rnn_cell(activations_partial, hidden)
+                # Add the single step output into list
                 if self.prediction_mode == "Dense":
                     activations += [activations_partial]
+            # Reassemble all the outputs from list into an output sequence
             if self.prediction_mode == "Dense":
                 activations = torch.stack(activations, 1)
             else:
                 activations = activations_partial
+        # Normal mode - feed the entire input sequence at once
         else:
             activations, hidden = self.rnn_cell(inputs, hidden)
 

From 1beb8449eb2eb1c1c248b749b43705919850159c Mon Sep 17 00:00:00 2001
From: Tomasz Kornuta <43558388+tkornut@users.noreply.github.com>
Date: Tue, 23 Apr 2019 11:15:22 -0700
Subject: [PATCH 38/39] Update seq2seq_rnn.py

author in copyright;)
---
 ptp/components/models/seq2seq_rnn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ptp/components/models/seq2seq_rnn.py b/ptp/components/models/seq2seq_rnn.py
index c8d1bdb..813ab92 100644
--- a/ptp/components/models/seq2seq_rnn.py
+++ b/ptp/components/models/seq2seq_rnn.py
@@ -1,4 +1,4 @@
-# Copyright (C) tkornuta, IBM Corporation 2019
+# Copyright (C) aasseman, IBM Corporation 2019
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From df075d4f664f3bdbd214898fecbfe4af15f5620e Mon Sep 17 00:00:00 2001
From: Tomasz Kornuta <43558388+tkornut@users.noreply.github.com>
Date: Tue, 23 Apr 2019 11:33:29 -0700
Subject: [PATCH 39/39] Update __init__.py

Missing comma after 'Seq2Seq_RNN', in __init__
---
 ptp/components/models/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ptp/components/models/__init__.py b/ptp/components/models/__init__.py
index b6bad0f..3451d2f 100644
--- a/ptp/components/models/__init__.py
+++ b/ptp/components/models/__init__.py
@@ -20,7 +20,7 @@
     'Model',
     'RecurrentNeuralNetwork',
     'SentenceEmbeddings',
-    'Seq2Seq_RNN'
+    'Seq2Seq_RNN',
     'ElementWiseMultiplication',
     'MultimodalCompactBilinearPooling',
     ]