From 805429be13ec034deec4e6de500e24b1d72296e2 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 23 Apr 2019 10:44:35 -0700 Subject: [PATCH 01/15] added logging log_dir at the end of training --- ptp/workers/online_trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ptp/workers/online_trainer.py b/ptp/workers/online_trainer.py index 641a3c6..ec33760 100644 --- a/ptp/workers/online_trainer.py +++ b/ptp/workers/online_trainer.py @@ -334,6 +334,7 @@ def run_experiment(self): # Finalize statistics collection. self.finalize_statistics_collection() self.finalize_tensorboard() + self.logger.info("Experiment logged to: {}".format(self.log_dir)) def main(): From 02df0226923c64457d6c20f690c3f7559af0aaf5 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 23 Apr 2019 12:48:11 -0700 Subject: [PATCH 02/15] Initial version of RN --- .../models/vqa/relational_network.yml | 55 +++++++ .../c2_classification_all_rnn_vgg16_mcb.yml | 1 - ...ification_all_rnn_vgg16_relational_net.yml | 94 +++++++++++ ptp/components/models/__init__.py | 2 + .../models/vqa/relational_network.py | 147 ++++++++++++++++++ 5 files changed, 298 insertions(+), 1 deletion(-) create mode 100644 configs/default/components/models/vqa/relational_network.yml create mode 100644 configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml create mode 100644 ptp/components/models/vqa/relational_network.py diff --git a/configs/default/components/models/vqa/relational_network.yml b/configs/default/components/models/vqa/relational_network.yml new file mode 100644 index 0000000..791d62e --- /dev/null +++ b/configs/default/components/models/vqa/relational_network.yml @@ -0,0 +1,55 @@ +# This file defines the default values for the ElementWiseMultiplication model. + +#################################################################### +# 1. CONFIGURATION PARAMETERS that will be LOADED by the component. +#################################################################### + +# Dropout rate (LOADED) +# Default: 0 (means that it is turned off) +dropout_rate: 0 + +# Size of the output of g_theta network/output after concatenation (LOADED) +output_size: 256 + +streams: + #################################################################### + # 2. Keymappings associated with INPUT and OUTPUT streams. + #################################################################### + + # Stream containing batch of encoded images (INPUT) + feature_maps: feature_maps + + # Stream containing batch of encoded questions (INPUT) + question_encodings: question_encodings + + # Stream containing outputs (OUTPUT) + outputs: outputs + +globals: + #################################################################### + # 3. Keymappings of variables that will be RETRIEVED from GLOBALS. + #################################################################### + + # Height of the features tensor (RETRIEVED) + feature_maps_height: feature_maps_height + + # Width of the features tensor (RETRIEVED) + feature_maps_width: feature_maps_width + + # Depth of the features tensor (RETRIEVED) + feature_maps_depth: feature_maps_depth + + # Size of the question encodings input (RETRIEVED) + question_encoding_size: question_encoding_size + + #################################################################### + # 4. Keymappings associated with GLOBAL variables that will be SET. 
+ #################################################################### + + # Size of the output (SET) + output_size: output_size + + #################################################################### + # 5. Keymappings associated with statistics that will be ADDED. + #################################################################### + diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml index 0ea068e..75e41ed 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml @@ -72,7 +72,6 @@ pipeline: question_image_fusion: priority: 4.1 type: MultimodalCompactBilinearPooling - dropout_rate: 0.5 streams: image_encodings: image_activations question_encodings: question_activations diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml new file mode 100644 index 0000000..232a023 --- /dev/null +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml @@ -0,0 +1,94 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml + +# Training parameters: +training: + problem: + batch_size: 5 +validation: + problem: + batch_size: 5 + +pipeline: + name: c2_classification_all_rnn_vgg16_relational_net + + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [question_encoder_output_size] + values: [100] + + ################# PIPE 0: question ################# + # Questions encoding. + question_tokenizer: + priority: 1.1 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: Embeddings + question_embeddings: + priority: 1.2 + type: SentenceEmbeddings + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + priority: 1.3 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + use_logsoftmax: False + initial_state_trainable: False + hidden_size: 128 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + ################# PIPE 2: image ################# + # Image encoder. + image_encoder: + priority: 3.1 + type: TorchVisionWrapper + return_feature_maps: True + streams: + inputs: images + outputs: feature_maps + + ################# PIPE 3: fusion + classification ################# + # Element wise multiplication + FF. 
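For orientation before the fusion block below: the RelationalNetwork consumes the feature maps returned by the VGG16 wrapper together with the 100-dimensional question encoding produced by the LSTM above. A minimal shape sketch; the 512x7x7 feature grid is an assumption about what the VGG16 backbone returns for its default input size, not something stated in this config:

import torch

batch_size = 2
feature_maps = torch.randn(batch_size, 512, 7, 7)    # [B x D x H x W], assumed VGG16 output
question_encodings = torch.randn(batch_size, 100)    # [B x 100], LSTM "Last" prediction

# One "object" is a single spatial location of the feature grid.
num_objects = feature_maps.shape[2] * feature_maps.shape[3]                       # 49
g_theta_input_size = 2 * feature_maps.shape[1] + question_encodings.shape[1]      # 2*512 + 100 = 1124
print(num_objects, g_theta_input_size)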
+ question_image_fusion: + priority: 4.1 + type: RelationalNetwork + dropout_rate: 0.5 + output_size: [256] + streams: + question_encodings: question_activations + outputs: fused_image_question_activations + globals: + question_encoding_size: question_encoder_output_size + output_size: fused_image_question_activation_size + + classifier: + priority: 4.2 + type: FeedForwardNetwork + hidden_sizes: [512,256] + dropout_rate: 0.5 + streams: + inputs: fused_image_question_activations + globals: + input_size: fused_image_question_activation_size + prediction_size: vocabulary_size_c2 + + #: pipeline diff --git a/ptp/components/models/__init__.py b/ptp/components/models/__init__.py index b8b6093..e362654 100644 --- a/ptp/components/models/__init__.py +++ b/ptp/components/models/__init__.py @@ -9,6 +9,7 @@ from .vqa.element_wise_multiplication import ElementWiseMultiplication from .vqa.multimodal_compact_bilinear_pooling import MultimodalCompactBilinearPooling +from .vqa.relational_network import RelationalNetwork __all__ = [ 'ConvNetEncoder', @@ -21,4 +22,5 @@ 'SentenceEmbeddings', 'ElementWiseMultiplication', 'MultimodalCompactBilinearPooling', + 'RelationalNetwork', ] diff --git a/ptp/components/models/vqa/relational_network.py b/ptp/components/models/vqa/relational_network.py new file mode 100644 index 0000000..88452b6 --- /dev/null +++ b/ptp/components/models/vqa/relational_network.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright (C) IBM Corporation 2018 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__author__ = "Tomasz Kornuta" + + +import torch + +from ptp.components.models.model import Model +from ptp.data_types.data_definition import DataDefinition + + +class RelationalNetwork(Model): + """ + Model implements relational network. + Model expects image (CNN) features and encoded question. + + + """ + def __init__(self, name, config): + """ + Initializes the model, creates the required layers. + + :param name: Name of the model (taken from the configuration file). + + :param config: Parameters read from configuration file. + :type config: ``ptp.configuration.ConfigInterface`` + + """ + super(RelationalNetwork, self).__init__(name, RelationalNetwork, config) + + # Get key mappings. + self.key_feature_maps = self.stream_keys["feature_maps"] + self.key_question_encodings = self.stream_keys["question_encodings"] + self.key_outputs = self.stream_keys["outputs"] + + # Retrieve input sizes from globals. + self.feature_maps_height = self.globals["feature_maps_height"] + self.feature_maps_width = self.globals["feature_maps_width"] + self.feature_maps_depth = self.globals["feature_maps_depth"] + self.question_encoding_size = self.globals["question_encoding_size"] + + + # Create "object" coordinates. + self.obj_coords = [] + for h in range(self.feature_maps_height): + for w in range(self.feature_maps_width): + self.obj_coords.append((h,w)) + + # Get output_size from config and send it to globals. 
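A note on cost: the obj_coords list built above is iterated twice in forward(), so the number of g_theta evaluations grows quadratically with the number of spatial locations. A quick back-of-the-envelope check, with 7x7 used purely as an illustrative grid size (the real values come from the feature_maps_height/width globals):

height, width = 7, 7                  # illustrative only
num_objects = height * width          # 49 "objects"
num_pairs = num_objects ** 2          # 2401 ordered pairs pushed through g_theta per image
print(num_objects, num_pairs)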
+ self.output_size = self.config["output_size"] + self.globals["output_size"] = self.output_size + + # Calculate input size to the g_theta: two "objects" + question (+ optionally: image size) + input_size = 2 * self.feature_maps_depth + self.question_encoding_size + + # Retrieve dropout rate value - if set, will put dropout between every layer. + dropout_rate = self.config["dropout_rate"] + + # Create the model, i.e. the "relational" g_theta MLP. + self.g_theta = torch.nn.Sequential( + torch.nn.Linear(input_size, self.output_size), + # Create activation layer. + torch.nn.ReLU(), + # Create dropout layer. + torch.nn.Dropout(dropout_rate), + torch.nn.Linear(self.output_size, self.output_size), + torch.nn.ReLU(), + torch.nn.Dropout(dropout_rate), + torch.nn.Linear(self.output_size, self.output_size), + torch.nn.ReLU(), + torch.nn.Dropout(dropout_rate), + torch.nn.Linear(self.output_size, self.output_size) + ) + + + + def input_data_definitions(self): + """ + Function returns a dictionary with definitions of input data that are required by the component. + + :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + return { + self.key_feature_maps: DataDefinition([-1, self.feature_maps_depth, self.feature_maps_height, self.feature_maps_width], [torch.Tensor], "Batch of feature maps [BATCH_SIZE x FEAT_DEPTH x FEAT_HEIGHT x FEAT_WIDTH]"), + self.key_question_encodings: DataDefinition([-1, self.question_encoding_size], [torch.Tensor], "Batch of encoded questions [BATCH_SIZE x QUESTION_ENCODING_SIZE]"), + } + + + def output_data_definitions(self): + """ + Function returns a dictionary with definitions of output data produced the component. + + :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + return { + self.key_outputs: DataDefinition([-1, self.output_size], [torch.Tensor], "Batch of outputs [BATCH_SIZE x OUTPUT_SIZE]") + } + + def forward(self, data_dict): + """ + Main forward pass of the model. + + :param data_dict: DataDict({'images',**}) + :type data_dict: ``ptp.dadatypes.DataDict`` + """ + + # Unpack DataDict. + feat_m = data_dict[self.key_feature_maps] + enc_q = data_dict[self.key_question_encodings] + + summed_relations = None + # Iterate through all pairs of "objects". + for (h1,w1) in self.obj_coords: + for (h2,w2) in self.obj_coords: + # Get feature maps. + fm1 = feat_m[:, :, h1,w1].view(-1, self.feature_maps_depth) + fm2 = feat_m[:, :, h2,w2].view(-1, self.feature_maps_depth) + # Concatenate with question. + concat = torch.cat([fm1, fm2, enc_q], dim=1) + + # Pass it through g_theta. + rel = self.g_theta(concat) + + # Add to relations. + if summed_relations is None: + summed_relations = rel + else: + # Element wise sum. + summed_relations += rel + + # Add outputs to datadict. 
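The nested loop above is easy to follow but issues (H*W)^2 small g_theta calls per batch. If that ever becomes a bottleneck, the same sum over ordered object pairs can be formed with broadcasting; the sketch below is an equivalent stand-alone formulation of the idea, not part of this model:

import torch

def relational_sum(feature_maps, question, g_theta):
    # feature_maps: [B, D, H, W], question: [B, Q]
    b, d, h, w = feature_maps.shape
    n = h * w
    objs = feature_maps.view(b, d, n).permute(0, 2, 1)               # [B, N, D]
    o_i = objs.unsqueeze(2).expand(b, n, n, d)                       # first object of each pair
    o_j = objs.unsqueeze(1).expand(b, n, n, d)                       # second object of each pair
    q = question.unsqueeze(1).unsqueeze(1).expand(b, n, n, question.shape[1])
    pairs = torch.cat([o_i, o_j, q], dim=3)                          # [B, N, N, 2*D + Q]
    out = g_theta(pairs.view(b * n * n, -1))
    return out.view(b, n * n, -1).sum(dim=1)                         # sum over all ordered pairs

# Quick shape check with dummy tensors and a single linear layer standing in for g_theta.
g = torch.nn.Linear(2 * 512 + 100, 256)
print(relational_sum(torch.randn(2, 512, 7, 7), torch.randn(2, 100), g).shape)   # torch.Size([2, 256])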
+ data_dict.extend({self.key_outputs: summed_relations}) From b07d633c02752b0dd03d5d02aa714ca3e5ab23b6 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 23 Apr 2019 12:50:55 -0700 Subject: [PATCH 03/15] typo fix in config --- .../c2_classification_all_rnn_vgg16_relational_net.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml index 232a023..50f1621 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml @@ -72,7 +72,7 @@ pipeline: priority: 4.1 type: RelationalNetwork dropout_rate: 0.5 - output_size: [256] + output_size: 256 streams: question_encodings: question_activations outputs: fused_image_question_activations @@ -83,7 +83,7 @@ pipeline: classifier: priority: 4.2 type: FeedForwardNetwork - hidden_sizes: [512,256] + hidden_sizes: [256,256] dropout_rate: 0.5 streams: inputs: fused_image_question_activations From c1d471fd2f54f3faeca9c630d800a0dc955e9443 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 23 Apr 2019 12:51:56 -0700 Subject: [PATCH 04/15] rn config batch size commented --- ...2_classification_all_rnn_vgg16_relational_net.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml index 50f1621..7382308 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml @@ -2,12 +2,12 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml # Training parameters: -training: - problem: - batch_size: 5 -validation: - problem: - batch_size: 5 +#training: +# problem: +# batch_size: 5 +#validation: +# problem: +# batch_size: 5 pipeline: name: c2_classification_all_rnn_vgg16_relational_net From 123187b306b0765ba22438795678d2c7a750e1d4 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 23 Apr 2019 12:53:10 -0700 Subject: [PATCH 05/15] rn config batch size --- ...2_classification_all_rnn_vgg16_relational_net.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml index 7382308..7e2f657 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml @@ -2,12 +2,12 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml # Training parameters: -#training: -# problem: -# batch_size: 5 -#validation: -# problem: -# batch_size: 5 +training: + problem: + batch_size: 64 +validation: + problem: + batch_size: 64 pipeline: name: c2_classification_all_rnn_vgg16_relational_net From 2eabb0d8789d09c5fee41d4cbc26f4f3a99f98a9 Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 24 Apr 2019 09:47:40 -0700 Subject: [PATCH 06/15] Modified settings for relational network pipe --- ...lassification_all_rnn_vgg16_relational_net.yml | 15 +++++++++------ 
ptp/components/models/vqa/relational_network.py | 2 ++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml index 7e2f657..8601775 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml @@ -32,8 +32,8 @@ pipeline: question_embeddings: priority: 1.2 type: SentenceEmbeddings - embeddings_size: 50 - pretrained_embeddings_file: glove.6B.50d.txt + embeddings_size: 200 + pretrained_embeddings_file: glove.6B.200d.txt data_folder: ~/data/vqa-med word_mappings_file: questions.all.word.mappings.csv streams: @@ -47,8 +47,9 @@ pipeline: cell_type: LSTM prediction_mode: Last use_logsoftmax: False - initial_state_trainable: False - hidden_size: 128 + initial_state_trainable: True + dropout_rate: 0.5 + hidden_size: 50 streams: inputs: embedded_questions predictions: question_activations @@ -62,6 +63,8 @@ pipeline: priority: 3.1 type: TorchVisionWrapper return_feature_maps: True + frozen: True + freeze: True streams: inputs: images outputs: feature_maps @@ -72,7 +75,7 @@ pipeline: priority: 4.1 type: RelationalNetwork dropout_rate: 0.5 - output_size: 256 + output_size: 100 streams: question_encodings: question_activations outputs: fused_image_question_activations @@ -83,7 +86,7 @@ pipeline: classifier: priority: 4.2 type: FeedForwardNetwork - hidden_sizes: [256,256] + hidden_sizes: [100,100] dropout_rate: 0.5 streams: inputs: fused_image_question_activations diff --git a/ptp/components/models/vqa/relational_network.py b/ptp/components/models/vqa/relational_network.py index 88452b6..5b5763a 100644 --- a/ptp/components/models/vqa/relational_network.py +++ b/ptp/components/models/vqa/relational_network.py @@ -30,6 +30,8 @@ class RelationalNetwork(Model): Model expects image (CNN) features and encoded question. + Santoro, A., Raposo, D., Barrett, D. G., Malinowski, M., Pascanu, R., Battaglia, P., & Lillicrap, T. (2017). A simple neural network module for relational reasoning. In Advances in neural information processing systems (pp. 4967-4976). + Reference paper: https://arxiv.org/abs/1706.01427. """ def __init__(self, name, config): """ From 8cbbf99b4a4da602e908ba430595e24d53bc0b8c Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 24 Apr 2019 09:51:40 -0700 Subject: [PATCH 07/15] changed settings (larger glove, dropout) for c2 ewm_size --- ...c2_classification_all_rnn_vgg16_ewm_size.yml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml index 91ff5d1..1cfb431 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml @@ -1,6 +1,14 @@ # Load config defining problems for training, validation and testing. 
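One of the changes above freezes the pretrained image encoder (both frozen: True and freeze: True appear; presumably only one of them is the key the wrapper actually reads). In plain PyTorch, freezing a torchvision backbone usually means disabling gradients on its parameters; the snippet below is a stand-alone illustration of that idea, not a description of how ptp's TorchVisionWrapper implements the flag:

import torch
import torchvision

backbone = torchvision.models.vgg16(pretrained=True).features
for param in backbone.parameters():
    param.requires_grad = False     # weights stay fixed during training
backbone.eval()                     # disables any dropout / batch-norm updates

with torch.no_grad():
    feature_maps = backbone(torch.randn(1, 3, 224, 224))
print(feature_maps.shape)           # torch.Size([1, 512, 7, 7])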
default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml +# Training parameters: +training: + problem: + batch_size: 64 +validation: + problem: + batch_size: 64 + pipeline: name: c2_classification_all_rnn_vgg16_ewm_size @@ -24,8 +32,8 @@ pipeline: question_embeddings: priority: 1.2 type: SentenceEmbeddings - embeddings_size: 50 - pretrained_embeddings_file: glove.6B.50d.txt + embeddings_size: 200 + pretrained_embeddings_file: glove.6B.200d.txt data_folder: ~/data/vqa-med word_mappings_file: questions.all.word.mappings.csv streams: @@ -39,8 +47,9 @@ pipeline: cell_type: LSTM prediction_mode: Last use_logsoftmax: False - initial_state_trainable: False + initial_state_trainable: True hidden_size: 50 + dropout_rate: 0.5 streams: inputs: embedded_questions predictions: question_activations @@ -117,7 +126,7 @@ pipeline: classifier: priority: 5.3 type: FeedForwardNetwork - hidden_sizes: [110] + hidden_sizes: [100] dropout_rate: 0.5 streams: inputs: concatenated_activations From 272441988e05689da2bd55c37ad47d0fa1ba9cca Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 24 Apr 2019 21:21:52 -0700 Subject: [PATCH 08/15] reverted some changesin c2: commented dropout in rnn --- .../c2_classification_all_rnn_vgg16_ewm_size.yml | 6 +++--- .../c2_classification_all_rnn_vgg16_relational_net.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml index 1cfb431..72c5e1a 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml @@ -32,8 +32,8 @@ pipeline: question_embeddings: priority: 1.2 type: SentenceEmbeddings - embeddings_size: 200 - pretrained_embeddings_file: glove.6B.200d.txt + embeddings_size: 100 + pretrained_embeddings_file: glove.6B.100d.txt data_folder: ~/data/vqa-med word_mappings_file: questions.all.word.mappings.csv streams: @@ -49,7 +49,7 @@ pipeline: use_logsoftmax: False initial_state_trainable: True hidden_size: 50 - dropout_rate: 0.5 + #dropout_rate: 0.5 streams: inputs: embedded_questions predictions: question_activations diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml index 8601775..5268ba7 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml @@ -32,8 +32,8 @@ pipeline: question_embeddings: priority: 1.2 type: SentenceEmbeddings - embeddings_size: 200 - pretrained_embeddings_file: glove.6B.200d.txt + embeddings_size: 100 + pretrained_embeddings_file: glove.6B.100d.txt data_folder: ~/data/vqa-med word_mappings_file: questions.all.word.mappings.csv streams: @@ -48,7 +48,7 @@ pipeline: prediction_mode: Last use_logsoftmax: False initial_state_trainable: True - dropout_rate: 0.5 + #dropout_rate: 0.5 hidden_size: 50 streams: inputs: embedded_questions From 41cb4729e9ff6e1cee3ab981d90071eb4c617558 Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 24 Apr 2019 21:22:20 -0700 Subject: [PATCH 09/15] simple config for c4 ewm --- ..._classification_all_rnn_vgg16_ewm_size.yml | 130 ++++++++++++++++++ .../default_c4_classification.yml | 98 +++++++++++++ 2 files changed, 228 
insertions(+) create mode 100644 configs/vqa_med_2019/c4_classification/c4_classification_all_rnn_vgg16_ewm_size.yml create mode 100644 configs/vqa_med_2019/c4_classification/default_c4_classification.yml diff --git a/configs/vqa_med_2019/c4_classification/c4_classification_all_rnn_vgg16_ewm_size.yml b/configs/vqa_med_2019/c4_classification/c4_classification_all_rnn_vgg16_ewm_size.yml new file mode 100644 index 0000000..a9b8266 --- /dev/null +++ b/configs/vqa_med_2019/c4_classification/c4_classification_all_rnn_vgg16_ewm_size.yml @@ -0,0 +1,130 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c4_classification/default_c4_classification.yml + +pipeline: + name: c4_classification_all_rnn_vgg16_ewm_size + + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [question_encoder_output_size, image_encoder_output_size, element_wise_activation_size,image_size_encoder_input_size, image_size_encoder_output_size] + values: [100, 100, 100, 2, 10] + + ################# PIPE 0: question ################# + # Questions encoding. + question_tokenizer: + priority: 1.1 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: Embeddings + question_embeddings: + priority: 1.2 + type: SentenceEmbeddings + embeddings_size: 100 + pretrained_embeddings_file: glove.6B.100d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + priority: 1.3 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + use_logsoftmax: False + initial_state_trainable: True + hidden_size: 50 + #dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + ################# PIPE 2: image ################# + # Image encoder. + image_encoder: + priority: 3.1 + type: TorchVisionWrapper + streams: + inputs: images + outputs: image_activations + globals: + output_size: image_encoder_output_size + + ################# PIPE 3: image-question fusion ################# + # Element wise multiplication + FF. + question_image_fusion: + priority: 4.1 + type: ElementWiseMultiplication + dropout_rate: 0.5 + streams: + image_encodings: image_activations + question_encodings: question_activations + outputs: element_wise_activations + globals: + image_encoding_size: image_encoder_output_size + question_encoding_size: question_encoder_output_size + output_size: element_wise_activation_size + + question_image_ffn: + priority: 4.2 + type: FeedForwardNetwork + hidden_sizes: [100] + dropout_rate: 0.5 + streams: + inputs: element_wise_activations + predictions: question_image_activations + globals: + input_size: element_wise_activation_size + prediction_size: element_wise_activation_size + + ################# PIPE 4: image-question-image size fusion + classification ################# + # 2nd subpipeline: image size. + # Model - image size classifier. + image_size_encoder: + priority: 5.1 + type: FeedForwardNetwork + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + # 4th subpipeline: concatenation + FF. 
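The ElementWiseMultiplication fusion used above presumably projects both modalities to a common width and multiplies them feature by feature (a Hadamard product); the sketch below shows that fusion pattern with the 100-unit sizes from this config, but the exact layer layout inside ptp's component is an assumption. The resulting 100-d vector is then concatenated with the 10-d image-size encoding, which is where the 110-wide output_dims in the block below comes from.

import torch

class HadamardFusion(torch.nn.Module):
    """Projects two encodings to a shared size and multiplies them element-wise."""
    def __init__(self, image_size=100, question_size=100, output_size=100):
        super().__init__()
        self.image_proj = torch.nn.Linear(image_size, output_size)
        self.question_proj = torch.nn.Linear(question_size, output_size)

    def forward(self, image_enc, question_enc):
        return self.image_proj(image_enc) * self.question_proj(question_enc)

fused = HadamardFusion()(torch.randn(4, 100), torch.randn(4, 100))
print(fused.shape)    # torch.Size([4, 100])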
+ concat: + priority: 5.2 + type: Concatenation + input_streams: [question_image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,10]] + output_dims: [-1,110] + streams: + outputs: concatenated_activations + globals: + output_size: concatentated_activations_size + + + classifier: + priority: 5.3 + type: FeedForwardNetwork + hidden_sizes: [500] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + globals: + input_size: concatentated_activations_size + prediction_size: vocabulary_size_c4 + + + #: pipeline diff --git a/configs/vqa_med_2019/c4_classification/default_c4_classification.yml b/configs/vqa_med_2019/c4_classification/default_c4_classification.yml new file mode 100644 index 0000000..e221187 --- /dev/null +++ b/configs/vqa_med_2019/c4_classification/default_c4_classification.yml @@ -0,0 +1,98 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + batch_size: 64 + categories: C4 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c4.weights.csv + dataloader: + num_workers: 4 + # Termination. + terminal_conditions: + loss_stop: 1.0e-2 + episode_limit: 10000 + epoch_limit: -1 + +# Validation parameters: +validation: + problem: + batch_size: 64 + categories: C4 + dataloader: + num_workers: 4 + + +pipeline: + + # Answer encoding. + answer_indexer: + type: LabelIndexer + priority: 0.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c4.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c4 + word_mappings: word_mappings_c4 + + + # Predictions decoder. + prediction_decoder: + type: WordDecoder + priority: 10.1 + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: predictions + outputs: predicted_answers + globals: + vocabulary_size: vocabulary_size_c4 + word_mappings: word_mappings_c4 + + # Loss + nllloss: + type: NLLLoss + priority: 10.2 + targets_dim: 1 + streams: + targets: answers_ids + loss: loss + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 100.1 + + #accuracy: + # type: AccuracyStatistics + # priority: 100.2 + # streams: + # targets: answers_ids + + precision_recall: + type: PrecisionRecallStatistics + priority: 100.3 + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + streams: + targets: answers_ids + globals: + word_mappings: word_mappings_c4 + num_classes: vocabulary_size_c4 + + # Viewers. 
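The training section above points the WeightedRandomSampler at a per-answer weights file (answers.c4.weights.csv). The usual recipe behind such a file is inverse-frequency weighting, so that rare answers are sampled about as often as frequent ones; the snippet below sketches that recipe with a toy label list, while the actual file format and how ptp loads it are assumptions here:

import collections
from torch.utils.data import WeightedRandomSampler

answers = ["normal", "normal", "normal", "cyst", "fracture"]      # toy per-sample labels

counts = collections.Counter(answers)
weights = [1.0 / counts[a] for a in answers]                      # rare answers get larger weights

sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)
print(list(sampler))    # sample indices, drawn roughly uniformly over answer classes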
+ viewer: + type: StreamViewer + priority: 100.4 + input_streams: questions,category_names,answers,predicted_answers + +#: pipeline From 5f57c7d22c7ecfb196526d4b6e7c9d5606496008 Mon Sep 17 00:00:00 2001 From: tkornut Date: Thu, 25 Apr 2019 21:00:14 -0700 Subject: [PATCH 10/15] Added preprocessing function to vqameed problem --- .../image_text_to_class/vqa_med_2019.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/ptp/components/problems/image_text_to_class/vqa_med_2019.py b/ptp/components/problems/image_text_to_class/vqa_med_2019.py index 9273fb5..ed9c433 100644 --- a/ptp/components/problems/image_text_to_class/vqa_med_2019.py +++ b/ptp/components/problems/image_text_to_class/vqa_med_2019.py @@ -179,6 +179,45 @@ def filter_sources(self, source_files, source_categories): return source_files, source_categories + def preprocess_text(self, text, remove_stop_words = False): + """ + Function that preprocesses questions/answers as suggested by ImageCLEF VQA challenge organizers: + * lowercases all words + * removes punctuation + * removes stop words (optional) + + :param text: text to be processed. + :param remove_stop_words: removes stop words (DEFAULT: False) + + :return: Preprocessed and tokenized text (list of strings) + """ + # Lowercase. + text = text.lower() + + # Remove punctuation. + translator = str.maketrans('', '', string.punctuation) + text = text.translate(translator) + + # Remove '“' and '”' !!! + text = text.replace('“','').replace('”','') + + # Tokenize. + text_words = nltk.tokenize.word_tokenize(text) + + # If we do not want to remove stop words - return text. + if not remove_stop_words: + return text_words + + # Perform "cleansing". + stops = set(stopwords.words("english")) + cleansed_words = [word for word in text_words if word not in stops] + # Return the original text if there are no words left :] + if len(cleansed_words) == 0: + return text_words + + # Return cleaned text. + return cleansed_words + def load_dataset(self, source_files, source_categories): """ Loads the dataset from one or more files. From 2ca681b8f7267fa4ef6913b7ce5ad085e1ac4e67 Mon Sep 17 00:00:00 2001 From: tkornut Date: Thu, 25 Apr 2019 23:38:59 -0700 Subject: [PATCH 11/15] Added various preprocessing to vqa_med problem --- .../image_text_to_class/vqa_med_2019.yml | 24 +- .../image_text_to_class/vqa_med_2019.py | 255 +++++++++++++----- 2 files changed, 200 insertions(+), 79 deletions(-) diff --git a/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml b/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml index 53c6407..bd724b4 100644 --- a/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml +++ b/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml @@ -15,10 +15,6 @@ split: training # Options: all | c1 | c2 | c3 | c4 (or any combination of the latter 4) categories: all -# Removes punctuation (LOADED) -# Options: none | questions | answers | all -remove_punctuation: questions - # Resize parameter (LOADED) # When present, resizes the images from original size to [height, width] # Depth remains set to 3. @@ -28,9 +24,23 @@ remove_punctuation: questions # Problem will use those values to rescale the image_sizes to range (0, 1). 
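Patch 10 above adds preprocess_text() to the VQA-Med problem: lowercase, strip punctuation, tokenize with nltk and optionally drop stop words, falling back to the full token list when nothing would remain. A stand-alone re-implementation of those steps, just to show the effect on a typical question (the example question is made up):

import string
import nltk

nltk.download("punkt", quiet=True)
nltk.download("stopwords", quiet=True)

def preprocess(text, remove_stop_words=False):
    text = text.lower().translate(str.maketrans("", "", string.punctuation))
    words = nltk.tokenize.word_tokenize(text)
    if not remove_stop_words:
        return words
    stops = set(nltk.corpus.stopwords.words("english"))
    kept = [w for w in words if w not in stops]
    return kept or words    # keep the original tokens if everything was a stop word

print(preprocess("What abnormality is seen in the image?", remove_stop_words=True))
# ['abnormality', 'seen', 'image']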
scale_image_size: [2414, 2323] -# Set augmentation parameter -# Use random affine transformations (rotate, scale and translate) -use_augmentation: False +# Select applied image preprocessing/augmentations (LOADED) +# Use one (or more) of the affine transformations: +# none | random_affine | random_horizontal_flip | normalize | all +# Accepted formats: a,b,c or [a,b,c] +image_preprocessing: normalize + +# Select applied question preprocessing/augmentations (LOADED) +# Use one (or more) of the transformations: +# none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all +# Accepted formats: a,b,c or [a,b,c] +question_preprocessing: lowercase, remove_punctuation + +# Select applied question preprocessing (LOADED) +# Options: none | lowercase | remove_punctuation | tokenize | all +# Accepted formats: a,b,c or [a,b,c] +answer_preprocessing: none + streams: #################################################################### diff --git a/ptp/components/problems/image_text_to_class/vqa_med_2019.py b/ptp/components/problems/image_text_to_class/vqa_med_2019.py index ed9c433..4a9616f 100644 --- a/ptp/components/problems/image_text_to_class/vqa_med_2019.py +++ b/ptp/components/problems/image_text_to_class/vqa_med_2019.py @@ -17,19 +17,51 @@ __author__ = "Chaitanya Shivade, Tomasz Kornuta" +import os import string import tqdm import pandas as pd from PIL import Image -from torchvision import transforms -import os +import nltk +from nltk.corpus import stopwords + import torch from torchvision import transforms from ptp.components.problems.problem import Problem from ptp.data_types.data_definition import DataDefinition +from ptp.configuration.configuration_error import ConfigurationError + +def get_value_list_from_dictionary(key, parameter_dict, accepted_values = []): + """ + Parses parameter values retrieved from a given parameter dictionary using key. + Optionally, checks is all values are accepted. + + :param key: Key of the parameter. + :param parameter_dict: Dictionary containing given key (e.g. config or globals) + :param accepted_values: List of accepted values (DEFAULT: []) + + :return: List of parsed values + """ + parameter = parameter_dict[key] + # Preprocess parameter value. + if (type(parameter) == str): + values = parameter.replace(" ","").split(",") + else: + values = parameter # list + assert type(values) == list, "Parameter value must be a list" + + # Test values one by one. + if len(accepted_values) > 0: + for value in values: + if value not in accepted_values: + raise ConfigurationError("One of the values in '{}' is invalid (current: '{}', accepted: {})".format(key, value, accepted_values)) + + # Return list. + return values + class VQAMED2019(Problem): """ @@ -67,6 +99,10 @@ def __init__(self, name, config): # Call constructors of parent classes. Problem.__init__(self, name, VQAMED2019, config) + # (Eventually) download required packages. + nltk.download('punkt') + nltk.download('stopwords') + # Get key mappings of all output streams. self.key_images = self.stream_keys["images"] self.key_image_ids = self.stream_keys["image_ids"] @@ -100,9 +136,6 @@ def __init__(self, name, config): self.globals["category_word_mappings"] = {'C1': 0, 'C2': 1, 'C3': 2, 'C4': 3, 'BINARY': 4, '': 5} self.category_idx_to_word = {0: 'C1', 1: 'C2', 2: 'C3', 3: 'C4', 4: 'BINARY', 5: ''} - # Check if we want to remove punctuation from questions/answer - self.remove_punctuation = self.config["remove_punctuation"] - # Get the absolute path. 
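The preprocessing options above (image_preprocessing, question_preprocessing, answer_preprocessing) are parsed by the new get_value_list_from_dictionary() helper, which accepts either a comma-separated string or a YAML list and raises ConfigurationError for anything outside the accepted set. A small usage sketch, assuming the helper is in scope; the config dict is a stand-in for the component's configuration object:

config = {"question_preprocessing": "lowercase, remove_punctuation"}

values = get_value_list_from_dictionary(
    "question_preprocessing",
    config,
    accepted_values=["none", "lowercase", "remove_punctuation", "tokenize", "all"],
)
print(values)    # ['lowercase', 'remove_punctuation']

# An unknown option such as "uppercase" would raise ConfigurationError instead.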
self.data_folder = os.path.expanduser(self.config['data_folder']) @@ -138,6 +171,42 @@ def __init__(self, name, config): # Filter lists taking into account configuration. source_files, source_categories = self.filter_sources(source_files, source_categories) + # else: # TODO + + # Get image augmentations. + self.image_preprocessing = get_value_list_from_dictionary( + "image_preprocessing", self.config, + 'none | random_affine | random_horizontal_flip | normalize | all'.split(" | ") + ) + if 'none' in self.image_preprocessing: + self.image_preprocessing = [] + if 'all' in self.image_preprocessing: + self.image_preprocessing = 'random_affine | random_horizontal_flip | normalize'.split(" | ") + self.logger.info("Applied image augmentations: {}".format(self.image_preprocessing)) + + + # Get question augmentations. + self.question_preprocessing = get_value_list_from_dictionary( + "question_preprocessing", self.config, + 'none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all'.split(" | ") + ) + if 'none' in self.question_preprocessing: + self.question_preprocessing = [] + if 'all' in self.question_preprocessing: + self.question_preprocessing = 'lowercase | remove_punctuation | tokenize | remove_stop_words | shuffle_words'.split(" | ") + self.logger.info("Applied question augmentations: {}".format(self.question_preprocessing)) + + # Get answer preprocessing. + self.answer_preprocessing = get_value_list_from_dictionary( + "answer_preprocessing", self.config, + 'none | lowercase | remove_punctuation | tokenize | all'.split(" | ") + ) + if 'none' in self.answer_preprocessing: + self.answer_preprocessing = [] + if 'all' in self.answer_preprocessing: + self.answer_preprocessing = 'lowercase | remove_punctuation | tokenize '.split(" | ") + self.logger.info("Applied answer preprocessing: {}".format(self.answer_preprocessing)) + # Load dataset. self.logger.info("Loading dataset from files:\n {}".format(source_files)) @@ -146,12 +215,13 @@ def __init__(self, name, config): # Display exemplary sample. self.logger.info("Exemplary sample:\n [ category: {}\t image_ids: {}\t question: {}\t answer: {} ]".format( + self.dataset[0][self.key_category_ids], self.dataset[0][self.key_image_ids], self.dataset[0][self.key_questions], - self.dataset[0][self.key_answers], - self.dataset[0][self.key_category_ids] + self.dataset[0][self.key_answers] )) + def filter_sources(self, source_files, source_categories): """ Loads the dataset from one or more files. @@ -165,42 +235,90 @@ def filter_sources(self, source_files, source_categories): # Check categories that user want to use. use_files = [False] * 4 categs = {'C1': 0, 'C2': 1, 'C3': 2, 'C4': 3} - for cat in self.config["categories"].replace(" ","").split(","): + # Parse categories from configuration list. + loaded_categs = get_value_list_from_dictionary("categories", self.config, ['C1', 'C2', 'C3', 'C4', 'all']) + for cat in loaded_categs: # "Special" case. if cat == "all": use_files = [True] * 4 # Make no sense to continue. break else: - if cat in categs.keys(): - use_files[categs[cat]] = True + use_files[categs[cat]] = True # Filter. _, source_files, source_categories = zip(*(filter(lambda x: x[0], zip(use_files, source_files,source_categories)))) return source_files, source_categories - def preprocess_text(self, text, remove_stop_words = False): + def __len__(self): + """ + Returns the "size" of the "problem" (total number of samples). + + :return: The size of the problem. 
+ """ + return len(self.dataset) + + + def output_data_definitions(self): + """ + Function returns a dictionary with definitions of output data produced the component. + + :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + # Add all "standard" streams. + d = { + self.key_indices: DataDefinition([-1, 1], [list, int], "Batch of sample indices [BATCH_SIZE] x [1]"), + self.key_images: DataDefinition([-1, self.depth, self.height, self.width], [torch.Tensor], "Batch of images [BATCH_SIZE x IMAGE_DEPTH x IMAGE_HEIGHT x IMAGE_WIDTH]"), + self.key_image_ids: DataDefinition([-1, 1], [list, str], "Batch of image names, each being a single word [BATCH_SIZE] x [STRING]"), + self.key_image_sizes: DataDefinition([-1, 2], [torch.Tensor], "Batch of original sizes (height, width) of images [BATCH_SIZE x 2]"), + self.key_category_ids: DataDefinition([-1], [torch.Tensor], "Batch of target category indices, each being a single index [BATCH_SIZE]"), + self.key_category_names: DataDefinition([-1, 1], [list, str], "Batch of category target names, each being a single word [BATCH_SIZE] x [STRING]"), + } + + # Add stream with questions. + if 'tokenize' in self.question_preprocessing: + d[self.key_questions] = DataDefinition([-1, -1, 1], [list, list, str], "Batch of questions, each being a list of words [BATCH_SIZE] x [SEQ_LEN] x [STRING]") + else: + d[self.key_questions] = DataDefinition([-1, 1], [list, str], "Batch of questions, each being a string consisting of many words [BATCH_SIZE] x [STRING]") + + # Add stream with answers. + if 'tokenize' in self.answer_preprocessing: + d[self.key_answers] = DataDefinition([-1, -1, 1], [list, list, str], "Batch of target answers, each being a list of words [BATCH_SIZE] x [SEQ_LEN] x [STRING]") + else: + d[self.key_answers]= DataDefinition([-1, 1], [list, str], "Batch of target answers, each being a string consisting of many words [BATCH_SIZE] x [STRING]") + return d + + + def preprocess_text(self, text, lowercase = False, remove_punctuation = False, tokenize = False, remove_stop_words = False): """ Function that preprocesses questions/answers as suggested by ImageCLEF VQA challenge organizers: - * lowercases all words - * removes punctuation + * lowercases all words (optional) + * removes punctuation (optional) * removes stop words (optional) :param text: text to be processed. + :param lowercase: lowercases text (DEFAULT: False) + :param remove_punctuation: removes punctuation (DEFAULT: False) + :param tokenize: tokenizes the text (DEFAULT: False) :param remove_stop_words: removes stop words (DEFAULT: False) :return: Preprocessed and tokenized text (list of strings) """ # Lowercase. - text = text.lower() + if lowercase: + text = text.lower() # Remove punctuation. - translator = str.maketrans('', '', string.punctuation) - text = text.translate(translator) - - # Remove '“' and '”' !!! - text = text.replace('“','').replace('”','') - + if remove_punctuation: + translator = str.maketrans('', '', string.punctuation) + text = text.translate(translator) + # Remove '“' and '”' !!! + text = text.replace('“','').replace('”','') + + # If not tokenize - return text. + if not tokenize: + return text + # Tokenize. text_words = nltk.tokenize.word_tokenize(text) @@ -229,9 +347,6 @@ def load_dataset(self, source_files, source_categories): # Set containing list of tuples. dataset = [] - # Create table used for removing punctuations. 
- table = str.maketrans({key: None for key in string.punctuation}) - # Process files with categories. for data_file, category in zip(source_files, source_categories): # Set absolute path to file. @@ -249,18 +364,28 @@ def load_dataset(self, source_files, source_categories): answer = row[self.key_answers] # Process question - if required. - if self.remove_punctuation in ["questions","all"]: - question = question.translate(table) + preprocessed_question = self.preprocess_text( + question, + 'lowercase' in self.question_preprocessing, + 'remove_punctuation' in self.question_preprocessing, + 'tokenize' in self.question_preprocessing, + 'remove_stop_words' in self.question_preprocessing + ) # Process answer - if required. - if self.remove_punctuation in ["answers","all"]: - answer = answer.translate(table) + preprocessed_answer = self.preprocess_text( + answer, + 'lowercase' in self.answer_preprocessing, + 'remove_punctuation' in self.answer_preprocessing, + 'tokenize' in self.answer_preprocessing, + False + ) # Add record to dataset. dataset.append({ self.key_image_ids: row[self.key_image_ids], - self.key_questions: question, - self.key_answers: answer, + self.key_questions: preprocessed_question, + self.key_answers: preprocessed_answer, # Add category. self.key_category_ids: category }) @@ -271,31 +396,6 @@ def load_dataset(self, source_files, source_categories): # Return the created list. return dataset - def __len__(self): - """ - Returns the "size" of the "problem" (total number of samples). - - :return: The size of the problem. - """ - return len(self.dataset) - - - def output_data_definitions(self): - """ - Function returns a dictionary with definitions of output data produced the component. - - :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). - """ - return { - self.key_indices: DataDefinition([-1, 1], [list, int], "Batch of sample indices [BATCH_SIZE] x [1]"), - self.key_images: DataDefinition([-1, self.depth, self.height, self.width], [torch.Tensor], "Batch of images [BATCH_SIZE x IMAGE_DEPTH x IMAGE_HEIGHT x IMAGE_WIDTH]"), - self.key_image_ids: DataDefinition([-1, 1], [list, str], "Batch of image names, each being a single word [BATCH_SIZE] x [STRING]"), - self.key_image_sizes: DataDefinition([-1, 2], [torch.Tensor], "Batch of original sizes (height, width) of images [BATCH_SIZE x 2]"), - self.key_questions: DataDefinition([-1, 1], [list, str], "Batch of questions, each being a string consisting of many words [BATCH_SIZE] x [STRING]"), - self.key_answers: DataDefinition([-1, 1], [list, str], "Batch of target answers, each being a string consisting of many words [BATCH_SIZE] x [STRING]"), - self.key_category_ids: DataDefinition([-1], [torch.Tensor], "Batch of target category indices, each being a single index [BATCH_SIZE]"), - self.key_category_names: DataDefinition([-1, 1], [list, str], "Batch of category target names, each being a single word [BATCH_SIZE] x [STRING]"), - } def __getitem__(self, index): @@ -318,23 +418,29 @@ def __getitem__(self, index): # Get its width and height. width, height = img.size - if(self.config['use_augmentation'] == 'True'): + image_transformations_list = [] + # Optional. + if 'random_affine' in self.image_preprocessing: rotate = (-45, 135) translate = (0.05, 0.25) scale = (0.5, 2) - transforms_list = [transforms.RandomAffine(rotate, translate, scale), transforms.RandomHorizontalFlip()] - else: - transforms_list = [] - # Resize the image and transform to Torch Tensor. 
- transforms_com = transforms.Compose(transforms_list + [ - transforms.Resize([self.height,self.width]), - transforms.ToTensor(), - # Use normalization that the pretrained models from TorchVision require. - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - ]) - img = transforms_com(img) #.type(torch.FloatTensor).squeeze() + image_transformations_list.append(transforms.RandomAffine(rotate, translate, scale)) + if 'random_horizontal_flip' in self.image_preprocessing: + image_transformations_list.append(transforms.RandomHorizontalFlip()) + + # Add two obligatory transformations. + image_transformations_list.append(transforms.Resize([self.height,self.width])) + image_transformations_list.append(transforms.ToTensor()) + + # Optional normalizastion. + if 'normalize' in self.image_preprocessing: + # Use normalization that the pretrained models from TorchVision require. + image_transformations_list.append(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])) - #print("img: min_val = {} max_val = {}".format(torch.min(img),torch.max(img)) ) + # Resize the image and transform to Torch Tensor. + transforms_com = transforms.Compose(image_transformations_list) + # Apply transformations. + img = transforms_com(img) # Create the resulting sample (data dict). data_dict = self.create_data_dict(index) @@ -345,13 +451,18 @@ def __getitem__(self, index): # Scale width and height to range (0,1). data_dict[self.key_image_sizes] = torch.FloatTensor([float(height/self.scale_image_height), float(width/self.scale_image_width)]) - # Question. - data_dict[self.key_questions] = item[self.key_questions] - data_dict[self.key_answers] = item[self.key_answers] + # Apply question transformations. + preprocessed_question = item[self.key_questions] + # TODO: apply additional random transformations e.g. "shuffle_words" + data_dict[self.key_questions] = preprocessed_question + + # Return answer. + preprocessed_answer = item[self.key_answers] + data_dict[self.key_answers] = preprocessed_answer # Question category related variables. # Check if this is binary question. - if self.predict_yes_no(item[self.key_questions]): + if self.predict_yes_no(item[self.key_answers]): data_dict[self.key_category_ids] = 4 # Binary. data_dict[self.key_category_names] = self.category_idx_to_word[4] else: @@ -390,7 +501,7 @@ def collate_fn(self, batch): data_dict[self.key_image_ids] = [item[self.key_image_ids] for item in batch] data_dict[self.key_image_sizes] = torch.stack([item[self.key_image_sizes] for item in batch]).type(torch.FloatTensor) - # Collate lists. + # Collate lists/lists of lists. 
data_dict[self.key_questions] = [item[self.key_questions] for item in batch] data_dict[self.key_answers] = [item[self.key_answers] for item in batch] From 409f993c615f3c831e3fbb1185f88aae8a24692f Mon Sep 17 00:00:00 2001 From: tkornut Date: Fri, 26 Apr 2019 00:26:49 -0700 Subject: [PATCH 12/15] Added preprocessing to sentence tokenizer, cleanups --- .../components/text/sentence_tokenizer.yml | 9 ++++ .../image_text_to_class/vqa_med_2019.py | 47 ++++-------------- ptp/components/text/sentence_tokenizer.py | 49 ++++++++++++++++--- ptp/configuration/config_parsing.py | 29 +++++++++++ 4 files changed, 90 insertions(+), 44 deletions(-) diff --git a/configs/default/components/text/sentence_tokenizer.yml b/configs/default/components/text/sentence_tokenizer.yml index 40714c7..233c221 100644 --- a/configs/default/components/text/sentence_tokenizer.yml +++ b/configs/default/components/text/sentence_tokenizer.yml @@ -8,6 +8,15 @@ # False: sentence -> list of strings, True: list of strings -> sentence. detokenize: False +# Select applied preprocessing/augmentations (LOADED) +# Use one (or more) of the transformations: +# none | lowercase | remove_punctuation | all +# Accepted formats: a,b,c or [a,b,c] +preprocessing: none + +# List of characters to be removed +remove_characters: '' + streams: #################################################################### # 2. Keymappings associated with INPUT and OUTPUT streams. diff --git a/ptp/components/problems/image_text_to_class/vqa_med_2019.py b/ptp/components/problems/image_text_to_class/vqa_med_2019.py index 4a9616f..a06624c 100644 --- a/ptp/components/problems/image_text_to_class/vqa_med_2019.py +++ b/ptp/components/problems/image_text_to_class/vqa_med_2019.py @@ -24,7 +24,6 @@ from PIL import Image import nltk -from nltk.corpus import stopwords import torch from torchvision import transforms @@ -32,36 +31,7 @@ from ptp.components.problems.problem import Problem from ptp.data_types.data_definition import DataDefinition -from ptp.configuration.configuration_error import ConfigurationError - -def get_value_list_from_dictionary(key, parameter_dict, accepted_values = []): - """ - Parses parameter values retrieved from a given parameter dictionary using key. - Optionally, checks is all values are accepted. - - :param key: Key of the parameter. - :param parameter_dict: Dictionary containing given key (e.g. config or globals) - :param accepted_values: List of accepted values (DEFAULT: []) - - :return: List of parsed values - """ - parameter = parameter_dict[key] - # Preprocess parameter value. - if (type(parameter) == str): - values = parameter.replace(" ","").split(",") - else: - values = parameter # list - assert type(values) == list, "Parameter value must be a list" - - # Test values one by one. - if len(accepted_values) > 0: - for value in values: - if value not in accepted_values: - raise ConfigurationError("One of the values in '{}' is invalid (current: '{}', accepted: {})".format(key, value, accepted_values)) - - # Return list. - return values - +from ptp.configuration.config_parsing import get_value_list_from_dictionary class VQAMED2019(Problem): """ @@ -173,7 +143,7 @@ def __init__(self, name, config): source_files, source_categories = self.filter_sources(source_files, source_categories) # else: # TODO - # Get image augmentations. + # Get image preprocessing. 
self.image_preprocessing = get_value_list_from_dictionary( "image_preprocessing", self.config, 'none | random_affine | random_horizontal_flip | normalize | all'.split(" | ") @@ -182,10 +152,10 @@ def __init__(self, name, config): self.image_preprocessing = [] if 'all' in self.image_preprocessing: self.image_preprocessing = 'random_affine | random_horizontal_flip | normalize'.split(" | ") - self.logger.info("Applied image augmentations: {}".format(self.image_preprocessing)) + self.logger.info("Applied image preprocessing: {}".format(self.image_preprocessing)) - # Get question augmentations. + # Get question preprocessing. self.question_preprocessing = get_value_list_from_dictionary( "question_preprocessing", self.config, 'none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all'.split(" | ") @@ -194,7 +164,7 @@ def __init__(self, name, config): self.question_preprocessing = [] if 'all' in self.question_preprocessing: self.question_preprocessing = 'lowercase | remove_punctuation | tokenize | remove_stop_words | shuffle_words'.split(" | ") - self.logger.info("Applied question augmentations: {}".format(self.question_preprocessing)) + self.logger.info("Applied question preprocessing: {}".format(self.question_preprocessing)) # Get answer preprocessing. self.answer_preprocessing = get_value_list_from_dictionary( @@ -310,10 +280,11 @@ def preprocess_text(self, text, lowercase = False, remove_punctuation = False, t # Remove punctuation. if remove_punctuation: + # Remove '“' and '”' and '’'!!! + for char in ['“', '”', '’']: + text = text.replace(char,' ') translator = str.maketrans('', '', string.punctuation) text = text.translate(translator) - # Remove '“' and '”' !!! - text = text.replace('“','').replace('”','') # If not tokenize - return text. if not tokenize: @@ -327,7 +298,7 @@ def preprocess_text(self, text, lowercase = False, remove_punctuation = False, t return text_words # Perform "cleansing". - stops = set(stopwords.words("english")) + stops = set(nltk.corpus.stopwords.words("english")) cleansed_words = [word for word in text_words if word not in stops] # Return the original text if there are no words left :] if len(cleansed_words) == 0: diff --git a/ptp/components/text/sentence_tokenizer.py b/ptp/components/text/sentence_tokenizer.py index bd62526..1c94e95 100644 --- a/ptp/components/text/sentence_tokenizer.py +++ b/ptp/components/text/sentence_tokenizer.py @@ -14,11 +14,15 @@ __author__ = "Tomasz Kornuta" -from nltk.tokenize import WhitespaceTokenizer +import nltk +#from nltk.tokenize import WhitespaceTokenizer +import string from ptp.components.component import Component from ptp.data_types.data_definition import DataDefinition +from ptp.configuration.config_parsing import get_value_list_from_dictionary + class SentenceTokenizer(Component): """ @@ -41,8 +45,26 @@ def __init__(self, name, config): # Read the actual configuration. self.mode_detokenize = config['detokenize'] + # Get preprocessing. 
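The preprocessing and remove_characters options read below feed tokenize_sample() further down: optional lowercasing, replacement of the extra characters by spaces, optional punctuation stripping, then whitespace tokenization. A stand-alone illustration of that sequence on a made-up answer (not the component itself):

import string
from nltk.tokenize import WhitespaceTokenizer

def tokenize(text, preprocessing=("lowercase", "remove_punctuation"), remove_characters=("“", "”", "’")):
    if "lowercase" in preprocessing:
        text = text.lower()
    for char in remove_characters:
        text = text.replace(char, " ")
    if "remove_punctuation" in preprocessing:
        text = text.translate(str.maketrans("", "", string.punctuation))
    return WhitespaceTokenizer().tokenize(text)

print(tokenize("Pott’s disease (tuberculous spondylitis)"))
# ['pott', 's', 'disease', 'tuberculous', 'spondylitis']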
+ self.preprocessing = get_value_list_from_dictionary( + "preprocessing", self.config, + 'none | lowercase | remove_punctuation | all'.split(" | ") + ) + if 'none' in self.preprocessing: + self.preprocessing = [] + if 'all' in self.preprocessing: + self.preprocessing = 'lowercase | remove_punctuation'.split(" | ") + self.logger.info("Applied preprocessing: {}".format(self.preprocessing)) + + self.remove_characters = get_value_list_from_dictionary("remove_characters", self.config) + self.logger.info("Additional characters that will be removed during preprocessing: {}".format(self.remove_characters)) + + + if 'remove_punctuation' in self.preprocessing: + self.translator = str.maketrans('', '', string.punctuation) + # Tokenizer. - self.tokenizer = WhitespaceTokenizer() + self.tokenizer = nltk.tokenize.WhitespaceTokenizer() # Set key mappings. self.key_inputs = self.stream_keys["inputs"] @@ -81,15 +103,30 @@ def output_data_definitions(self): return { self.key_outputs: DataDefinition([-1, 1], [list, str], "Batch of sentences, each represented as a single string [BATCH_SIZE] x [string]") } - def tokenize_sample(self, sample): + def tokenize_sample(self, text): """ - Changes sample (sentence) into list of tokens (words). + Changes text (sentence) into list of tokens (words). - :param sample: sentence (string). + :param text: sentence (string). :return: list of words (strings). """ - return self.tokenizer.tokenize(sample) # sample.split() + # Lowercase. + if 'lowercase' in self.preprocessing: + text = text.lower() + + # Remove characters. + for char in self.remove_characters: + text = text.replace(char, ' ') + + # Remove punctuation. + if 'remove_punctuation' in self.preprocessing: + text = text.translate(self.translator) + + # Tokenize. + text_words = self.tokenizer.tokenize(text) + + return text_words def detokenize_sample(self, sample): """ diff --git a/ptp/configuration/config_parsing.py b/ptp/configuration/config_parsing.py index 7c2aeb3..8a557df 100644 --- a/ptp/configuration/config_parsing.py +++ b/ptp/configuration/config_parsing.py @@ -210,3 +210,32 @@ def reverse_order_config_load(config_interface_obj, configs_to_load, abs_config_ # Load config from YAML file. config_interface_obj.add_config_params_from_yaml(abs_config_path + config) print('Info: Loaded configuration from file {}'.format(abs_config_path + config)) + + +def get_value_list_from_dictionary(key, parameter_dict, accepted_values = []): + """ + Parses parameter values retrieved from a given parameter dictionary using key. + Optionally, checks is all values are accepted. + + :param key: Key of the parameter. + :param parameter_dict: Dictionary containing given key (e.g. config or globals) + :param accepted_values: List of accepted values (DEFAULT: []) + + :return: List of parsed values + """ + parameter = parameter_dict[key] + # Preprocess parameter value. + if (type(parameter) == str): + values = parameter.replace(" ","").split(",") + else: + values = parameter # list + assert type(values) == list, "Parameter value must be a list" + + # Test values one by one. + if len(accepted_values) > 0: + for value in values: + if value not in accepted_values: + raise ConfigurationError("One of the values in '{}' is invalid (current: '{}', accepted: {})".format(key, value, accepted_values)) + + # Return list. 
+ return values From 477eed4aaa75a9a9237fc60bdb5432d635a27660 Mon Sep 17 00:00:00 2001 From: tkornut Date: Fri, 26 Apr 2019 00:27:26 -0700 Subject: [PATCH 13/15] C4: config for classification of answer depending on answer words --- .../c4_word_answer_onehot_bow.yml | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml diff --git a/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml b/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml new file mode 100644 index 0000000..f41c722 --- /dev/null +++ b/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml @@ -0,0 +1,62 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c4_classification/default_c4_classification.yml + +# Training parameters: +training: + problem: + batch_size: 128 + remove_punctuation: all + +# Validation parameters: +validation: + problem: + batch_size: 128 + remove_punctuation: all + +pipeline: + name: c4_word_answer_onehot_bow + + # Answer encoding. + answer_tokenizer: + type: SentenceTokenizer + priority: 1.1 + preprocessing: lowercase,remove_punctuation + remove_characters: [“,”,’] + streams: + inputs: answers + outputs: tokenized_answer_words + + answer_onehot_encoder: + type: SentenceOneHotEncoder + priority: 1.2 + data_folder: ~/data/vqa-med + word_mappings_file: answer_words.c4.preprocessed.word.mappings.csv + export_word_mappings_to_globals: True + streams: + inputs: tokenized_answer_words + outputs: encoded_answer_words + globals: + vocabulary_size: answer_words_vocabulary_size + word_mappings: answer_words_word_mappings + + answer_bow_encoder: + type: BOWEncoder + priority: 1.3 + streams: + inputs: encoded_answer_words + outputs: bow_answer_words + globals: + bow_size: answer_words_vocabulary_size + + # Model. + classifier: + type: FeedForwardNetwork + hidden_sizes: [100, 100] + priority: 3 + streams: + inputs: bow_answer_words + globals: + input_size: answer_words_vocabulary_size + prediction_size: vocabulary_size_c4 + +#: pipeline From ee8cad569ded8479f82dd1ce41c5a4ac67e50921 Mon Sep 17 00:00:00 2001 From: tkornut Date: Fri, 26 Apr 2019 00:36:37 -0700 Subject: [PATCH 14/15] C4: config for classification of answer depending on answer words, dropout: 0.5 one hidden layer (500) --- .../c4_classification/c4_word_answer_onehot_bow.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml b/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml index f41c722..842a987 100644 --- a/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml +++ b/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml @@ -51,7 +51,8 @@ pipeline: # Model. 
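The c4_word_answer_onehot_bow pipeline above maps answer words to indices, one-hot encodes them and collapses them into a bag-of-words vector before the classifier. Conceptually the BOW step just sums the one-hot word vectors; a compact sketch with a toy vocabulary (the real mappings come from answer_words.c4.preprocessed.word.mappings.csv, and whether ptp's BOWEncoder sums or averages is an assumption here):

import torch

vocab = {"normal": 0, "chest": 1, "ct": 2, "mri": 3}    # toy word mappings
answer_words = ["chest", "ct"]

indices = torch.tensor([vocab[w] for w in answer_words])
one_hots = torch.nn.functional.one_hot(indices, num_classes=len(vocab)).float()
bow = one_hots.sum(dim=0)    # bag-of-words representation of the whole answer
print(bow)                   # tensor([0., 1., 1., 0.])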
classifier: type: FeedForwardNetwork - hidden_sizes: [100, 100] + hidden_sizes: [500] + dropout_rate: 0.5 priority: 3 streams: inputs: bow_answer_words From 07270dc9f4d0b605ab4177daf4aa3e9392acf768 Mon Sep 17 00:00:00 2001 From: tkornut Date: Fri, 26 Apr 2019 01:08:16 -0700 Subject: [PATCH 15/15] comment --- .../components/problems/image_text_to_class/vqa_med_2019.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml b/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml index bd724b4..da0de78 100644 --- a/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml +++ b/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml @@ -37,7 +37,8 @@ image_preprocessing: normalize question_preprocessing: lowercase, remove_punctuation # Select applied question preprocessing (LOADED) -# Options: none | lowercase | remove_punctuation | tokenize | all +# Use one (or more) of the transformations: +# none | lowercase | remove_punctuation | tokenize | all # Accepted formats: a,b,c or [a,b,c] answer_preprocessing: none
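With the final defaults above (image_preprocessing: normalize; question_preprocessing: lowercase, remove_punctuation; answer_preprocessing: none), each image only goes through resize, tensor conversion and ImageNet normalization, while the affine and flip augmentations stay opt-in. A stand-alone sketch of that default transform chain, mirroring __getitem__ from patch 11; the 224x224 target size is just an illustrative stand-in for the configured resize_image:

from torchvision import transforms

image_preprocessing = ["normalize"]           # the default set above

ops = []
if "random_affine" in image_preprocessing:
    ops.append(transforms.RandomAffine((-45, 135), (0.05, 0.25), (0.5, 2)))
if "random_horizontal_flip" in image_preprocessing:
    ops.append(transforms.RandomHorizontalFlip())
ops += [transforms.Resize([224, 224]), transforms.ToTensor()]
if "normalize" in image_preprocessing:
    # Statistics expected by ImageNet-pretrained torchvision backbones.
    ops.append(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))

transform = transforms.Compose(ops)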