From 7e8fb02bbb0ff98ab316a66d172b34cb6d46caf8 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 15:40:17 -0700 Subject: [PATCH 01/10] minor fixes to processor (added default terminal condition etc.) --- configs/default/workers/processor.yml | 5 +++++ .../problems/image_text_to_class/__init__.py | 2 ++ ptp/workers/processor.py | 20 +++++++++---------- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/configs/default/workers/processor.yml b/configs/default/workers/processor.yml index 7737233..b8989cc 100644 --- a/configs/default/workers/processor.yml +++ b/configs/default/workers/processor.yml @@ -37,6 +37,11 @@ test: # type: RandomSampler # The rest of the content of that section is optimizer-specific... + # Terminal condition that will be used during processing. + terminal_conditions: + # Terminal condition : maximal number of episodes (Optional, -1 means that processor will perform one pass over the whole dataset/split) + episode_limit: -1 + #################################################################### # Section defining all the default values of parameters used during training. diff --git a/ptp/components/problems/image_text_to_class/__init__.py b/ptp/components/problems/image_text_to_class/__init__.py index a520d4c..eb52c35 100644 --- a/ptp/components/problems/image_text_to_class/__init__.py +++ b/ptp/components/problems/image_text_to_class/__init__.py @@ -1,5 +1,7 @@ +from .clevr import CLEVR from .vqa_med_2019 import VQAMED2019 __all__ = [ + 'CLEVR', 'VQAMED2019', ] diff --git a/ptp/workers/processor.py b/ptp/workers/processor.py index 9edad60..671db33 100644 --- a/ptp/workers/processor.py +++ b/ptp/workers/processor.py @@ -223,20 +223,20 @@ def setup_individual_experiment(self): # check if the maximum number of episodes is specified, if not put a # default equal to the size of the dataset (divided by the batch size) # So that by default, we loop over the test set once. - max_test_episodes = len(self.pm) + problem_size_in_episodes = len(self.pm) - self.config_test['problem'].add_default_params({'max_test_episodes': max_test_episodes}) - if self.config_test["problem"]["max_test_episodes"] == -1: + if self.config_test["terminal_conditions"]["episode_limit"] == -1: # Overwrite the config value! - self.config_test['problem'].add_config_params({'max_test_episodes': max_test_episodes}) + self.config_test['terminal_conditions'].add_config_params({'episode_limit': problem_size_in_episodes}) # Warn if indicated number of episodes is larger than an epoch size: - if self.config_test["problem"]["max_test_episodes"] > max_test_episodes: - self.logger.warning('Indicated maximum number of episodes is larger than one epoch, reducing it.') - self.config_test['problem'].add_config_params({'max_test_episodes': max_test_episodes}) + if self.config_test["terminal_conditions"]["episode_limit"] > problem_size_in_episodes: + self.logger.warning('Indicated limit of number of episodes is larger than one epoch, reducing it.') + # Overwrite the config value! + self.config_test['terminal_conditions'].add_config_params({'episode_limit': problem_size_in_episodes}) - self.logger.info("Setting the max number of episodes to: {}".format( - self.config_test["problem"]["max_test_episodes"])) + self.logger.info("Limiting the number of episodes to: {}".format( + self.config_test["terminal_conditions"]["episode_limit"])) ###################### PIPELINE ###################### @@ -384,7 +384,7 @@ def run_experiment(self): # Increment counter. 
self.app_state.episode += 1 # Terminal condition 0: max test episodes reached. - if self.app_state.episode == self.config_test["problem"]["max_test_episodes"]: + if self.app_state.episode == self.config_test["terminal_conditions"]["episode_limit"]: break # Forward pass. From 20af6c50c98e922db21ed34c4909ee113adeda62 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 15:40:38 -0700 Subject: [PATCH 02/10] CLEVR - loading questions --- configs/clevr/default_clevr.yml | 62 ++++ .../problems/image_text_to_class/clevr.yml | 72 +++++ .../problems/image_text_to_class/clevr.py | 291 ++++++++++++++++++ 3 files changed, 425 insertions(+) create mode 100644 configs/clevr/default_clevr.yml create mode 100644 configs/default/components/problems/image_text_to_class/clevr.yml create mode 100644 ptp/components/problems/image_text_to_class/clevr.py diff --git a/configs/clevr/default_clevr.yml b/configs/clevr/default_clevr.yml new file mode 100644 index 0000000..e5303e0 --- /dev/null +++ b/configs/clevr/default_clevr.yml @@ -0,0 +1,62 @@ +# Training parameters: +training: + problem: + type: CLEVR + batch_size: &b 64 + split: training + #resize: [32, 32] + # optimizer parameters: + optimizer: + type: Adam + lr: 0.0001 + # settings parameters + terminal_conditions: + loss_stop_threshold: 0.05 + early_stop_validations: -1 + episode_limit: 10000 + epoch_limit: 10 + +# Validation parameters: +validation: + #partial_validation_interval: 100 + problem: + type: CLEVR + batch_size: *b + split: validation + #resize: [32, 32] + +# Testing parameters: +test: + problem: + type: CLEVR + batch_size: *b + split: test + #resize: [32, 32] + +pipeline: + name: default + disable: image_viewer + + # Loss + nllloss: + type: NLLLoss + priority: 10.0 + + # Statistics. + batch_size: + priority: 100.0 + type: BatchSizeStatistics + + stream_viewer: + priority: 100.4 + type: StreamViewer + input_streams: index, question + + #image_viewer: + # priority: 100.5 + # type: ImageToClassViewer + # streams: + # images: inputs + # labels: labels + # answers: answers + diff --git a/configs/default/components/problems/image_text_to_class/clevr.yml b/configs/default/components/problems/image_text_to_class/clevr.yml new file mode 100644 index 0000000..7e62f09 --- /dev/null +++ b/configs/default/components/problems/image_text_to_class/clevr.yml @@ -0,0 +1,72 @@ +# This file defines the default values for the CLEVR problem. + +#################################################################### +# 1. CONFIGURATION PARAMETERS that will be LOADED by the component. +#################################################################### + +# Folder where problem will store data (LOADED) +data_folder: '~/data/CLEVR_v1.0' + +# Defines the set (split) that will be used (LOADED) +# Options: training | validation | test | cogent_a_training | cogent_a_validation | cogent_b_validation +split: training + +# Defines the categoriees that will be used (LOADED) +# Options: all | c1 | c2 | c3 | c4 (or any combination of the latter 4) +#categories: all + +# Flag indicating whether the problem will load and return images (LOADED) +stream_images: True + +streams: + #################################################################### + # 2. Keymappings associated with INPUT and OUTPUT streams. + #################################################################### + + # Stream containing batch of indices (OUTPUT) + # Every problem MUST return that stream. 
+ indices: indices + + # Stream containing batch of images (OUTPUT) + images: images + + # Stream containing batch of image names (OUTPUT) + image_ids: image_ids + + # Stream containing batch of questions (OUTPUT) + questions: questions + + # Stream containing targets - answers (OUTPUT) + answers: answers + + # Stream containing scene descriptions (OUTPUT) + answers: scene_graphs + + # Stream containing batch with question type - indices (OUTPUT) + category_ids: question_type_id + + # Stream containing batch with question type - names (OUTPUT) + category_names: question_type_name + +globals: + #################################################################### + # 3. Keymappings of variables that will be RETRIEVED from GLOBALS. + #################################################################### + + #################################################################### + # 4. Keymappings associated with GLOBAL variables that will be SET. + #################################################################### + + # Width of the image (SET) + input_width: image_width + # Height of the image (SET) + input_height: image_height + # Depth of the image (SET) + input_depth: image_depth + + # Question type (word-idx) mappings (SET) + question_type_word_mappings: question_type_word_mappings + + #################################################################### + # 5. Keymappings associated with statistics that will be ADDED. + #################################################################### diff --git a/ptp/components/problems/image_text_to_class/clevr.py b/ptp/components/problems/image_text_to_class/clevr.py new file mode 100644 index 0000000..0d710cf --- /dev/null +++ b/ptp/components/problems/image_text_to_class/clevr.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright (C) IBM Corporation 2019 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+__author__ = "Tomasz Kornuta, Vincent Marois"
+
+import os
+import string
+import tqdm
+
+#import pandas as pd
+#from PIL import Image
+#import numpy as np
+#import nltk
+import json
+
+import torch
+#from torchvision import transforms
+
+from ptp.components.problems.problem import Problem
+from ptp.data_types.data_definition import DataDefinition
+
+#from ptp.components.utils.io import save_nparray_to_csv_file
+from ptp.configuration.config_parsing import get_value_from_dictionary
+from ptp.configuration.configuration_error import ConfigurationError
+
+class CLEVR(Problem):
+    """
+    Problem providing data associated with the CLEVR (Compositional Language and Elementary Visual Reasoning) diagnostic dataset.
+
+    The dataset consists of three splits, plus annotations:
+        - A training set of 70,000 images and 699,989 questions
+        - A validation set of 15,000 images and 149,991 questions
+        - A test set of 15,000 images and 149,988 questions
+        - Answers for all train and val questions
+        - Scene graph annotations for train and val images giving ground-truth locations, attributes, and relationships for objects
+        - Functional program representations for all training and validation images
+
+    CLEVR contains a total of 90 question families, each with a single program template and an average of four text templates.
+    Those are further aggregated into 13 Question Types:
+        - Querying attributes (Size, Color, Material, Shape)
+        - Comparing attributes (Size, Color, Material, Shape)
+        - Existence
+        - Counting
+        - Integer comparison (Equal, Less, More)
+
+    For more details please refer to the associated _website or _paper.
+    Test set with answers can be downloaded from a separate repository _repo.
+
+    .. _website: https://cs.stanford.edu/people/jcjohns/clevr/
+
+    .. _paper: https://arxiv.org/pdf/1612.06890
+
+    """
+    def __init__(self, name, config):
+        """
+        Initializes the problem object. Calls the base constructor and loads the question file adequate for the given split.
+
+        :param name: Name of the component.
+
+        :param config: Dictionary of parameters (read from configuration ``.yaml`` file).
+        """
+        # Call constructors of parent classes.
+        Problem.__init__(self, name, CLEVR, config)
+
+        # (Eventually) download required packages.
+        #nltk.download('punkt')
+        #nltk.download('stopwords')
+
+        # Get key mappings of all output streams.
+        self.key_images = self.stream_keys["images"]
+        self.key_image_ids = self.stream_keys["image_ids"]
+        self.key_questions = self.stream_keys["questions"]
+        self.key_answers = self.stream_keys["answers"]
+        self.key_question_type_ids = self.stream_keys["question_type_ids"]
+        self.key_question_type_names = self.stream_keys["question_type_names"]
+
+        # Get flag informing whether we want to stream images or not.
+        self.stream_images = self.config['stream_images']
+
+        # Output image dimensions.
+        self.height = 480 # self.config['resize_image'][0]
+        self.width = 320 #self.config['resize_image'][1]
+        self.depth = 3
+
+        # Set global variables - all dimensions ASIDE OF BATCH.
+        self.globals["image_height"] = self.height
+        self.globals["image_width"] = self.width
+        self.globals["image_depth"] = self.depth
+
+        # Mapping of question subtypes to types.
+ self.question_type_subtype_mapping = { + 'query_size': 'query_attribute', + 'equal_size': 'compare_attribute', + 'query_shape': 'query_attribute', + 'query_color': 'query_attribute', + 'greater_than': 'compare_integer', + 'equal_material': 'compare_attribute', + 'equal_color': 'compare_attribute', + 'equal_shape': 'compare_attribute', + 'less_than': 'compare_integer', + 'count': 'count', + 'exist': 'exist', + 'equal_integer': 'compare_integer', + 'query_material': 'query_attribute'} + + + + # Get the absolute path. + self.data_folder = os.path.expanduser(self.config['data_folder']) + + # Get split. + split = get_value_from_dictionary('split', self.config, "training | validation | test | cogent_a_training | cogent_a_validation | cogent_b_validation".split(" | ")) + + # Set split-dependent data. + if split == 'training': + # Training split folder and file with data question. + split_image_folder = os.path.join(self.data_folder, "images", "train") + data_file = os.path.join(self.data_folder, "questions", 'CLEVR_train_questions.json') + + elif split == 'validation': + # Validation split folder and file with data question. + split_image_folder = os.path.join(self.data_folder, "images", "val") + data_file = os.path.join(self.data_folder, "questions", 'CLEVR_val_questions.json') + + elif split == 'test': + # Test split folder and file with data question. + split_image_folder = os.path.join(self.data_folder, "images", "test") + data_file = os.path.join(self.data_folder, "questions", 'CLEVR_test_questions.json') + + else: # cogent + raise ConfigurationError("Split {} not supported yet".format(split)) + + # Load dataset. + self.dataset = self.load_dataset(data_file, split_image_folder) + + # Display exemplary sample. + #self.logger.info("Exemplary sample 0 ({}):\n [ category: {}\t image_ids: {}\t question: {}\t answer: {} ]".format( + # self.ix[0], + # self.category_idx_to_word[self.dataset[self.ix[0]][self.key_question_type_ids]], + # self.dataset[self.ix[0]][self.key_image_ids], + # self.dataset[self.ix[0]][self.key_questions], + # self.dataset[self.ix[0]][self.key_answers] + # )) + + + + def output_data_definitions(self): + """ + Function returns a dictionary with definitions of output data produced the component. + + :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + # Add all "standard" streams. + d = { + self.key_indices: DataDefinition([-1, 1], [list, int], "Batch of sample indices [BATCH_SIZE] x [1]"), + self.key_image_ids: DataDefinition([-1, 1], [list, str], "Batch of image names, each being a single word [BATCH_SIZE] x [STRING]"), + self.key_question_type_ids: DataDefinition([-1], [torch.Tensor], "Batch of target question type indices, each being a single index [BATCH_SIZE]"), + self.key_question_type_names: DataDefinition([-1, 1], [list, str], "Batch of target question type names, each being a single word [BATCH_SIZE] x [STRING]"), + } + + # Return images only when required. + if self.stream_images: + d[self.key_images] = DataDefinition([-1, self.depth, self.height, self.width], [torch.Tensor], "Batch of images [BATCH_SIZE x IMAGE_DEPTH x IMAGE_HEIGHT x IMAGE_WIDTH]") + + # Add stream with questions. + d[self.key_questions] = DataDefinition([-1, 1], [list, str], "Batch of questions, each being a string consisting of many words [BATCH_SIZE] x [STRING]") + + # Add stream with answers. 
+ d[self.key_answers]= DataDefinition([-1, 1], [list, str], "Batch of target answers, each being a string consisting of many words [BATCH_SIZE] x [STRING]") + + return d + + + def __len__(self): + """ + Returns the "size" of the "problem" (total number of samples). + + :return: The size of the problem. + """ + return len(self.dataset) + + + def load_dataset(self, source_data_file, source_image_folder): + """ + Loads the dataset from source file + + :param source_data_file: jSON file with image ids, questions, answers, scene graphs, etc. + + :param source_image_folder: Folder containing image files. + + """ + self.logger.info("Loading dataset from:\n {}".format(source_data_file)) + # Set containing list of tuples. + dataset = [] + + with open(source_data_file) as f: + self.logger.info('Loading samples from {} ...'.format(source_data_file)) + dataset = json.load(f) + self.logger.info('Loaded {} samples'.format(len(dataset['questions']))) + print(dataset["questions"][0]) + exit(1) + + + self.logger.info("Loaded dataset consisting of {} samples".format(len(dataset))) + # Return the created list. + return dataset + + + def __getitem__(self, index): + """ + Getter method to access the dataset and return a single sample. + + :param index: index of the sample to return. + :type index: int + + :return: DataDict({'indices', 'images', 'images_ids','questions', 'answers', 'category_ids', 'image_sizes'}) + """ + # Get item. + item = self.dataset[self.ix[index]] + + # Create the resulting sample (data dict). + data_dict = self.create_data_dict(index) + + # Load and stream the image ids. + img_id = item[self.key_image_ids] + data_dict[self.key_image_ids] = img_id + + # Load the adequate image - only when required. + if self.stream_images: + + # Image related variables. + data_dict[self.key_images] = item[self.key_images] + + # Return question. + data_dict[self.key_questions] = item[self.key_questions] + + # Return answer. + data_dict[self.key_answers] = item[self.key_answers] + + # Question type related variables. + data_dict[self.key_question_type_ids] = item[self.key_question_type_ids] + data_dict[self.key_question_type_names] = self.category_idx_to_word[item[self.key_question_type_ids]] + + # Return sample. + return data_dict + + + def collate_fn(self, batch): + """ + Combines a list of DataDict (retrieved with :py:func:`__getitem__`) into a batch. + + :param batch: list of individual samples to combine + :type batch: list + + :return: DataDict({'indices', 'images', 'images_ids','questions', 'answers', 'category_ids', 'image_sizes'}) + + """ + # Collate indices. + data_dict = self.create_data_dict([sample[self.key_indices] for sample in batch]) + + # Stack images. + data_dict[self.key_image_ids] = [item[self.key_image_ids] for item in batch] + if self.stream_images: + data_dict[self.key_images] = torch.stack([item[self.key_images] for item in batch]).type(torch.FloatTensor) + + # Collate lists/lists of lists. + data_dict[self.key_questions] = [item[self.key_questions] for item in batch] + data_dict[self.key_answers] = [item[self.key_answers] for item in batch] + + # Stack categories. + data_dict[self.key_question_type_ids] = torch.tensor([item[self.key_question_type_ids] for item in batch]) + data_dict[self.key_question_type_names] = [item[self.key_question_type_names] for item in batch] + + # Return collated dict. 
+ return data_dict From 3b1f87053e0d1a3cd24f2526014c4ea70e11cb5b Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 20:06:43 -0700 Subject: [PATCH 03/10] simple convnet classifier for CLEVR --- ...r_image_classification_convnet_softmax.yml | 37 ++++ configs/clevr/default_clevr.yml | 23 +- .../problems/image_text_to_class/clevr.yml | 21 +- .../default/components/text/label_indexer.yml | 2 +- .../problems/image_text_to_class/clevr.py | 204 +++++++++++++----- .../image_text_to_class/vqa_med_2019.py | 2 +- 6 files changed, 217 insertions(+), 72 deletions(-) create mode 100644 configs/clevr/clevr_image_classification_convnet_softmax.yml diff --git a/configs/clevr/clevr_image_classification_convnet_softmax.yml b/configs/clevr/clevr_image_classification_convnet_softmax.yml new file mode 100644 index 0000000..469008d --- /dev/null +++ b/configs/clevr/clevr_image_classification_convnet_softmax.yml @@ -0,0 +1,37 @@ +# Load config defining CLEVR problems for training, validation and testing. +default_configs: clevr/default_clevr.yml + +# Definition of the pipeline. +pipeline: + + # Model consisting of two components. + image_encoder: + priority: 1.1 + type: ConvNetEncoder + streams: + inputs: images + + # Reshape inputs + reshaper: + priority: 1.2 + type: ReshapeTensor + input_dims: [-1, 16, 58, 38] + output_dims: [-1, 35264] + streams: + inputs: feature_maps + outputs: reshaped_maps + globals: + output_size: reshaped_maps_size + + # Image classifier. + classifier: + priority: 1.3 + type: FeedForwardNetwork + hidden_sizes: [1000] + streams: + inputs: reshaped_maps + globals: + input_size: reshaped_maps_size + prediction_size: num_answers + +#: pipeline diff --git a/configs/clevr/default_clevr.yml b/configs/clevr/default_clevr.yml index e5303e0..97f7fb3 100644 --- a/configs/clevr/default_clevr.yml +++ b/configs/clevr/default_clevr.yml @@ -4,7 +4,7 @@ training: type: CLEVR batch_size: &b 64 split: training - #resize: [32, 32] + #resize: [224, 224] # optimizer parameters: optimizer: type: Adam @@ -18,12 +18,11 @@ training: # Validation parameters: validation: - #partial_validation_interval: 100 problem: type: CLEVR batch_size: *b split: validation - #resize: [32, 32] + #resize: [224, 224] # Testing parameters: test: @@ -31,16 +30,28 @@ test: type: CLEVR batch_size: *b split: test - #resize: [32, 32] + #resize: [224, 224] pipeline: - name: default disable: image_viewer + label_to_target: + type: LabelIndexer + priority: 0.1 + # Load word mappings for answers. + data_folder: '~/data/CLEVR_v1.0' + word_mappings_file: 'answers.all.csv' + export_word_mappings_to_globals: True + globals: + vocabulary_size: num_answers + streams: + inputs: answers + outputs: targets + # Loss nllloss: type: NLLLoss - priority: 10.0 + priority: 10.1 # Statistics. 
batch_size: diff --git a/configs/default/components/problems/image_text_to_class/clevr.yml b/configs/default/components/problems/image_text_to_class/clevr.yml index 7e62f09..3914bea 100644 --- a/configs/default/components/problems/image_text_to_class/clevr.yml +++ b/configs/default/components/problems/image_text_to_class/clevr.yml @@ -11,13 +11,20 @@ data_folder: '~/data/CLEVR_v1.0' # Options: training | validation | test | cogent_a_training | cogent_a_validation | cogent_b_validation split: training -# Defines the categoriees that will be used (LOADED) -# Options: all | c1 | c2 | c3 | c4 (or any combination of the latter 4) -#categories: all - # Flag indicating whether the problem will load and return images (LOADED) stream_images: True +# Resize parameter (LOADED) +# When present, resizes the images from original size to [height, width] +# Depth remains set to 3. +#resize_image: [height, width] + +# Select applied image preprocessing/augmentations (LOADED) +# Use one (or more) of the affine transformations: +# none | normalize | all +# Accepted formats: a,b,c or [a,b,c] +image_preprocessing: normalize + streams: #################################################################### # 2. Keymappings associated with INPUT and OUTPUT streams. @@ -40,13 +47,13 @@ streams: answers: answers # Stream containing scene descriptions (OUTPUT) - answers: scene_graphs + #answers: scene_graphs # Stream containing batch with question type - indices (OUTPUT) - category_ids: question_type_id + category_ids: question_type_ids # Stream containing batch with question type - names (OUTPUT) - category_names: question_type_name + category_names: question_type_names globals: #################################################################### diff --git a/configs/default/components/text/label_indexer.yml b/configs/default/components/text/label_indexer.yml index d402f43..f2c0a48 100644 --- a/configs/default/components/text/label_indexer.yml +++ b/configs/default/components/text/label_indexer.yml @@ -1,4 +1,4 @@ -# This file defines the default values for Label Indexer. +# This file defines the default values for LabelIndexer. #################################################################### # 1. CONFIGURATION PARAMETERS that will be LOADED by the component. diff --git a/ptp/components/problems/image_text_to_class/clevr.py b/ptp/components/problems/image_text_to_class/clevr.py index 0d710cf..e40e411 100644 --- a/ptp/components/problems/image_text_to_class/clevr.py +++ b/ptp/components/problems/image_text_to_class/clevr.py @@ -15,26 +15,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__author__ = "Tomasz Kornuta, Vincent Marois" +__author__ = "Tomasz Kornuta" import os -import string +#import string +import json import tqdm -#import pandas as pd -#from PIL import Image +from PIL import Image #import numpy as np #import nltk -import json import torch -#from torchvision import transforms +from torchvision import transforms from ptp.components.problems.problem import Problem from ptp.data_types.data_definition import DataDefinition #from ptp.components.utils.io import save_nparray_to_csv_file -from ptp.configuration.config_parsing import get_value_from_dictionary +from ptp.configuration.config_parsing import get_value_from_dictionary, get_value_list_from_dictionary from ptp.configuration.configuration_error import ConfigurationError class CLEVR(Problem): @@ -93,32 +92,93 @@ def __init__(self, name, config): # Get flag informing whether we want to stream images or not. self.stream_images = self.config['stream_images'] - # Output image dimensions. - self.height = 480 # self.config['resize_image'][0] - self.width = 320 #self.config['resize_image'][1] - self.depth = 3 + # Check the resize image option. + if "resize_image" in self.config: + if len(self.config['resize_image']) != 2: + self.logger.error("'resize_image' field must contain 2 values: the desired height and width") + exit(-1) + + # Output image dimensions. + self.height = self.config['resize_image'][0] + self.width = self.config['resize_image'][1] + self.depth = 3 + resize = True + else: + # Use original image dimensions. + self.height = 480 + self.width = 320 + self.depth = 3 + resize = False # Set global variables - all dimensions ASIDE OF BATCH. self.globals["image_height"] = self.height self.globals["image_width"] = self.width self.globals["image_depth"] = self.depth - # Mapping of question subtypes to types. - self.question_type_subtype_mapping = { - 'query_size': 'query_attribute', - 'equal_size': 'compare_attribute', - 'query_shape': 'query_attribute', - 'query_color': 'query_attribute', - 'greater_than': 'compare_integer', - 'equal_material': 'compare_attribute', - 'equal_color': 'compare_attribute', - 'equal_shape': 'compare_attribute', - 'less_than': 'compare_integer', - 'count': 'count', - 'exist': 'exist', - 'equal_integer': 'compare_integer', - 'query_material': 'query_attribute'} + # Get image preprocessing. + self.image_preprocessing = get_value_list_from_dictionary( + "image_preprocessing", self.config, + 'none | normalize | all'.split(" | ") + ) + if resize: + # Add resize as transformation. + if 'none' in self.image_preprocessing: + self.image_preprocessing = ["resize"] + if 'all' in self.image_preprocessing: + self.image_preprocessing = ["resize", 'normalize'] + else: + if 'none' in self.image_preprocessing: + self.image_preprocessing = [] + if 'all' in self.image_preprocessing: + self.image_preprocessing = ['normalize'] + self.logger.info("Applied image preprocessing: {}".format(self.image_preprocessing)) + + # Mapping of question subtypes to types (not used, but keeping it just in case). 
+ #self.question_subtype_to_type_mapping = { + # 'query_size': 'query_attribute', + # 'equal_size': 'compare_attribute', + # 'query_shape': 'query_attribute', + # 'query_color': 'query_attribute', + # 'greater_than': 'compare_integer', + # 'equal_material': 'compare_attribute', + # 'equal_color': 'compare_attribute', + # 'equal_shape': 'compare_attribute', + # 'less_than': 'compare_integer', + # 'count': 'count', + # 'exist': 'exist', + # 'equal_integer': 'compare_integer', + # 'query_material': 'query_attribute'} + # Mapping of question subtypes to types. + self.question_subtype_to_id_mapping = { + 'query_size': 0, + 'equal_size': 1, + 'query_shape': 2, + 'query_color': 3, + 'greater_than': 4, + 'equal_material': 5, + 'equal_color': 6, + 'equal_shape': 7, + 'less_than': 8, + 'count': 9, + 'exist': 10, + 'equal_integer': 11, + 'query_material': 12} + + # Mapping of question families to subtypes. + self.question_family_id_to_subtype_mapping = { + 0: "equal_integer", 1: "less_than", 2: "greater_than", 3: "equal_integer", 4: "less_than", 5: "greater_than", 6: "equal_integer", 7: "less_than", 8: "greater_than", 9: "equal_size", + 10: "equal_color", 11: "equal_material", 12: "equal_shape", 13: "equal_size", 14: "equal_size", 15: "equal_size", 16: "equal_color", 17: "equal_color", 18: "equal_color", 19: "equal_material", + 20: "equal_material", 21: "equal_material", 22: "equal_shape", 23: "equal_shape", 24: "equal_shape", 25: "count", 26: "exist", 27: "query_size", 28: "query_shape", 29: "query_color", + 30: "query_material", 31: "count", 32: "query_size", 33: "query_color", 34: "query_material", 35: "query_shape", 36: "exist", 37: "exist", 38: "exist", 39: "exist", + 40: "count", 41: "count", 42: "count", 43: "count", 44: "exist", 45: "exist", 46: "exist", 47: "exist", 48: "count", 49: "count", + 50: "count", 51: "count", 52: "query_color", 53: "query_material", 54: "query_shape", 55: "query_size", 56: "query_material", 57: "query_shape", 58: "query_size", 59: "query_color", + 60: "query_shape", 61: "query_size", 62: "query_color", 63: "query_material", 64: "count", 65: "count", 66: "count", 67: "count", 68: "count", 69: "count", + 70: "count", 71: "count", 72: "count", 73: "exist", 74: "query_size", 75: "query_color", 76: "query_material", 77: "query_shape", 78: "count", 79: "exist", + 80: "query_size", 81: "query_color", 82: "query_material", 83: "query_shape", 84: "count", 85: "exist", 86: "query_shape", 87: "query_material", 88: "query_color", 89: "query_size"} + + # Finally, "merge" those two. + self.question_family_id_to_subtype_id_mapping = { key: self.question_subtype_to_id_mapping[value] for key, value in self.question_family_id_to_subtype_mapping.items() } # Get the absolute path. @@ -130,33 +190,35 @@ def __init__(self, name, config): # Set split-dependent data. if split == 'training': # Training split folder and file with data question. - split_image_folder = os.path.join(self.data_folder, "images", "train") data_file = os.path.join(self.data_folder, "questions", 'CLEVR_train_questions.json') + self.split_image_folder = os.path.join(self.data_folder, "images", "train") elif split == 'validation': # Validation split folder and file with data question. - split_image_folder = os.path.join(self.data_folder, "images", "val") data_file = os.path.join(self.data_folder, "questions", 'CLEVR_val_questions.json') + self.split_image_folder = os.path.join(self.data_folder, "images", "val") elif split == 'test': # Test split folder and file with data question. 
- split_image_folder = os.path.join(self.data_folder, "images", "test") data_file = os.path.join(self.data_folder, "questions", 'CLEVR_test_questions.json') + self.split_image_folder = os.path.join(self.data_folder, "images", "test") else: # cogent raise ConfigurationError("Split {} not supported yet".format(split)) # Load dataset. - self.dataset = self.load_dataset(data_file, split_image_folder) - + self.dataset = self.load_dataset(data_file) + # Display exemplary sample. - #self.logger.info("Exemplary sample 0 ({}):\n [ category: {}\t image_ids: {}\t question: {}\t answer: {} ]".format( - # self.ix[0], - # self.category_idx_to_word[self.dataset[self.ix[0]][self.key_question_type_ids]], - # self.dataset[self.ix[0]][self.key_image_ids], - # self.dataset[self.ix[0]][self.key_questions], - # self.dataset[self.ix[0]][self.key_answers] - # )) + i = 0 + self.logger.info("Exemplary sample {} ({}):\n question_type: {} ({})\n image_ids: {}\n question: {}\n answer: {}".format( + i, self.dataset[i]["question_index"], + self.question_family_id_to_subtype_mapping[self.dataset[i]["question_family_index"]], + self.question_family_id_to_subtype_id_mapping[self.dataset[i]["question_family_index"]], + self.dataset[i]["image_filename"], + self.dataset[i]["question"], + self.dataset[i]["answer"] + )) @@ -196,32 +258,60 @@ def __len__(self): return len(self.dataset) - def load_dataset(self, source_data_file, source_image_folder): + def load_dataset(self, source_data_file): """ Loads the dataset from source file :param source_data_file: jSON file with image ids, questions, answers, scene graphs, etc. - :param source_image_folder: Folder containing image files. - """ self.logger.info("Loading dataset from:\n {}".format(source_data_file)) - # Set containing list of tuples. dataset = [] with open(source_data_file) as f: self.logger.info('Loading samples from {} ...'.format(source_data_file)) - dataset = json.load(f) - self.logger.info('Loaded {} samples'.format(len(dataset['questions']))) - print(dataset["questions"][0]) - exit(1) - + dataset = json.load(f)['questions'] self.logger.info("Loaded dataset consisting of {} samples".format(len(dataset))) - # Return the created list. return dataset + def get_image(self, img_id): + """ + Function loads and returns image along with its size. + Additionally, it performs all the required transformations. + + :param img_id: Identifier of the images. + :param img_folder: Path to the image. + + :return: image (Tensor) + """ + + # Load the image. + img = Image.open(os.path.join(self.split_image_folder, img_id)) + + image_transformations_list = [] + + # Optional: resize. + if 'resize' in self.image_preprocessing: + image_transformations_list.append(transforms.Resize([self.height,self.width])) + + # Add obligatory transformation. + image_transformations_list.append(transforms.ToTensor()) + + # Optional: normalization. + if 'normalize' in self.image_preprocessing: + # Use normalization that the pretrained models from TorchVision require. + image_transformations_list.append(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])) + + # Resize the image and transform to Torch Tensor. + transforms_com = transforms.Compose(image_transformations_list) + # Apply transformations. + img = transforms_com(img) + + # Return image. + return img + def __getitem__(self, index): """ Getter method to access the dataset and return a single sample. @@ -229,33 +319,33 @@ def __getitem__(self, index): :param index: index of the sample to return. 
:type index: int - :return: DataDict({'indices', 'images', 'images_ids','questions', 'answers', 'category_ids', 'image_sizes'}) + :return: DataDict({'indices', 'images', 'images_ids','questions', 'answers', 'question_type_ids', 'question_type_names'}) """ # Get item. - item = self.dataset[self.ix[index]] + item = self.dataset[index] # Create the resulting sample (data dict). data_dict = self.create_data_dict(index) # Load and stream the image ids. - img_id = item[self.key_image_ids] + img_id = item["image_filename"] data_dict[self.key_image_ids] = img_id # Load the adequate image - only when required. if self.stream_images: - + img = self.get_image(img_id) # Image related variables. - data_dict[self.key_images] = item[self.key_images] + data_dict[self.key_images] = img # Return question. - data_dict[self.key_questions] = item[self.key_questions] + data_dict[self.key_questions] = item["question"] # Return answer. - data_dict[self.key_answers] = item[self.key_answers] + data_dict[self.key_answers] = item["answer"] # Question type related variables. - data_dict[self.key_question_type_ids] = item[self.key_question_type_ids] - data_dict[self.key_question_type_names] = self.category_idx_to_word[item[self.key_question_type_ids]] + data_dict[self.key_question_type_ids] = self.question_family_id_to_subtype_id_mapping[item["question_family_index"]] + data_dict[self.key_question_type_names] = self.question_family_id_to_subtype_mapping[item["question_family_index"]] # Return sample. return data_dict diff --git a/ptp/components/problems/image_text_to_class/vqa_med_2019.py b/ptp/components/problems/image_text_to_class/vqa_med_2019.py index f5430b7..a739e4a 100644 --- a/ptp/components/problems/image_text_to_class/vqa_med_2019.py +++ b/ptp/components/problems/image_text_to_class/vqa_med_2019.py @@ -287,7 +287,7 @@ def __init__(self, name, config): self.logger.info("Exported indices to '{}'".format(os.path.join(self.app_state.log_dir, self.config["export_indices"]))) # Display exemplary sample. - self.logger.info("Exemplary sample 0 ({}):\n [ category: {}\t image_ids: {}\t question: {}\t answer: {} ]".format( + self.logger.info("Exemplary sample 0 ({}):\n category: {}\n image_ids: {}\n question: {}\n answer: {}".format( self.ix[0], self.category_idx_to_word[self.dataset[self.ix[0]][self.key_category_ids]], self.dataset[self.ix[0]][self.key_image_ids], From 36bc65bf14c871e20cdfcdf394252b3bc07028a5 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 20:20:06 -0700 Subject: [PATCH 04/10] fixed issue with image, collate/pipeline_manager indices.shape[0] to be fixed --- ptp/components/problems/image_text_to_class/clevr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ptp/components/problems/image_text_to_class/clevr.py b/ptp/components/problems/image_text_to_class/clevr.py index e40e411..ce660f2 100644 --- a/ptp/components/problems/image_text_to_class/clevr.py +++ b/ptp/components/problems/image_text_to_class/clevr.py @@ -288,7 +288,7 @@ def get_image(self, img_id): """ # Load the image. 
- img = Image.open(os.path.join(self.split_image_folder, img_id)) + img = Image.open(os.path.join(self.split_image_folder, img_id)).convert('RGB') image_transformations_list = [] From 7df05037827ac175690b553485705910fa19d4a0 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 22:25:00 -0700 Subject: [PATCH 05/10] total_loss_support bugfix --- ptp/application/pipeline_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ptp/application/pipeline_manager.py b/ptp/application/pipeline_manager.py index c5ee329..3f48cc2 100644 --- a/ptp/application/pipeline_manager.py +++ b/ptp/application/pipeline_manager.py @@ -720,7 +720,7 @@ def collect_statistics(self, stat_col, data_dict): for key in loss.loss_keys(): loss_sum += data_dict[key].cpu().item() stat_col["total_loss"] = loss_sum - stat_col["total_loss_support"] = data_dict["indices"].shape[0] # batch size + stat_col["total_loss_support"] = len(data_dict["indices"]) # batch size def add_aggregators(self, stat_agg): From e50868a0bcbf388af150e788c60c65309aed05e2 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 22:32:51 -0700 Subject: [PATCH 06/10] Added statistics and answer decoder to default clevr pipeline --- configs/clevr/default_clevr.yml | 34 +++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/configs/clevr/default_clevr.yml b/configs/clevr/default_clevr.yml index 97f7fb3..8e657c3 100644 --- a/configs/clevr/default_clevr.yml +++ b/configs/clevr/default_clevr.yml @@ -43,25 +43,55 @@ pipeline: word_mappings_file: 'answers.all.csv' export_word_mappings_to_globals: True globals: + word_mappings: answer_word_mappings vocabulary_size: num_answers streams: inputs: answers - outputs: targets + outputs: target_answers + # Loss nllloss: type: NLLLoss priority: 10.1 + streams: + targets: target_answers # Statistics. batch_size: priority: 100.0 type: BatchSizeStatistics + accuracy: + priority: 100.1 + type: AccuracyStatistics + streams: + targets: target_answers + + precision_recall: + priority: 100.2 + type: PrecisionRecallStatistics + use_word_mappings: True + show_class_scores: True + globals: + word_mappings: answer_word_mappings + streams: + targets: target_answers + + answer_decoder: + priority: 100.3 + type: WordDecoder + import_word_mappings_from_globals: True + globals: + word_mappings: answer_word_mappings + streams: + inputs: predictions + outputs: predicted_answers + stream_viewer: priority: 100.4 type: StreamViewer - input_streams: index, question + input_streams: indices, questions, target_answers, predicted_answers #image_viewer: # priority: 100.5 From 8e8ae1e73935c377fe2f3d3d946665aae4347927 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 23:04:52 -0700 Subject: [PATCH 07/10] second unimodal baseline for CLEVR --- .../clevr_question_classification_lstm.yml | 61 +++++++++++++++++++ configs/clevr/default_clevr.yml | 4 +- 2 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 configs/clevr/clevr_question_classification_lstm.yml diff --git a/configs/clevr/clevr_question_classification_lstm.yml b/configs/clevr/clevr_question_classification_lstm.yml new file mode 100644 index 0000000..5dbd0d4 --- /dev/null +++ b/configs/clevr/clevr_question_classification_lstm.yml @@ -0,0 +1,61 @@ +# Load config defining CLEVR problems for training, validation and testing. +default_configs: clevr/default_clevr.yml + +# Stop streaming images - in all sets. 
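+# (The problem's default configuration sets stream_images: True, hence the explicit override in each split below.)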
+training: + problem: + stream_images: False + +validation: + problem: + stream_images: False + +test: + problem: + stream_images: False + +# Definition of the pipeline. +pipeline: + + # Questions encoding. + question_tokenizer: + priority: 1.1 + type: SentenceTokenizer + # Lowercase all letters + remove punctuation (reduced vocabulary of 80 words instead of 87) + preprocessing: all + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: Embeddings + question_embeddings: + priority: 1.2 + type: SentenceEmbeddings + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/CLEVR_v1.0 + word_mappings_file: questions.all.word.mappings.lowercase.csv + export_word_mappings_to_globals: True + globals: + word_mappings: question_word_mappings + vocabulary_size: num_question_words + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + lstm: + priority: 1.3 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state: Zero + hidden_size: 50 + streams: + inputs: embedded_questions + globals: + input_size: embeddings_size + prediction_size: num_answers + + +#: pipeline diff --git a/configs/clevr/default_clevr.yml b/configs/clevr/default_clevr.yml index 8e657c3..477cdda 100644 --- a/configs/clevr/default_clevr.yml +++ b/configs/clevr/default_clevr.yml @@ -39,8 +39,8 @@ pipeline: type: LabelIndexer priority: 0.1 # Load word mappings for answers. - data_folder: '~/data/CLEVR_v1.0' - word_mappings_file: 'answers.all.csv' + data_folder: ~/data/CLEVR_v1.0 + word_mappings_file: answers.all.word.mappings.csv export_word_mappings_to_globals: True globals: word_mappings: answer_word_mappings From d695b13b0a26c1a77e182875181471828c2db490 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 23:41:45 -0700 Subject: [PATCH 08/10] simple multimodal pipeline - concatenation --- .../clevr_all_vgg_glove_lstm_concat_ffn.yml | 111 ++++++++++++++++++ ...ax.yml => clevr_image_convnet_softmax.yml} | 0 ...lstm.yml => clevr_question_glove_lstm.yml} | 2 +- configs/clevr/default_clevr.yml | 6 +- .../problems/image_text_to_class/clevr.yml | 2 +- .../problems/image_text_to_class/clevr.py | 16 ++- 6 files changed, 123 insertions(+), 14 deletions(-) create mode 100644 configs/clevr/clevr_all_vgg_glove_lstm_concat_ffn.yml rename configs/clevr/{clevr_image_classification_convnet_softmax.yml => clevr_image_convnet_softmax.yml} (100%) rename configs/clevr/{clevr_question_classification_lstm.yml => clevr_question_glove_lstm.yml} (94%) diff --git a/configs/clevr/clevr_all_vgg_glove_lstm_concat_ffn.yml b/configs/clevr/clevr_all_vgg_glove_lstm_concat_ffn.yml new file mode 100644 index 0000000..da57752 --- /dev/null +++ b/configs/clevr/clevr_all_vgg_glove_lstm_concat_ffn.yml @@ -0,0 +1,111 @@ +# Load config defining CLEVR problems for training, validation and testing. +default_configs: clevr/default_clevr.yml + +# Resize and normalize images - in all sets. +training: + problem: + resize_image: [224, 224] + image_preprocessing: normalize + +validation: + problem: + resize_image: [224, 224] + image_preprocessing: normalize + +test: + problem: + resize_image: [224, 224] + image_preprocessing: normalize + +# Definition of the pipeline. +pipeline: + + global_publisher: + priority: 0 + type: GlobalVariablePublisher + keys: [question_encoder_output_size, image_encoder_output_size] + values: [100, 100] + + ################################################################## + # 1st pipeline: question. + # Questions encoding. 
+  question_tokenizer:
+    priority: 1.1
+    type: SentenceTokenizer
+    # Lowercase all letters + remove punctuation (reduced vocabulary of 80 words instead of 87)
+    preprocessing: all
+    streams:
+      inputs: questions
+      outputs: tokenized_questions
+
+  # Model 1: Embeddings
+  question_embeddings:
+    priority: 1.2
+    type: SentenceEmbeddings
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/CLEVR_v1.0
+    word_mappings_file: questions.all.word.mappings.lowercase.csv
+    export_word_mappings_to_globals: True
+    globals:
+      word_mappings: question_word_mappings
+      vocabulary_size: num_question_words
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions
+
+  # Model 2: RNN
+  lstm:
+    priority: 1.3
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    prediction_mode: Last
+    initial_state: Zero
+    hidden_size: 50
+    # Turn off softmax.
+    use_logsoftmax: False
+    streams:
+      inputs: embedded_questions
+      predictions: question_activations
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_encoder_output_size
+
+  ##################################################################
+  # 2nd subpipeline: image.
+  # Image encoder.
+  image_encoder:
+    priority: 2.1
+    type: TorchVisionWrapper
+    model_type: vgg16
+    streams:
+      inputs: images
+      outputs: image_activations
+    globals:
+      output_size: image_encoder_output_size
+
+  ##################################################################
+  # 3rd subpipeline: concatenation + FF.
+  concat:
+    type: Concatenation
+    priority: 3.1
+    input_streams: [question_activations,image_activations]
+    dim: 1 # default
+    input_dims: [[-1,100],[-1,100]]
+    output_dims: [-1,200]
+    streams:
+      outputs: concatenated_activations
+    globals:
+      output_size: concatenated_size
+
+  classifier:
+    type: FeedForwardNetwork
+    hidden_sizes: [100]
+    priority: 3.2
+    streams:
+      inputs: concatenated_activations
+    globals:
+      input_size: concatenated_size
+      prediction_size: num_answers
+
+#: pipeline
diff --git a/configs/clevr/clevr_image_classification_convnet_softmax.yml b/configs/clevr/clevr_image_convnet_softmax.yml
similarity index 100%
rename from configs/clevr/clevr_image_classification_convnet_softmax.yml
rename to configs/clevr/clevr_image_convnet_softmax.yml
diff --git a/configs/clevr/clevr_question_classification_lstm.yml b/configs/clevr/clevr_question_glove_lstm.yml
similarity index 94%
rename from configs/clevr/clevr_question_classification_lstm.yml
rename to configs/clevr/clevr_question_glove_lstm.yml
index 5dbd0d4..02a5399 100644
--- a/configs/clevr/clevr_question_classification_lstm.yml
+++ b/configs/clevr/clevr_question_glove_lstm.yml
@@ -1,7 +1,7 @@
 # Load config defining CLEVR problems for training, validation and testing.
 default_configs: clevr/default_clevr.yml
 
-# Stop streaming images - in all sets.
+# This is a unimodal (question-based) baseline, thus stop streaming images - in all sets.
training: problem: stream_images: False diff --git a/configs/clevr/default_clevr.yml b/configs/clevr/default_clevr.yml index 477cdda..87d5d92 100644 --- a/configs/clevr/default_clevr.yml +++ b/configs/clevr/default_clevr.yml @@ -4,7 +4,7 @@ training: type: CLEVR batch_size: &b 64 split: training - #resize: [224, 224] + #resize_image: [224, 224] # optimizer parameters: optimizer: type: Adam @@ -22,7 +22,7 @@ validation: type: CLEVR batch_size: *b split: validation - #resize: [224, 224] + #resize_image: [224, 224] # Testing parameters: test: @@ -30,7 +30,7 @@ test: type: CLEVR batch_size: *b split: test - #resize: [224, 224] + #resize_image: [224, 224] pipeline: disable: image_viewer diff --git a/configs/default/components/problems/image_text_to_class/clevr.yml b/configs/default/components/problems/image_text_to_class/clevr.yml index 3914bea..0d5b571 100644 --- a/configs/default/components/problems/image_text_to_class/clevr.yml +++ b/configs/default/components/problems/image_text_to_class/clevr.yml @@ -23,7 +23,7 @@ stream_images: True # Use one (or more) of the affine transformations: # none | normalize | all # Accepted formats: a,b,c or [a,b,c] -image_preprocessing: normalize +image_preprocessing: none streams: #################################################################### diff --git a/ptp/components/problems/image_text_to_class/clevr.py b/ptp/components/problems/image_text_to_class/clevr.py index ce660f2..c3ff3d0 100644 --- a/ptp/components/problems/image_text_to_class/clevr.py +++ b/ptp/components/problems/image_text_to_class/clevr.py @@ -109,6 +109,7 @@ def __init__(self, name, config): self.width = 320 self.depth = 3 resize = False + self.logger.info("Setting image size to [D x H x W]: {} x {} x {}".format(self.depth, self.height, self.width)) # Set global variables - all dimensions ASIDE OF BATCH. self.globals["image_height"] = self.height @@ -120,17 +121,14 @@ def __init__(self, name, config): "image_preprocessing", self.config, 'none | normalize | all'.split(" | ") ) + if 'none' in self.image_preprocessing: + self.image_preprocessing = [] + if 'all' in self.image_preprocessing: + self.image_preprocessing = ['normalize'] + if resize: # Add resize as transformation. - if 'none' in self.image_preprocessing: - self.image_preprocessing = ["resize"] - if 'all' in self.image_preprocessing: - self.image_preprocessing = ["resize", 'normalize'] - else: - if 'none' in self.image_preprocessing: - self.image_preprocessing = [] - if 'all' in self.image_preprocessing: - self.image_preprocessing = ['normalize'] + self.image_preprocessing = ["resize"] + self.image_preprocessing self.logger.info("Applied image preprocessing: {}".format(self.image_preprocessing)) # Mapping of question subtypes to types (not used, but keeping it just in case). 
From 4d39c038b90b5db6bb25d4ddcda7467c940c8d60 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 23:44:04 -0700 Subject: [PATCH 09/10] rename --- ...levr_image_convnet_softmax.yml => clevr_image_convnet_ffn.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename configs/clevr/{clevr_image_convnet_softmax.yml => clevr_image_convnet_ffn.yml} (100%) diff --git a/configs/clevr/clevr_image_convnet_softmax.yml b/configs/clevr/clevr_image_convnet_ffn.yml similarity index 100% rename from configs/clevr/clevr_image_convnet_softmax.yml rename to configs/clevr/clevr_image_convnet_ffn.yml From 8d8c588000a4af23d54e6d7ca6455bc9e07ad12e Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 23:47:21 -0700 Subject: [PATCH 10/10] lgtm unused import warning fix --- ptp/components/problems/image_text_to_class/clevr.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ptp/components/problems/image_text_to_class/clevr.py b/ptp/components/problems/image_text_to_class/clevr.py index c3ff3d0..9c57794 100644 --- a/ptp/components/problems/image_text_to_class/clevr.py +++ b/ptp/components/problems/image_text_to_class/clevr.py @@ -18,13 +18,8 @@ __author__ = "Tomasz Kornuta" import os -#import string import json -import tqdm - from PIL import Image -#import numpy as np -#import nltk import torch from torchvision import transforms