From fab6171fc14e396cd6597270dda8019458d6dd13 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Wed, 5 Jun 2019 14:04:40 -0700
Subject: [PATCH 1/3] comment cleanup

---
 configs/default/workers/offline_trainer.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/configs/default/workers/offline_trainer.yml b/configs/default/workers/offline_trainer.yml
index afbc7b1..110231c 100644
--- a/configs/default/workers/offline_trainer.yml
+++ b/configs/default/workers/offline_trainer.yml
@@ -1,6 +1,5 @@
 ####################################################################
 # Section defining all the default values of parameters used during training when using ptp-offline-trainer.
-
 # If you want to use different section for "training" pass its name as command line argument '--training_section_name' to trainer (DEFAULT: training)
 # Note: the following parameters will be (anyway) used as default values.
 default_training:

From a5f5ac2150ff3d063d6fb74e37aa3ce5063e10ee Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Wed, 5 Jun 2019 14:14:48 -0700
Subject: [PATCH 2/3] reorganization of unittests

---
 tests/__init__.py                             | 41 ++++++++++++-------
 tests/{ => application}/pipeline_tests.py     |  0
 tests/{ => components}/component_tests.py     |  0
 tests/{ => components}/problem_tests.py       |  0
 .../config_interface_tests.py                 |  0
 .../config_registry_tests.py                  |  0
 .../{ => configuration}/handshaking_tests.py  |  0
 .../{ => data_types}/data_definition_tests.py |  0
 tests/{ => data_types}/data_dict_tests.py     |  0
 tests/{ => utils}/app_state_tests.py          |  0
 tests/{ => utils}/sampler_factory_tests.py    |  0
 tests/{ => utils}/samplers_tests.py           |  0
 tests/{ => utils}/statistics_tests.py         |  0
 13 files changed, 26 insertions(+), 15 deletions(-)
 rename tests/{ => application}/pipeline_tests.py (100%)
 rename tests/{ => components}/component_tests.py (100%)
 rename tests/{ => components}/problem_tests.py (100%)
 rename tests/{ => configuration}/config_interface_tests.py (100%)
 rename tests/{ => configuration}/config_registry_tests.py (100%)
 rename tests/{ => configuration}/handshaking_tests.py (100%)
 rename tests/{ => data_types}/data_definition_tests.py (100%)
 rename tests/{ => data_types}/data_dict_tests.py (100%)
 rename tests/{ => utils}/app_state_tests.py (100%)
 rename tests/{ => utils}/sampler_factory_tests.py (100%)
 rename tests/{ => utils}/samplers_tests.py (100%)
 rename tests/{ => utils}/statistics_tests.py (100%)

diff --git a/tests/__init__.py b/tests/__init__.py
index dd5ae10..8c4758b 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,26 +1,37 @@
-from .app_state_tests import TestAppState
-from .component_tests import TestComponent
-from .config_interface_tests import TestConfigInterface
-from .config_registry_tests import TestConfigRegistry
-from .data_dict_tests import TestDataDict
-from .data_definition_tests import TestDataDefinition
-from .handshaking_tests import TestHandshaking
-from .pipeline_tests import TestPipeline
-from .problem_tests import TestProblem
-from .sampler_factory_tests import TestSamplerFactory
-from .samplers_tests import TestkFoldRandomSampler, TestkFoldWeightedRandomSampler
+from .application.pipeline_tests import TestPipeline
+
+from .components.component_tests import TestComponent
+from .components.problem_tests import TestProblem
+
+from .configuration.config_interface_tests import TestConfigInterface
+from .configuration.config_registry_tests import TestConfigRegistry
+from .configuration.handshaking_tests import TestHandshaking
+
+from .data_types.data_dict_tests import TestDataDict
+from .data_types.data_definition_tests import TestDataDefinition
+
+from .utils.app_state_tests import TestAppState
+from .utils.sampler_factory_tests import TestSamplerFactory
+from .utils.samplers_tests import TestkFoldRandomSampler, TestkFoldWeightedRandomSampler
+from .utils.statistics_tests import TestStatistics
 
 __all__ = [
-    'TestAppState',
+    # Application
+    'TestPipeline',
+    # Components
     'TestComponent',
+    'TestProblem',
+    # Configuration
     'TestConfigRegistry',
     'TestConfigInterface',
+    'TestHandshaking',
+    # DataTypes
     'TestDataDict',
     'TestDataDefinition',
-    'TestHandshaking',
-    'TestPipeline',
-    'TestProblem',
+    # Utils
+    'TestAppState',
     'TestSamplerFactory',
     'TestkFoldRandomSampler',
     'TestkFoldWeightedRandomSampler',
+    'TestStatistics',
     ]
diff --git a/tests/pipeline_tests.py b/tests/application/pipeline_tests.py
similarity index 100%
rename from tests/pipeline_tests.py
rename to tests/application/pipeline_tests.py
diff --git a/tests/component_tests.py b/tests/components/component_tests.py
similarity index 100%
rename from tests/component_tests.py
rename to tests/components/component_tests.py
diff --git a/tests/problem_tests.py b/tests/components/problem_tests.py
similarity index 100%
rename from tests/problem_tests.py
rename to tests/components/problem_tests.py
diff --git a/tests/config_interface_tests.py b/tests/configuration/config_interface_tests.py
similarity index 100%
rename from tests/config_interface_tests.py
rename to tests/configuration/config_interface_tests.py
diff --git a/tests/config_registry_tests.py b/tests/configuration/config_registry_tests.py
similarity index 100%
rename from tests/config_registry_tests.py
rename to tests/configuration/config_registry_tests.py
diff --git a/tests/handshaking_tests.py b/tests/configuration/handshaking_tests.py
similarity index 100%
rename from tests/handshaking_tests.py
rename to tests/configuration/handshaking_tests.py
diff --git a/tests/data_definition_tests.py b/tests/data_types/data_definition_tests.py
similarity index 100%
rename from tests/data_definition_tests.py
rename to tests/data_types/data_definition_tests.py
diff --git a/tests/data_dict_tests.py b/tests/data_types/data_dict_tests.py
similarity index 100%
rename from tests/data_dict_tests.py
rename to tests/data_types/data_dict_tests.py
diff --git a/tests/app_state_tests.py b/tests/utils/app_state_tests.py
similarity index 100%
rename from tests/app_state_tests.py
rename to tests/utils/app_state_tests.py
diff --git a/tests/sampler_factory_tests.py b/tests/utils/sampler_factory_tests.py
similarity index 100%
rename from tests/sampler_factory_tests.py
rename to tests/utils/sampler_factory_tests.py
diff --git a/tests/samplers_tests.py b/tests/utils/samplers_tests.py
similarity index 100%
rename from tests/samplers_tests.py
rename to tests/utils/samplers_tests.py
diff --git a/tests/statistics_tests.py b/tests/utils/statistics_tests.py
similarity index 100%
rename from tests/statistics_tests.py
rename to tests/utils/statistics_tests.py

From f58a76eff15692d489819faa2f7283ab71ee0295 Mon Sep 17 00:00:00 2001
From: tkornut <tkornut@us.ibm.com>
Date: Wed, 5 Jun 2019 16:03:19 -0700
Subject: [PATCH 3/3] CLEVR test set fixes + CLEVR unittest

---
 .../problems/image_text_to_class/clevr.py     |  35 +++--
 tests/__init__.py                             |   2 +
 tests/components/clevr_tests.py               | 125 ++++++++++++++++++
 3 files changed, 153 insertions(+), 9 deletions(-)
 create mode 100644 tests/components/clevr_tests.py

diff --git a/ptp/components/problems/image_text_to_class/clevr.py b/ptp/components/problems/image_text_to_class/clevr.py
index 9c57794..787154a 100644
--- a/ptp/components/problems/image_text_to_class/clevr.py
+++ b/ptp/components/problems/image_text_to_class/clevr.py
@@ -204,13 +204,23 @@ def __init__(self, name, config):
         
         # Display exemplary sample.
         i = 0
+        sample = self.dataset[i]
+        # Check if this is a test set.
+        if "answer" not in sample.keys():
+            sample["answer"] = "<UNK>"
+            sample[self.key_question_type_ids] = -1
+            sample[self.key_question_type_names] = "<UNK>"
+        else:
+            sample[self.key_question_type_ids] = self.question_family_id_to_subtype_id_mapping[sample["question_family_index"]]
+            sample[self.key_question_type_names] = self.question_family_id_to_subtype_mapping[sample["question_family_index"]]
+
         self.logger.info("Exemplary sample {} ({}):\n  question_type: {} ({})\n  image_ids: {}\n  question: {}\n  answer: {}".format(
-            i, self.dataset[i]["question_index"],
-            self.question_family_id_to_subtype_mapping[self.dataset[i]["question_family_index"]],
-            self.question_family_id_to_subtype_id_mapping[self.dataset[i]["question_family_index"]],
-            self.dataset[i]["image_filename"],
-            self.dataset[i]["question"],
-            self.dataset[i]["answer"]
+            i, sample["question_index"],
+            sample[self.key_question_type_names],  # name first, then id: "question_type: {} ({})"
+            sample[self.key_question_type_ids],
+            sample["image_filename"],
+            sample["question"],
+            sample["answer"]
             ))
 
 
@@ -334,11 +344,18 @@ def __getitem__(self, index):
         data_dict[self.key_questions] = item["question"]
 
         # Return answer. 
-        data_dict[self.key_answers] = item["answer"]
+        if "answer" in item.keys():
+            data_dict[self.key_answers] = item["answer"]
+        else:
+            data_dict[self.key_answers] = "<UNK>"
 
         # Question type related variables.
-        data_dict[self.key_question_type_ids] = self.question_family_id_to_subtype_id_mapping[item["question_family_index"]]
-        data_dict[self.key_question_type_names] = self.question_family_id_to_subtype_mapping[item["question_family_index"]]
+        if "question_family_index" in item.keys():
+            data_dict[self.key_question_type_ids] = self.question_family_id_to_subtype_id_mapping[item["question_family_index"]]
+            data_dict[self.key_question_type_names] = self.question_family_id_to_subtype_mapping[item["question_family_index"]]
+        else:
+            data_dict[self.key_question_type_ids] = -1
+            data_dict[self.key_question_type_names] = "<UNK>"
 
         # Return sample.
         return data_dict
diff --git a/tests/__init__.py b/tests/__init__.py
index 8c4758b..c864ca0 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,6 +1,7 @@
 from .application.pipeline_tests import TestPipeline
 
 from .components.component_tests import TestComponent
+from .components.clevr_tests import TestCLEVR
 from .components.problem_tests import TestProblem
 
 from .configuration.config_interface_tests import TestConfigInterface
@@ -20,6 +21,7 @@
     'TestPipeline',
     # Components
     'TestComponent',
+    'TestCLEVR',
     'TestProblem',
     # Configuration
     'TestConfigRegistry',
diff --git a/tests/components/clevr_tests.py b/tests/components/clevr_tests.py
new file mode 100644
index 0000000..f2bf218
--- /dev/null
+++ b/tests/components/clevr_tests.py
@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) tkornuta, IBM Corporation 2019
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__author__ = "Tomasz Kornuta"
+
+import unittest
+from os import path
+
+from ptp.components.utils.io import check_file_existence
+from ptp.components.problems.image_text_to_class.clevr import CLEVR
+from ptp.data_types.data_definition import DataDefinition
+from ptp.configuration.config_interface import ConfigInterface
+
+
+class TestCLEVR(unittest.TestCase):
+    """Unit tests of the CLEVR problem: dataset size and first sample of each split (skipped when unavailable)."""
+
+    def __init__(self, *args, **kwargs):
+        super(TestCLEVR, self).__init__(*args, **kwargs)
+        # NOTE(review): training split is hard-coded off, its existence check is commented out - confirm intent.
+        self.unittest_training_set = False # check_file_existence(path.expanduser('~/data/CLEVR_v1.0/questions'),'CLEVR_train_questions.json')
+        # Check the existence of validation set.
+        self.unittest_validation_set = check_file_existence(path.expanduser('~/data/CLEVR_v1.0/questions'),'CLEVR_val_questions.json')
+        # Check the existence of test set.
+        self.unittest_test_set = check_file_existence(path.expanduser('~/data/CLEVR_v1.0/questions'),'CLEVR_test_questions.json')
+
+
+    def test_training_set(self):
+        """
+            Tests the CLEVR training split.
+
+            .. note::
+                Currently always skipped: the check for '~/data/CLEVR_v1.0/questions/CLEVR_train_questions.json' is disabled in the constructor.
+        """
+        if not self.unittest_training_set:
+            self.skipTest("CLEVR training split test is disabled")
+        # Config with only the split set.
+        config = ConfigInterface()
+        config.add_config_params({"split": "training"})
+        clevr = CLEVR("CLEVR", config)
+
+        # Check dataset size.
+        self.assertEqual(len(clevr), 699989)
+
+        # Check the first sample.
+        sample = clevr[0]
+        self.assertEqual(sample['indices'], 0)
+        self.assertEqual(sample['image_ids'], 'CLEVR_train_000000.png')
+        self.assertEqual(sample['question_type_ids'], 4)
+        self.assertEqual(sample['question_type_names'], 'greater_than')
+        self.assertEqual(sample['questions'], 'Are there more big green things than large purple shiny cubes?')
+        self.assertEqual(sample['answers'], 'yes')
+
+
+    def test_validation_set(self):
+        """
+            Tests the CLEVR validation split.
+
+            .. note::
+                Test is skipped unless json file '~/data/CLEVR_v1.0/questions/CLEVR_val_questions.json' is found.
+        """
+        if not self.unittest_validation_set:
+            self.skipTest("'~/data/CLEVR_v1.0/questions/CLEVR_val_questions.json' not found")
+        # Config with only the split set.
+        config = ConfigInterface()
+        config.add_config_params({"split": "validation"})
+        clevr = CLEVR("CLEVR", config)
+
+        # Check dataset size.
+        self.assertEqual(len(clevr), 149991)
+
+        # Check the first sample.
+        sample = clevr[0]
+        self.assertEqual(sample['indices'], 0)
+        self.assertEqual(sample['image_ids'], 'CLEVR_val_000000.png')
+        self.assertEqual(sample['question_type_ids'], 10)
+        self.assertEqual(sample['question_type_names'], 'exist')
+        self.assertEqual(sample['questions'], 'Are there any other things that are the same shape as the big metallic object?')
+        self.assertEqual(sample['answers'], 'no')
+
+
+    def test_test_set(self):
+        """
+            Tests the CLEVR test split (no answers, hence '<UNK>' answer and question type are expected).
+
+            .. note::
+                Test is skipped unless json file '~/data/CLEVR_v1.0/questions/CLEVR_test_questions.json' is found.
+        """
+        if not self.unittest_test_set:
+            self.skipTest("'~/data/CLEVR_v1.0/questions/CLEVR_test_questions.json' not found")
+        # Config with only the split set.
+        config = ConfigInterface()
+        config.add_config_params({"split": "test"})
+        clevr = CLEVR("CLEVR", config)
+
+        # Check dataset size.
+        self.assertEqual(len(clevr), 149988)
+
+        # Check the first sample.
+        sample = clevr[0]
+        self.assertEqual(sample['indices'], 0)
+        self.assertEqual(sample['image_ids'], 'CLEVR_test_000000.png')
+        self.assertEqual(sample['question_type_ids'], -1)
+        self.assertEqual(sample['question_type_names'], '<UNK>')
+        self.assertEqual(sample['questions'], 'Is there anything else that is the same shape as the small brown matte object?')
+        self.assertEqual(sample['answers'], '<UNK>')
+        
+
+
+
+#if __name__ == "__main__":
+#    unittest.main()
\ No newline at end of file