IBM · tkornuta-ibm · Jun 4, 2019 · Jun 1, 2019 · Jun 1, 2019 · Jun 1, 2019
diff --git a/README.md b/README.md
@@ -22,10 +22,10 @@ In its core, to _accelerate the computations_ on their own, PTP relies on PyTorc
 The models are _agnostic_ to those operations and one indicates whether to use them in configuration files (data loaders) or by passing adequate run-time arguments (--gpu).
 
 **Datasets:**
-PTP focuses on multi-modal reasoning combining vision and language. Currently it offers the following _Problems_ from both domains:
+PTP focuses on multi-modal reasoning combining vision and language. Currently it offers the following _Problems_ (the problem domain of each is given in parentheses):
 
   * ImageCLEF VQA-Med 2019 (Visual Question Answering)
-  * MNIST (Image Classification)
+  * MNIST, CIFAR-100 (Image Classification)
   * WiLY (Language Identification)
   * WikiText-2 / WikiText-103 (Language Modelling)
   * ANKI (Machine Translation)

diff --git a/configs/cifar100/cifar100_classification_convnet_softmax.yml b/configs/cifar100/cifar100_classification_convnet_softmax.yml
@@ -0,0 +1,34 @@
+# Load config defining CIFAR100 problems for training, validation and testing.
+default_configs: cifar100/default_cifar100.yml
+
+# Definition of the pipeline.
+pipeline:
+
+  # Model consisting of two components.
+  image_encoder:
+    priority: 1.1
+    type: ConvNetEncoder
+
+  # Reshape inputs
+  reshaper:
+    priority: 1.2
+    type: ReshapeTensor
+    input_dims: [-1, 16, 2, 2]
+    output_dims: [-1, 64]
+    streams:
+      inputs: feature_maps
+      outputs: reshaped_maps
+    globals:
+      output_size: reshaped_maps_size
+
+  # Image classifier.
+  classifier:
+    priority: 1.3
+    type: FeedForwardNetwork
+    streams:
+      inputs: reshaped_maps
+    globals:
+      input_size: reshaped_maps_size
+      prediction_size: fine_num_classes  # global key set by the CIFAR-100 problem defaults
+
+#: pipeline
diff --git a/configs/cifar100/default_cifar100.yml b/configs/cifar100/default_cifar100.yml
@@ -0,0 +1,101 @@
+# Training parameters:
+training:
+  problem: 
+    type: CIFAR100
+    batch_size: &b 128
+    use_train_data: True
+  # Use sampler that operates on a subset.
+  #dataloader:
+  #  shuffle: False
+  sampler:
+    type: SubsetRandomSampler
+    indices: [0, 45000]
+  # optimizer parameters:
+  optimizer:
+    type: Adam
+    lr: 0.001
+  # settings parameters
+  terminal_conditions:
+    loss_stop_threshold: 0.05
+    early_stop_validations: -1
+    episode_limit: 10000
+    epoch_limit: 10
+
+# Validation parameters:
+validation:
+  #partial_validation_interval: 100
+  problem:
+    type: CIFAR100
+    batch_size: *b
+    use_train_data: True  # True because we are splitting the training set to: validation and training
+    #resize: [32, 32]
+  # Use sampler that operates on a subset.
+  sampler:
+    type: SubsetRandomSampler
+    indices: [45000, 50000]
+
+# Testing parameters:
+test:
+  problem:
+    type: CIFAR100  # must match the dataset used for training and validation
+    batch_size: *b
+    use_train_data: False
+    #resize: [32, 32]
+
+pipeline:
+  disable: image_viewer
+
+  # Loss
+  nllloss:
+    type: NLLLoss
+    priority: 10.0
+    streams:
+      targets: fine_targets
+
+  # Statistics.
+  batch_size:
+    priority: 100.0
+    type: BatchSizeStatistics
+    streams:
+      targets: fine_targets
+
+  accuracy:
+    priority: 100.1
+    type: AccuracyStatistics
+    streams:
+      targets: fine_targets
+
+
+  precision_recall:
+    priority: 100.2
+    type: PrecisionRecallStatistics
+    use_word_mappings: True
+    #show_class_scores: True
+    globals:
+      word_mappings: fine_label_word_mappings
+    streams:
+      targets: fine_targets
+
+  answer_decoder:
+    priority: 100.3
+    type: WordDecoder
+    import_word_mappings_from_globals: True
+    globals:
+      word_mappings: fine_label_word_mappings
+    streams:
+      inputs: predictions
+      outputs: answers
+
+  stream_viewer:
+    priority: 100.4
+    type: StreamViewer
+    input_streams: coarse_targets, coarse_labels, fine_targets, fine_labels, answers
+
+  image_viewer:
+    priority: 100.5
+    type: ImageToClassViewer
+    streams:
+      images: inputs  # NOTE(review): CIFAR-100 defaults name this stream `images` - confirm `inputs` exists
+      labels: fine_labels
+      answers: answers  # predicted labels (words) produced by answer_decoder
+
diff --git a/configs/default/components/problems/image_to_class/cifar_100.yml b/configs/default/components/problems/image_to_class/cifar_100.yml
@@ -0,0 +1,65 @@
+# This file defines the default values for the CIFAR-100 problem.
+
+####################################################################
+# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
+####################################################################
+
+# Folder where problem will store data (LOADED)
+data_folder: '~/data/cifar-100'
+
+# Defines the set that will be used (LOADED)
+# True: training set | False: test set.
+use_train_data: True
+
+# Optional parameter (LOADED)
+# When present, resizes the CIFAR images from [32,32] to the requested size
+#resize_image: [height, width]
+
+streams:
+  ####################################################################
+  # 2. Keymappings associated with INPUT and OUTPUT streams.
+  ####################################################################
+
+  # Stream containing batch of indices (OUTPUT)
+  # Every problem MUST return that stream.
+  indices: indices
+
+  # Stream containing batch of images (OUTPUT)
+  images: images
+
+  # Streams containing targets (label ids) (OUTPUT)
+  coarse_targets: coarse_targets
+  fine_targets: fine_targets
+
+  # Streams containing labels (words) (OUTPUT)
+  coarse_labels: coarse_labels
+  fine_labels: fine_labels
+
+globals:
+  ####################################################################
+  # 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
+  ####################################################################
+
+  ####################################################################
+  # 4. Keymappings associated with GLOBAL variables that will be SET.
+  ####################################################################
+
+  # Width of the image (SET)
+  input_width: image_width
+  # Height of the image (SET)
+  input_height: image_height
+  # Depth of the image (SET)
+  input_depth: image_depth
+
+  # Numbers of output classes (SET)
+  coarse_num_classes: coarse_num_classes
+  fine_num_classes: fine_num_classes
+
+  # Labels (word-idx) mappings (SET)
+  coarse_label_word_mappings: coarse_label_word_mappings
+  fine_label_word_mappings: fine_label_word_mappings
+
+  ####################################################################
+  # 5. Keymappings associated with statistics that will be ADDED.
+  ####################################################################
+
diff --git a/configs/default/components/problems/image_to_class/mnist.yml b/configs/default/components/problems/image_to_class/mnist.yml
@@ -30,6 +30,9 @@ streams:
   # Stream containing targets (label ids) (OUTPUT)
   targets: targets
 
+  # Stream containing labels (words) (OUTPUT)
+  labels: labels
+
 globals:
   ####################################################################
   # 3. Keymappings of variables that will be RETRIEVED from GLOBALS.

diff --git a/configs/default/components/text/word_decoder.yml b/configs/default/components/text/word_decoder.yml
@@ -36,7 +36,7 @@ streams:
   # Stream containing input tensor (INPUT)
   inputs: inputs
 
-  # Stream containing output tensor (OUTPUT)
+  # Stream containing output words (OUTPUT)
   outputs: outputs
 
 globals: 

diff --git a/configs/default/components/viewers/image_to_class_viewer.yml b/configs/default/components/viewers/image_to_class_viewer.yml
@@ -0,0 +1,38 @@
+# This file defines the default values for the ImageToClassViewer.
+
+####################################################################
+# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
+####################################################################
+
+# Index of the sample that will be printed (LOADED)
+# Default: -1 (means a random sample)
+sample_number: -1
+
+streams:
+  ####################################################################
+  # 2. Keymappings associated with INPUT and OUTPUT streams.
+  ####################################################################
+
+  # Stream containing images (INPUT)
+  images: images
+
+  # Stream containing target labels (strings) (INPUT)
+  labels: labels
+
+  # Stream containing predicted labels (strings) (INPUT)
+  answers: answers
+
+
+globals:
+  ####################################################################
+  # 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
+  ####################################################################
+
+  ####################################################################
+  # 4. Keymappings associated with GLOBAL variables that will be SET.
+  ####################################################################
+
+  ####################################################################
+  # 5. Keymappings associated with statistics that will be ADDED.
+  ####################################################################
+
diff --git a/configs/mnist/default_mnist.yml b/configs/mnist/default_mnist.yml
@@ -16,7 +16,7 @@ training:
   # settings parameters
   terminal_conditions:
     loss_stop_threshold: 0.05
-    early_stop_validations: 10
+    early_stop_validations: -1
     episode_limit: 10000
     epoch_limit: 10
 
@@ -42,6 +42,7 @@ test:
     #resize: [32, 32]
 
 pipeline:
+  disable: image_viewer
 
   # Loss
   nllloss:
@@ -50,20 +51,42 @@ pipeline:
 
   # Statistics.
   batch_size:
-    type: BatchSizeStatistics
     priority: 100.0
+    type: BatchSizeStatistics
 
   accuracy:
-    type: AccuracyStatistics
     priority: 100.1
+    type: AccuracyStatistics
 
 
   precision_recall:
-    type: PrecisionRecallStatistics
     priority: 100.2
+    type: PrecisionRecallStatistics
     use_word_mappings: True
     show_class_scores: True
     globals:
       word_mappings: label_word_mappings
-
+
+  answer_decoder:
+    priority: 100.3
+    type: WordDecoder
+    import_word_mappings_from_globals: True
+    globals:
+      word_mappings: label_word_mappings
+    streams:
+      inputs: predictions
+      outputs: answers
+
+  stream_viewer:
+    priority: 100.4
+    type: StreamViewer
+    input_streams: labels, answers
+
+  image_viewer:
+    priority: 100.5
+    type: ImageToClassViewer
+    streams:
+      images: inputs
+      labels: labels
+      answers: answers
 
diff --git a/configs/mnist/mnist_classification_vf_2lenet5_2losses.yml b/configs/mnist/mnist_classification_vf_2lenet5_2losses.yml
@@ -20,7 +20,7 @@ test:
 pipeline:
 
   # Disable components for "default" flow.
-  disable: nllloss, accuracy, precision_recall
+  disable: nllloss, accuracy, precision_recall, image_viewer
 
   # Add global variables.
   global_publisher:
@@ -218,5 +218,12 @@ pipeline:
       recall: joined_recall
       f1score: joined_f1score
 
+  # "Fix" (overwrite) stream names in viewers.
+  image_viewer:
+    streams:
+      answers: merged_predictions
+
+  stream_viewer:
+    input_streams: labels, merged_predictions
 
 #: pipeline
diff --git a/configs/mnist/mnist_classification_vf_shared_convnet_2softmaxes_2losses.yml b/configs/mnist/mnist_classification_vf_shared_convnet_2softmaxes_2losses.yml
@@ -27,7 +27,7 @@ training:
 pipeline:
 
   # Disable components for "default" flow.
-  disable: nllloss, accuracy, precision_recall
+  disable: nllloss, accuracy, precision_recall, answer_decoder, image_viewer
 
   ################# SHARED #################
 
@@ -238,5 +238,12 @@ pipeline:
       recall: joined_recall
       f1score: joined_f1score
 
+  # "Fix" (overwrite) stream names in viewers.
+  image_viewer:
+    streams:
+      answers: merged_predictions
+
+  stream_viewer:
+    input_streams: labels, merged_predictions
 
 #: pipeline
diff --git a/ptp/components/masking/join_masked_predictions.py b/ptp/components/masking/join_masked_predictions.py
@@ -108,7 +108,7 @@ def output_data_definitions(self):
         """
         return {
             self.key_output_indices: DataDefinition([-1], [torch.Tensor], "Batch of merged (output) indices [BATCH_SIZE]"),
-            self.key_output_strings: DataDefinition([-1], [torch.Tensor], "Batch of merged strings, corresponging to indices when using the provided word mappings [BATCH_SIZE]")
+            self.key_output_strings: DataDefinition([-1, 1], [list, str], "Batch of merged strings, corresponging to indices when using the provided word mappings [BATCH_SIZE] x [STRING]")
             }