From e380eb690a4bffbde9f3265ea259a5a80ffd7c9d Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 10:16:15 -0700 Subject: [PATCH 1/9] frozen pipeline for input fusion --- ...stm_resnet152_att_is_cat_ffn_c123_loss.yml | 2 +- ...stm_resnet152_mcb_is_cat_ffn_c123_loss.yml | 2 +- ...ve_lstm_vgg16_att_is_cat_ffn_c123_loss.yml | 6 +- .../c123_frozen_if_ffn_c123_loss.yml | 157 ++++++++++++++++++ ...nput_fusion_glove_lstm_vgg_att_is_cat.yml} | 85 ++++++++-- ...question_categorization_glove_rnn_ffn.yml} | 0 .../frozen_word_answer_glove_sum.yml | 0 7 files changed, 229 insertions(+), 23 deletions(-) create mode 100644 configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml rename configs/vqa_med_2019/{c4_classification/frozen_pipeline_input_fusion_glove_lstm_vgg_att_is_cat.yml => frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml} (65%) rename configs/vqa_med_2019/{question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml => frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml} (100%) create mode 100644 configs/vqa_med_2019/frozen_pipelines/frozen_word_answer_glove_sum.yml diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml index 232cbbe..a849432 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml @@ -1,5 +1,5 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml +default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_qc_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml index bca7a7f..87e8912 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml @@ -1,5 +1,5 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml +default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_qc_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml index 3f9aa05..586a990 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml @@ -40,9 +40,9 @@ hyperparameters: # Final classifier: FFN. 
answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500] - batch_size: &batch_size 100 - preload_images: &preload_images True - num_workers: &num_workers 1 + batch_size: &batch_size 300 + preload_images: &preload_images False + num_workers: &num_workers 4 # Training parameters: training: diff --git a/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml b/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml new file mode 100644 index 0000000..4a8783c --- /dev/null +++ b/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml @@ -0,0 +1,157 @@ +# Load config defining problems for training, validation and testing. +default_configs: + vqa_med_2019/default_vqa_med_2019.yml, + vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml, + vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml + +hyperparameters: + # In here I am putting some of the hyperparameters from spreadsheet. + + question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize + # Accepted formats: a,b,c or [a,b,c] + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + + image_preprocessing: &image_preprocessing normalize + # Accepted formats: a,b,c or [a,b,c] + # none | random_affine | random_horizontal_flip | normalize | all + + # Fusion II: (image + question) + image size (must be = question_image_fusion_size_val + image_size_encoder_output_size_val) + question_image_size_fusion_size_val: &question_image_size_fusion_size_val 1134 + + # Final classifier: FFN. + answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500] + + batch_size: &batch_size 256 + preload_images: &preload_images False + num_workers: &num_workers 4 + +# Training parameters: +training: + problem: + batch_size: *batch_size + categories: C1,C2,C3 + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images. + preload_images: *preload_images + streams: + questions: tokenized_questions + sampler: + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Use four workers for loading images. + dataloader: + num_workers: *num_workers + + # Optimizer parameters: + optimizer: + name: Adam + lr: 0.0001 + + # Terminal conditions: + terminal_conditions: + loss_stop: 1.0e-3 + episode_limit: 10000 + epoch_limit: -1 + +# Validation parameters: +validation: + partial_validation_interval: 100 + problem: + batch_size: *batch_size + categories: C1,C2,C3 + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images: false, as we will need them only once, at the end. + preload_images: false + streams: + questions: tokenized_questions + dataloader: + num_workers: 1 + + +pipeline: + + ################# PIPE 6: C1 + C2 + C3 questions ################# + + # Answer encoding. + pipe6_all_answer_indexer: + priority: 6.2 + type: LabelIndexer + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn + + + # Model 4: FFN C123 answering + pipe6_c123_answer_classifier: + priority: 6.3 + type: FeedForwardNetwork + hidden: *answer_classifier_hidden_sizes_val + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c123_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c123_binary_yn + + pipe6_c123_nllloss: + priority: 6.4 + type: NLLLoss + targets_dim: 1 + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + loss: pipe6_c123_loss + + pipe6_c123_precision_recall: + priority: 6.5 + type: PrecisionRecallStatistics + use_word_mappings: True + show_class_scores: True + #show_confusion_matrix: True + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + globals: + word_mappings: word_mappings_c123_binary_yn + statistics: + precision: pipe6_c123_precision + recall: pipe6_c123_recall + f1score: pipe6_c123_f1score + + # C123 Predictions decoder. + pipe5_c123_prediction_decoder: + priority: 6.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe6_c123_predictions + outputs: predicted_answers + globals: + word_mappings: word_mappings_c123_binary_yn + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. + viewer: + priority: 9.3 + type: StreamViewer + input_streams: + tokenized_questions, + category_names, predicted_category_names, + answers, predicted_answers + + +#: pipeline diff --git a/configs/vqa_med_2019/c4_classification/frozen_pipeline_input_fusion_glove_lstm_vgg_att_is_cat.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml similarity index 65% rename from configs/vqa_med_2019/c4_classification/frozen_pipeline_input_fusion_glove_lstm_vgg_att_is_cat.yml rename to configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml index 4d554d1..11a9fcb 100644 --- a/configs/vqa_med_2019/c4_classification/frozen_pipeline_input_fusion_glove_lstm_vgg_att_is_cat.yml +++ b/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml @@ -1,12 +1,13 @@ # Part of pipeline containing components constituting the "Inputs Fusion" pipeline. # Inputs: -# * -# * +# * tokenized_questions +# * images +# * image_sizes # Outputs: -# * -# * +# * concatenated_activations +# * concatenated_activations_size # "Inputs Fusion" # 0.: @@ -24,13 +25,49 @@ checkpoint: &checkpoint ~/image-clef-2019/experiments/c4_encoders/20190504_20244 # This one will be skipped, as this is C123 classifier! # + Model 'pipe6_c123_answer_classifier' [FeedForwardNetwork] params saved +pipe_if0_hyperparameters: + # In here I am putting some of the hyperparameters from spreadsheet. + + question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize + # Accepted formats: a,b,c or [a,b,c] + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + + image_preprocessing: &image_preprocessing normalize + # Accepted formats: a,b,c or [a,b,c] + # none | random_affine | random_horizontal_flip | normalize | all + + # Image encoder. 
+ image_encoder_model: &image_encoder_model vgg16 + # Options: vgg16 | densenet121 | resnet152 | resnet50 + #image_encoder_output_size_val: &image_encoder_output_size_val 100 + # INFO: this variable is not important, as we are using features in this pipeline!! + + # Question encoder. + question_encoder_embeddings: &question_encoder_embeddings glove.6B.50d.txt + # Options: '' | glove.6B.50d.txt | glove.6B.100d.txt | glove.6B.200d.txt | glove.6B.300d.txt | glove.42B.300d.txt | glove.840B.300d.txt | glove.twitter.27B.txt | mimic.fastText.no_clean.300d.pickled + question_encoder_embeddings_size_val: &question_encoder_embeddings_size_val 50 + question_encoder_lstm_size_val: &question_encoder_lstm_size_val 50 + question_encoder_output_size_val: &question_encoder_output_size_val 100 + + # Fusion I: image + question + question_image_fusion_type_val: &question_image_fusion_type VQA_Attention + # Options: ElementWiseMultiplication | VQA_Attention + #question_image_fusion_size_val: &question_image_fusion_size_val 1124 + # INFO: this variable is set by VQA_Attention component! + + # Image size encoder. + image_size_encoder_output_size_val: &image_size_encoder_output_size_val 10 + + # Fusion II: (image + question) + image size (must be = question_image_fusion_size_val + image_size_encoder_output_size_val) + question_image_size_fusion_size_val: &question_image_size_fusion_size_val 1134 + pipeline: ################# PIPE 0: SHARED ################# # Add global variables. - global_publisher: + pipe_if0_global_publisher: priority: 0.11 type: GlobalVariablePublisher # Add input_size to globals. @@ -38,7 +75,7 @@ pipeline: values: [*question_encoder_output_size_val, 2, *image_size_encoder_output_size_val] #, *image_encoder_output_size_val] #, *question_image_fusion_size_val] # Statistics. - batch_size: + pipe_if0_batch_size: priority: 0.12 type: BatchSizeStatistics @@ -61,7 +98,7 @@ pipeline: ################# PIPE 1: SHARED QUESTION ENCODER ################# # Model 1: question embeddings - pipe1_question_embeddings: + pipe_if1_question_embeddings: priority: 1.1 type: SentenceEmbeddings embeddings_size: *question_encoder_embeddings_size_val @@ -78,10 +115,10 @@ pipeline: inputs: tokenized_questions outputs: embedded_questions globals: - embeddings_size: pipe1_embeddings_size + embeddings_size: pipe_if1_embeddings_size # Model 2: question RNN - pipe1_lstm: + pipe_if1_lstm: priority: 1.2 type: RecurrentNeuralNetwork cell_type: LSTM @@ -92,20 +129,20 @@ pipeline: # LOAD AND FREEZE # load: file: *checkpoint - model: pipe1_question_embeddings + model: pipe1_lstm freeze: True ################### streams: inputs: embedded_questions predictions: question_activations globals: - input_size: pipe1_embeddings_size + input_size: pipe_if1_embeddings_size prediction_size: question_encoder_output_size ################# PIPE 2: SHARED IMAGE ENCODER ################# # Image encoder. - image_encoder: + pipe_if2_image_encoder: priority: 2.1 type: TorchVisionWrapper model: *image_encoder_model @@ -113,7 +150,7 @@ pipeline: # LOAD AND FREEZE # load: file: *checkpoint - model: pipe1_question_embeddings + model: image_encoder freeze: True ################### streams: @@ -123,14 +160,14 @@ pipeline: ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# # Model - image size classifier. 
- image_size_encoder: + pipe_if3_image_size_encoder: priority: 3.1 type: FeedForwardNetwork use_logsoftmax: False # LOAD AND FREEZE # load: file: *checkpoint - model: pipe1_question_embeddings + model: image_size_encoder freeze: True ################### streams: @@ -142,13 +179,19 @@ pipeline: ################# PIPE 4: image-question fusion ################# # Attention + FF. - question_image_fusion: + pipe_if4_question_image_fusion: priority: 4.1 type: *question_image_fusion_type dropout_rate: 0.5 # Attention params. latent_size: 100 num_attention_heads: 2 + # LOAD AND FREEZE # + load: + file: *checkpoint + model: question_image_fusion + freeze: True + ################### streams: image_encodings: feature_maps question_encodings: question_activations @@ -158,12 +201,18 @@ pipeline: output_size: fused_activation_size - question_image_ffn: + pipe_if4_question_image_ffn: priority: 4.2 type: FeedForwardNetwork #hidden_sizes: [*question_image_fusion_size_val] dropout_rate: 0.5 use_logsoftmax: False + # LOAD AND FREEZE # + load: + file: *checkpoint + model: question_image_ffn + freeze: True + ################### streams: inputs: fused_activations predictions: question_image_activations @@ -174,7 +223,7 @@ pipeline: ################# PIPE 5: image-question-image size fusion ################# # 5th subpipeline: concatenation - concat: + pipe_if5_concat: priority: 5.1 type: Concatenation input_streams: [question_image_activations,image_size_activations] diff --git a/configs/vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml similarity index 100% rename from configs/vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml rename to configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml diff --git a/configs/vqa_med_2019/frozen_pipelines/frozen_word_answer_glove_sum.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_word_answer_glove_sum.yml new file mode 100644 index 0000000..e69de29 From 2ef1df7c76444286883f066af968bf273377aa8c Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 10:20:15 -0700 Subject: [PATCH 2/9] clenup --- .../example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml | 3 ++- .../tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml | 3 ++- .../tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml | 3 ++- .../tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml | 3 ++- .../tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml | 3 ++- .../tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml | 3 ++- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml index b93eb56..dc34516 100644 --- a/configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml @@ -1,5 +1,6 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml +default_configs: + vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. 
diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml index a849432..2091e16 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml @@ -1,5 +1,6 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_qc_glove_rnn_ffn.yml +default_configs: + vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml index 87e8912..cd28ae0 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml @@ -1,5 +1,6 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_qc_glove_rnn_ffn.yml +default_configs: + vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml index 586a990..896b221 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml @@ -1,5 +1,6 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml +default_configs: + vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml index d718eeb..8f9d748 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml @@ -1,5 +1,6 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml +default_configs: + vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. 
diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml index 1b4363b..3ec7a77 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml @@ -1,5 +1,6 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml +default_configs: + vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. From 479b479b374970ee92b788d819200206247d91fd Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 11:20:07 -0700 Subject: [PATCH 3/9] working on input_fusion_io --- .../c123_frozen_if_ffn_c123_loss.yml | 10 +- ...input_fusion_glove_lstm_vgg_att_is_cat.yml | 3 + ..._question_categorization_glove_rnn_ffn.yml | 7 +- .../input_fusion_processor_io.yml | 122 ++++++++++++++++++ ptp/workers/processor.py | 36 ++++-- 5 files changed, 157 insertions(+), 21 deletions(-) create mode 100644 configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml diff --git a/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml b/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml index 4a8783c..107f9ac 100644 --- a/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml @@ -77,7 +77,7 @@ pipeline: ################# PIPE 6: C1 + C2 + C3 questions ################# # Answer encoding. - pipe6_all_answer_indexer: + pipe6_c123_binary_yn_answer_indexer: priority: 6.2 type: LabelIndexer data_folder: ~/data/vqa-med @@ -93,7 +93,7 @@ pipeline: # Model 4: FFN C123 answering - pipe6_c123_answer_classifier: + pipe6_c123_binary_yn_answer_classifier: priority: 6.3 type: FeedForwardNetwork hidden: *answer_classifier_hidden_sizes_val @@ -105,7 +105,7 @@ pipeline: input_size: concatenated_activations_size prediction_size: vocabulary_size_c123_binary_yn - pipe6_c123_nllloss: + pipe6_c123_binary_yn_nllloss: priority: 6.4 type: NLLLoss targets_dim: 1 @@ -114,7 +114,7 @@ pipeline: targets: answers_ids loss: pipe6_c123_loss - pipe6_c123_precision_recall: + pipe6_c123_binary_yn_precision_recall: priority: 6.5 type: PrecisionRecallStatistics use_word_mappings: True @@ -131,7 +131,7 @@ pipeline: f1score: pipe6_c123_f1score # C123 Predictions decoder. - pipe5_c123_prediction_decoder: + pipe5_c123_binary_yn_prediction_decoder: priority: 6.6 type: WordDecoder # Use the same word mappings as label indexer. diff --git a/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml index 11a9fcb..5f1d4c5 100644 --- a/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml +++ b/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml @@ -1,12 +1,15 @@ # Part of pipeline containing components constituting the "Inputs Fusion" pipeline. 
# Inputs: +# streams: # * tokenized_questions # * images # * image_sizes # Outputs: +# streams: # * concatenated_activations +# globals: # * concatenated_activations_size # "Inputs Fusion" diff --git a/configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml index 6ca6986..c144f57 100644 --- a/configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml +++ b/configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml @@ -19,6 +19,7 @@ # 0.56: pipe_qc_category_accuracy # Loaded checkpoint: 20190505_130406 +checkpoint: &checkpoint ~/image-clef-2019/experiments/q_categorization/20190505_130406/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt pipeline: ################# PIPE: QUESTION CATEGORIZATION ################# @@ -37,7 +38,7 @@ pipeline: type: SentenceEmbeddings # LOAD AND FREEZE # load: - file: ~/image-clef-2019/experiments/q_categorization/20190505_130406/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + file: *checkpoint model: question_embeddings freeze: True ################### @@ -58,7 +59,7 @@ pipeline: cell_type: LSTM # LOAD AND FREEZE # load: - file: ~/image-clef-2019/experiments/q_categorization/20190505_130406/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + file: *checkpoint model: lstm freeze: True ################### @@ -78,7 +79,7 @@ pipeline: type: FeedForwardNetwork # LOAD AND FREEZE # load: - file: ~/image-clef-2019/experiments/q_categorization/20190505_130406/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + file: *checkpoint model: classifier freeze: True ################### diff --git a/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml b/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml new file mode 100644 index 0000000..c4615bd --- /dev/null +++ b/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml @@ -0,0 +1,122 @@ +# Load config defining problems for training, validation and testing. +default_configs: + vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml, + vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml + + +training_validation: + problem: + type: &p_type VQAMED2019 + data_folder: &data_folder ~/data/vqa-med + split: training_validation + categories: all + resize_image: &resize_image [224, 224] + batch_size: 64 + # Appy all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions + + dataloader: + # No sampler, process samples in the same order. + shuffle: false + # Use 1 worker, so batches will follow the samples order. + num_workers: 1 + + +hyperparams: + # Final classifier: FFN. + answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500] + + +# Add component for exporting answers to files. +pipeline: + name: input_fusion + + ################# PIPE 6: C1 + C2 + C3 questions ################# + + # Answer encoding. + pipe6_c123_binary_yn_answer_indexer: + priority: 6.2 + type: LabelIndexer + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn + + + # Model 4: FFN C123 answering + pipe6_c123_binary_yn_answer_classifier: + priority: 6.3 + type: FeedForwardNetwork + hidden: *answer_classifier_hidden_sizes_val + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c123_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c123_binary_yn + + pipe6_c123_binary_yn_nllloss: + priority: 6.4 + type: NLLLoss + targets_dim: 1 + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + loss: pipe6_c123_loss + + pipe6_c123_binary_yn_precision_recall: + priority: 6.5 + type: PrecisionRecallStatistics + use_word_mappings: True + show_class_scores: True + #show_confusion_matrix: True + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + globals: + word_mappings: word_mappings_c123_binary_yn + statistics: + precision: pipe6_c123_precision + recall: pipe6_c123_recall + f1score: pipe6_c123_f1score + + # C123 Predictions decoder. + pipe5_c123_binary_yn_prediction_decoder: + priority: 6.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe6_c123_predictions + outputs: predicted_answers + globals: + word_mappings: word_mappings_c123_binary_yn + + # Viewers. + viewer_extended: + priority: 100.4 + type: StreamViewer + sample_number: 0 + input_streams: + indices,image_ids,tokenized_questions, + category_names,predicted_categories, + answers,tokenized_answers,predicted_answers + + fused_inputs_exporter: + priority: 100.5 + type: StreamFileExporter + separator: '|' + filename: 'fused_inputs.csv' + export_separator_line_to_csv: False + input_streams: + indices + diff --git a/ptp/workers/processor.py b/ptp/workers/processor.py index b5afa68..c85cfbc 100644 --- a/ptp/workers/processor.py +++ b/ptp/workers/processor.py @@ -86,20 +86,24 @@ def setup_global_experiment(self): self.logger.error("Cannot use GPU as there are no CUDA-compatible devices present in the system!") exit(-1) + # Config that will be used. + abs_root_configs = None # Check if checkpoint file was indicated. - if chkpt_file == "": - print('Please pass path to and name of the file containing pipeline to be loaded as --load parameter') - exit(-2) + if chkpt_file != "": + #print('Please pass path to and name of the file containing pipeline to be loaded as --load parameter') + #exit(-2) - # Check if file with model exists. - if not path.isfile(chkpt_file): - print('Checkpoint file {} does not exist'.format(chkpt_file)) - exit(-3) + # Check if file with model exists. + if not path.isfile(chkpt_file): + print('Checkpoint file {} does not exist'.format(chkpt_file)) + exit(-3) - # Extract path. - self.abs_path, _ = path.split(path.dirname(path.expanduser(chkpt_file))) - print(self.abs_path) + # Extract path. + self.abs_path, _ = path.split(path.dirname(path.expanduser(chkpt_file))) + + # Use the "default" config. + abs_root_configs = [path.join(self.abs_path, 'training_configuration.yml')] # Check if config file was indicated by the user. if self.app_state.args.config != '': @@ -107,9 +111,15 @@ def setup_global_experiment(self): root_configs = self.app_state.args.config.replace(" ", "").split(',') # If there are - expand them to absolute paths. 
abs_root_configs = [path.expanduser(config) for config in root_configs] - else: - # Use the "default one". - abs_root_configs = [path.join(self.abs_path, 'training_configuration.yml')] + + # Use path to experiments. + self.abs_path = path.expanduser(self.app_state.args.expdir) + + + if abs_root_configs is None: + print('Please indicate configuration file to be used (--config) and/or pass path to and name of the file containing pipeline to be loaded (--load)') + exit(-2) + # Get the list of configurations which need to be loaded. configs_to_load = config_parsing.recurrent_config_parse(abs_root_configs, [], self.app_state.absolute_config_path) From 1786f165e17542045af32bd538bea40bfdea61e6 Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 11:35:15 -0700 Subject: [PATCH 4/9] exporting of fused inputs --- .../input_fusion_processor_io.yml | 61 ++----------------- ptp/workers/processor.py | 9 ++- 2 files changed, 13 insertions(+), 57 deletions(-) diff --git a/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml b/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml index c4615bd..76392a6 100644 --- a/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml +++ b/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml @@ -31,7 +31,7 @@ hyperparams: # Add component for exporting answers to files. pipeline: - name: input_fusion + name: input_fusion_processor_io ################# PIPE 6: C1 + C2 + C3 questions ################# @@ -51,56 +51,6 @@ pipeline: word_mappings: word_mappings_c123_binary_yn - # Model 4: FFN C123 answering - pipe6_c123_binary_yn_answer_classifier: - priority: 6.3 - type: FeedForwardNetwork - hidden: *answer_classifier_hidden_sizes_val - dropout_rate: 0.5 - streams: - inputs: concatenated_activations - predictions: pipe6_c123_predictions - globals: - input_size: concatenated_activations_size - prediction_size: vocabulary_size_c123_binary_yn - - pipe6_c123_binary_yn_nllloss: - priority: 6.4 - type: NLLLoss - targets_dim: 1 - streams: - predictions: pipe6_c123_predictions - targets: answers_ids - loss: pipe6_c123_loss - - pipe6_c123_binary_yn_precision_recall: - priority: 6.5 - type: PrecisionRecallStatistics - use_word_mappings: True - show_class_scores: True - #show_confusion_matrix: True - streams: - predictions: pipe6_c123_predictions - targets: answers_ids - globals: - word_mappings: word_mappings_c123_binary_yn - statistics: - precision: pipe6_c123_precision - recall: pipe6_c123_recall - f1score: pipe6_c123_f1score - - # C123 Predictions decoder. - pipe5_c123_binary_yn_prediction_decoder: - priority: 6.6 - type: WordDecoder - # Use the same word mappings as label indexer. - import_word_mappings_from_globals: True - streams: - inputs: pipe6_c123_predictions - outputs: predicted_answers - globals: - word_mappings: word_mappings_c123_binary_yn - # Viewers. 
viewer_extended: priority: 100.4 @@ -108,15 +58,16 @@ pipeline: sample_number: 0 input_streams: indices,image_ids,tokenized_questions, - category_names,predicted_categories, - answers,tokenized_answers,predicted_answers + concatenated_activations_size, + category,names, + answers fused_inputs_exporter: priority: 100.5 type: StreamFileExporter separator: '|' filename: 'fused_inputs.csv' - export_separator_line_to_csv: False + export_separator_line_to_csv: True input_streams: - indices + indices, concatenated_activations diff --git a/ptp/workers/processor.py b/ptp/workers/processor.py index c85cfbc..0abadc7 100644 --- a/ptp/workers/processor.py +++ b/ptp/workers/processor.py @@ -112,8 +112,13 @@ def setup_global_experiment(self): # If there are - expand them to absolute paths. abs_root_configs = [path.expanduser(config) for config in root_configs] - # Use path to experiments. - self.abs_path = path.expanduser(self.app_state.args.expdir) + # Using name of the first configuration file from command line. + basename = path.basename(root_configs[0]) + # Take config filename without extension. + pipeline_name = path.splitext(basename)[0] + + # Use path to experiments + pipeline. + self.abs_path = path.join(path.expanduser(self.app_state.args.expdir), pipeline_name) if abs_root_configs is None: From e4168d2e37f75643897d6a0cc54ba035583389ba Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 11:43:47 -0700 Subject: [PATCH 5/9] config cleanups --- .../c4_frozen_if_ffn_c4_loss.yml | 151 ++++++++++++++++++ .../example_frozen_if_ffn_c123_loss.yml} | 5 +- ...input_fusion_glove_lstm_vgg_att_is_cat.yml | 37 +---- .../input_fusion_processor_io.yml | 2 +- 4 files changed, 156 insertions(+), 39 deletions(-) create mode 100644 configs/vqa_med_2019/c4_classification/c4_frozen_if_ffn_c4_loss.yml rename configs/vqa_med_2019/{frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml => evaluation/example_frozen_if_ffn_c123_loss.yml} (95%) diff --git a/configs/vqa_med_2019/c4_classification/c4_frozen_if_ffn_c4_loss.yml b/configs/vqa_med_2019/c4_classification/c4_frozen_if_ffn_c4_loss.yml new file mode 100644 index 0000000..732366a --- /dev/null +++ b/configs/vqa_med_2019/c4_classification/c4_frozen_if_ffn_c4_loss.yml @@ -0,0 +1,151 @@ +# Load config defining problems for training, validation and testing. +default_configs: + vqa_med_2019/default_vqa_med_2019.yml, + vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml, + vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml + +c4_hyperparameters: + # In here I am putting some of the hyperparameters from spreadsheet. + + question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize + # Accepted formats: a,b,c or [a,b,c] + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + + image_preprocessing: &image_preprocessing normalize + # Accepted formats: a,b,c or [a,b,c] + # none | random_affine | random_horizontal_flip | normalize | all + + batch_size: &batch_size 256 + preload_images: &preload_images False + num_workers: &num_workers 4 + +# Training parameters: +training: + problem: + batch_size: *batch_size + categories: C4 + export_sample_weights: ~/data/vqa-med/answers.c4.weights.csv + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images. 
+ preload_images: *preload_images + streams: + questions: tokenized_questions + sampler: + weights: ~/data/vqa-med/answers.c4.weights.csv + # Use four workers for loading images. + dataloader: + num_workers: *num_workers + + # Optimizer parameters: + optimizer: + name: Adam + lr: 0.0001 + + # Terminal conditions: + terminal_conditions: + loss_stop: 1.0e-3 + episode_limit: 10000 + epoch_limit: -1 + +# Validation parameters: +validation: + partial_validation_interval: 100 + problem: + batch_size: *batch_size + categories: C4 + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images: false, as we will need them only once, at the end. + preload_images: false + streams: + questions: tokenized_questions + dataloader: + num_workers: 1 + + +pipeline: + + ################# PIPE 6: C1 + C2 + C3 questions ################# + + # Answer encoding. + pipe6_c123_binary_yn_answer_indexer: + priority: 6.2 + type: LabelIndexer + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn + + + # Model 4: FFN C123 answering + pipe6_c123_binary_yn_answer_classifier: + priority: 6.3 + type: FeedForwardNetwork + hidden: *answer_classifier_hidden_sizes_val + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c123_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c123_binary_yn + + pipe6_c123_binary_yn_nllloss: + priority: 6.4 + type: NLLLoss + targets_dim: 1 + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + loss: pipe6_c123_loss + + pipe6_c123_binary_yn_precision_recall: + priority: 6.5 + type: PrecisionRecallStatistics + use_word_mappings: True + show_class_scores: True + #show_confusion_matrix: True + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + globals: + word_mappings: word_mappings_c123_binary_yn + statistics: + precision: pipe6_c123_precision + recall: pipe6_c123_recall + f1score: pipe6_c123_f1score + + # C123 Predictions decoder. + pipe5_c123_binary_yn_prediction_decoder: + priority: 6.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe6_c123_predictions + outputs: predicted_answers + globals: + word_mappings: word_mappings_c123_binary_yn + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. 
+ viewer: + priority: 9.3 + type: StreamViewer + input_streams: + tokenized_questions, + category_names, predicted_category_names, + answers, predicted_answers + + +#: pipeline diff --git a/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/example_frozen_if_ffn_c123_loss.yml similarity index 95% rename from configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml rename to configs/vqa_med_2019/evaluation/example_frozen_if_ffn_c123_loss.yml index 107f9ac..e82be5a 100644 --- a/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/example_frozen_if_ffn_c123_loss.yml @@ -4,7 +4,7 @@ default_configs: vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml, vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml -hyperparameters: +c123_hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize @@ -15,9 +15,6 @@ hyperparameters: # Accepted formats: a,b,c or [a,b,c] # none | random_affine | random_horizontal_flip | normalize | all - # Fusion II: (image + question) + image size (must be = question_image_fusion_size_val + image_size_encoder_output_size_val) - question_image_size_fusion_size_val: &question_image_size_fusion_size_val 1134 - # Final classifier: FFN. answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500] diff --git a/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml index 5f1d4c5..c3ae040 100644 --- a/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml +++ b/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml @@ -29,21 +29,10 @@ checkpoint: &checkpoint ~/image-clef-2019/experiments/c4_encoders/20190504_20244 # + Model 'pipe6_c123_answer_classifier' [FeedForwardNetwork] params saved pipe_if0_hyperparameters: - # In here I am putting some of the hyperparameters from spreadsheet. - - question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize - # Accepted formats: a,b,c or [a,b,c] - # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all - - image_preprocessing: &image_preprocessing normalize - # Accepted formats: a,b,c or [a,b,c] - # none | random_affine | random_horizontal_flip | normalize | all + # WARNING: as we are loading the pretrained pipeline, all those values must stay! # Image encoder. image_encoder_model: &image_encoder_model vgg16 - # Options: vgg16 | densenet121 | resnet152 | resnet50 - #image_encoder_output_size_val: &image_encoder_output_size_val 100 - # INFO: this variable is not important, as we are using features in this pipeline!! # Question encoder. question_encoder_embeddings: &question_encoder_embeddings glove.6B.50d.txt @@ -54,9 +43,6 @@ pipe_if0_hyperparameters: # Fusion I: image + question question_image_fusion_type_val: &question_image_fusion_type VQA_Attention - # Options: ElementWiseMultiplication | VQA_Attention - #question_image_fusion_size_val: &question_image_fusion_size_val 1124 - # INFO: this variable is set by VQA_Attention component! # Image size encoder. 
image_size_encoder_output_size_val: &image_size_encoder_output_size_val 10 @@ -74,30 +60,14 @@ pipeline: priority: 0.11 type: GlobalVariablePublisher # Add input_size to globals. - keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size] #, image_encoder_output_size] #, fused_activation_size] - values: [*question_encoder_output_size_val, 2, *image_size_encoder_output_size_val] #, *image_encoder_output_size_val] #, *question_image_fusion_size_val] + keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size] + values: [*question_encoder_output_size_val, 2, *image_size_encoder_output_size_val] # Statistics. pipe_if0_batch_size: priority: 0.12 type: BatchSizeStatistics - # Answer encoding. - #pipe1_all_answer_indexer: - # priority: 0.13 - # type: LabelIndexer - # data_folder: ~/data/vqa-med - # word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv - # # Export mappings and size to globals. - # export_word_mappings_to_globals: True - # streams: - # inputs: answers - # outputs: answers_ids - # globals: - # vocabulary_size: vocabulary_size_c123_binary_yn - # word_mappings: word_mappings_c123_binary_yn - - ################# PIPE 1: SHARED QUESTION ENCODER ################# # Model 1: question embeddings @@ -207,7 +177,6 @@ pipeline: pipe_if4_question_image_ffn: priority: 4.2 type: FeedForwardNetwork - #hidden_sizes: [*question_image_fusion_size_val] dropout_rate: 0.5 use_logsoftmax: False # LOAD AND FREEZE # diff --git a/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml b/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml index 76392a6..0f8754d 100644 --- a/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml +++ b/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml @@ -69,5 +69,5 @@ pipeline: filename: 'fused_inputs.csv' export_separator_line_to_csv: True input_streams: - indices, concatenated_activations + indices #, concatenated_activations From 1d0cdc234d9a7af7c14219e6d33f912267c4657c Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 11:53:40 -0700 Subject: [PATCH 6/9] cleanups in c4 configs --- ...n_c4_loss.yml => c4_frozen_if_gru_dec.yml} | 0 .../c4_lstm_vgg16_ewm_cat_is_attdec.yml | 236 ------------------ 2 files changed, 236 deletions(-) rename configs/vqa_med_2019/c4_classification/{c4_frozen_if_ffn_c4_loss.yml => c4_frozen_if_gru_dec.yml} (100%) delete mode 100644 configs/vqa_med_2019/c4_classification/c4_lstm_vgg16_ewm_cat_is_attdec.yml diff --git a/configs/vqa_med_2019/c4_classification/c4_frozen_if_ffn_c4_loss.yml b/configs/vqa_med_2019/c4_classification/c4_frozen_if_gru_dec.yml similarity index 100% rename from configs/vqa_med_2019/c4_classification/c4_frozen_if_ffn_c4_loss.yml rename to configs/vqa_med_2019/c4_classification/c4_frozen_if_gru_dec.yml diff --git a/configs/vqa_med_2019/c4_classification/c4_lstm_vgg16_ewm_cat_is_attdec.yml b/configs/vqa_med_2019/c4_classification/c4_lstm_vgg16_ewm_cat_is_attdec.yml deleted file mode 100644 index ee05864..0000000 --- a/configs/vqa_med_2019/c4_classification/c4_lstm_vgg16_ewm_cat_is_attdec.yml +++ /dev/null @@ -1,236 +0,0 @@ -# Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml - -# Training parameters: -training: - problem: - batch_size: 32 # 200 requires to use 4 GPUs! 
- categories: C4 - question_preprocessing: lowercase, remove_punctuation, tokenize #, random_remove_stop_words #,random_shuffle_words - answer_preprocessing: lowercase, remove_punctuation, tokenize - export_sample_weights: ~/data/vqa-med/answers.c4.weights.csv - sampler: - weights: ~/data/vqa-med/answers.c4.weights.csv - dataloader: - num_workers: 4 - # Termination. - terminal_conditions: - loss_stop: 1.0e-2 - episode_limit: 1000000 - epoch_limit: -1 - -# Validation parameters: -validation: - problem: - batch_size: 32 - categories: C4 - question_preprocessing: lowercase, remove_punctuation, tokenize - answer_preprocessing: lowercase, remove_punctuation, tokenize - dataloader: - num_workers: 4 - -pipeline: - - ################# PIPE 1: SHARED ################# - - global_publisher: - priority: 1.0 - type: GlobalVariablePublisher - # Add input_size to globals. - keys: [question_encoder_output_size, image_encoder_output_size, element_wise_activation_size,image_size_encoder_input_size, image_size_encoder_output_size] - values: [100, 500, 100, 2, 10] - - # Answer encoding. - answer_indexer: - priority: 1.1 - type: SentenceIndexer - data_folder: ~/data/vqa-med - word_mappings_file: answer_words.c4.preprocessed.word.mappings.csv - # Export answer word mappings to globals. - export_word_mappings_to_globals: True - export_pad_mapping_to_globals: True - additional_tokens: , - # Add token at the end of sentence. - eos_token: True - fixed_padding: 10 # The longest question! max is 19! - streams: - inputs: answers - outputs: indexed_answers - globals: - vocabulary_size: ans_vocabulary_size - word_mappings: ans_word_mappings - pad_index: ans_pad_index - - - ################# PIPE 2: SHARED QUESTION ENCODER ################# - - # Question embeddings - question_embeddings: - priority: 2.0 - type: SentenceEmbeddings - embeddings_size: 50 - pretrained_embeddings_file: glove.6B.50d.txt - data_folder: ~/data/vqa-med - word_mappings_file: questions.all.word.mappings.csv - fixed_padding: 10 # The longest question! max is 19! - additional_tokens: , - streams: - inputs: questions - outputs: embedded_questions - - # Single layer GRU Encoder - question_encoder: - priority: 2.1 - type: RecurrentNeuralNetwork - # Do not wrap that model with DataDictParallel! - #parallelize: False - cell_type: GRU - initial_state: Trainable - hidden_size: 50 - num_layers: 1 - # We will project outputs that should reassemble outputs of answer word embeddings. - use_logsoftmax: False - output_last_state: True - prediction_mode: Dense - #ffn_output: False - #dropout_rate: 0.1 - streams: - inputs: embedded_questions - predictions: s2s_encoder_output - output_state: s2s_state_output - globals: - input_size: embeddings_size - prediction_size: question_encoder_output_size - - ################# PIPE 2: SHARED IMAGE ENCODER ################# - - # Image encoder. - image_encoder: - priority: 2.0 - type: TorchVisionWrapper - model_type: vgg16 - streams: - inputs: images - outputs: image_activations - globals: - output_size: image_encoder_output_size - - - - question_hidden_state_reshaper: - priority: 3.01 - type: ReshapeTensor - input_dims: [-1, 1, 100] - output_dims: [-1, 100] - streams: - inputs: s2s_state_output - outputs: s2s_state_output_reshaped - globals: - output_size: s2s_state_output_reshaped_size - - # Element wise multiplication + FF. 
- question_image_fusion: - priority: 3.1 - type: ElementWiseMultiplication - dropout_rate: 0.5 - streams: - image_encodings: image_activations - question_encodings: s2s_state_output_reshaped - outputs: element_wise_activations - globals: - image_encoding_size: image_encoder_output_size - question_encoding_size: question_encoder_output_size - output_size: element_wise_activation_size - - question_image_to_answer_space_projection_ffn: - # Role of this component is to "project" output of fusion component to "answer space". - priority: 3.2 - type: FeedForwardNetwork - hidden_sizes: [100] - dropout_rate: 0.5 - # Output should not go throught softmax! - use_logsoftmax: False - streams: - inputs: element_wise_activations - predictions: question_image_activations - globals: - input_size: element_wise_activation_size - prediction_size: ans_vocabulary_size - - projected_question_image_reshaper: - priority: 3.3 - type: ReshapeTensor - input_dims: [-1, 2088] - output_dims: [-1, 1, 2088] - streams: - inputs: question_image_activations - outputs: question_image_activations_reshaped - globals: - output_size: question_image_activations_reshaped_size - - # Single layer GRU Decoder with attention - decoder: - type: Attn_Decoder_RNN - priority: 4 - hidden_size: 100 - # Output layer is softmax layer, projecting "1-hot like word encodings". - use_logsoftmax: True - autoregression_length: 10 # Current implementation requires this value to be equal to fixed_padding in SentenceEmbeddings/Indexer... - prediction_mode: Dense - dropout_rate: 0.1 - streams: - inputs: s2s_encoder_output - predictions: predictions - input_state: question_image_activations_reshaped - globals: - input_size: ans_vocabulary_size - prediction_size: ans_vocabulary_size - - s# Loss - nllloss: - type: NLLLoss - priority: 6 - num_targets_dims: 2 - streams: - targets: indexed_answers - loss: loss - globals: - ignore_index: ans_pad_index - - # Prediction decoding. - prediction_decoder: - priority: 10 - type: SentenceIndexer - # Reverse mode. - reverse: True - # Use distributions as inputs. - use_input_distributions: True - data_folder: ~/data/vqa-med - import_word_mappings_from_globals: True - globals: - word_mappings: ans_word_mappings - streams: - inputs: predictions - outputs: prediction_sentences - - # Statistics. - batch_size: - type: BatchSizeStatistics - priority: 100.0 - - bleu: - type: BLEUStatistics - priority: 100.2 - globals: - word_mappings: ans_word_mappings - streams: - targets: indexed_answers - - - # Viewers. 
- viewer: - type: StreamViewer - priority: 100.3 - input_streams: questions,answers,indexed_answers,prediction_sentences - -#: pipeline From 6c35bc193ab7c1c9b56cc9c475e87009952600fa Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 13:23:58 -0700 Subject: [PATCH 7/9] reverted changes on gru decoder with attention, adde config for training c124 using pretrained input fusion pipeline --- .../components/models/attn_decoder_rnn..yml | 78 +++++++++ .../evaluation/frozen_if_ffn_c1234_loss.yml | 154 ++++++++++++++++++ ...3_loss.yml => frozen_if_ffn_c123_loss.yml} | 0 3 files changed, 232 insertions(+) create mode 100644 configs/default/components/models/attn_decoder_rnn..yml create mode 100644 configs/vqa_med_2019/evaluation/frozen_if_ffn_c1234_loss.yml rename configs/vqa_med_2019/evaluation/{example_frozen_if_ffn_c123_loss.yml => frozen_if_ffn_c123_loss.yml} (100%) diff --git a/configs/default/components/models/attn_decoder_rnn..yml b/configs/default/components/models/attn_decoder_rnn..yml new file mode 100644 index 0000000..f676809 --- /dev/null +++ b/configs/default/components/models/attn_decoder_rnn..yml @@ -0,0 +1,78 @@ +# This file defines the default values for the GRU decoder with attention. + +#################################################################### +# 1. CONFIGURATION PARAMETERS that will be LOADED by the component. +#################################################################### + +# Size of the hidden state (LOADED) +hidden_size: 100 + +# Wether to include the last hidden state in the outputs +output_last_state: False + +# Type of recurrent cell (LOADED) +# -> Only GRU is supported + +# Number of "stacked" layers (LOADED) +# -> Only a single layer is supported + +# Dropout rate (LOADED) +# Default: 0 (means that it is turned off) +dropout_rate: 0 + +# Prediction mode (LOADED) +# Options: +# * Dense (passes every activation through output layer) | +# * Last (passes only the last activation though output layer) | +# * None (all outputs are discarded) +prediction_mode: Dense + +# Enable FFN layer at the output of the RNN (before eventual feed back in the case of autoregression). +# Useful if the raw outputs of the RNN are needed, for attention encoder-decoder for example. +ffn_output: True + +# Length of generated output sequence (LOADED) +# User must set it per task, as it is task specific. +autoregression_length: 10 + +# If true, output of the last layer will be additionally processed with Log Softmax (LOADED) +use_logsoftmax: True + +streams: + #################################################################### + # 2. Keymappings associated with INPUT and OUTPUT streams. + #################################################################### + + # Stream containing batch of encoder outputs (INPUT) + inputs: inputs + + # Stream containing the inital state of the RNN (INPUT) + # The stream will be actually created only if `inital_state: Input` + input_state: input_state + + # Stream containing predictions (OUTPUT) + predictions: predictions + + # Stream containing the final output state of the RNN (output) + # The stream will be actually created only if `output_last_state: True` + output_state: output_state + +globals: + #################################################################### + # 3. Keymappings of variables that will be RETRIEVED from GLOBALS. 
+ #################################################################### + + # Size of the input (RETRIEVED) + input_size: input_size + + # Size of the prediction (RETRIEVED) + prediction_size: prediction_size + + #################################################################### + # 4. Keymappings associated with GLOBAL variables that will be SET. + #################################################################### + + #################################################################### + # 5. Keymappings associated with statistics that will be ADDED. + #################################################################### + diff --git a/configs/vqa_med_2019/evaluation/frozen_if_ffn_c1234_loss.yml b/configs/vqa_med_2019/evaluation/frozen_if_ffn_c1234_loss.yml new file mode 100644 index 0000000..9407c13 --- /dev/null +++ b/configs/vqa_med_2019/evaluation/frozen_if_ffn_c1234_loss.yml @@ -0,0 +1,154 @@ +# Load config defining problems for training, validation and testing. +default_configs: + vqa_med_2019/default_vqa_med_2019.yml, + vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml + #vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml + +c123_hyperparameters: + # In here I am putting some of the hyperparameters from spreadsheet. + + question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize + # Accepted formats: a,b,c or [a,b,c] + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + + image_preprocessing: &image_preprocessing normalize + # Accepted formats: a,b,c or [a,b,c] + # none | random_affine | random_horizontal_flip | normalize | all + + # Final classifier: FFN. + answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500,500] + + batch_size: &batch_size 256 + preload_images: &preload_images False + num_workers: &num_workers 4 + +# Training parameters: +training: + problem: + batch_size: *batch_size + categories: all + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_c4_binary_yn.weights.csv + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images. + preload_images: *preload_images + streams: + questions: tokenized_questions + sampler: + weights: ~/data/vqa-med/answers.c1_c2_c3_c4_binary_yn.weights.csv + # Use four workers for loading images. + dataloader: + num_workers: *num_workers + + # Optimizer parameters: + optimizer: + name: Adam + lr: 0.0001 + + # Terminal conditions: + terminal_conditions: + loss_stop: 1.0e-3 + episode_limit: 10000 + epoch_limit: -1 + +# Validation parameters: +validation: + partial_validation_interval: 100 + problem: + batch_size: *batch_size + categories: all + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images: false, as we will need them only once, at the end. + preload_images: false + streams: + questions: tokenized_questions + dataloader: + num_workers: 1 + + +pipeline: + + ################# PIPE 6: C1 + C2 + C3 questions ################# + + # Answer encoding. + pipe6_c1234_answer_indexer: + priority: 6.2 + type: LabelIndexer + data_folder: ~/data/vqa-med + word_mappings_file: answers.all.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c1234_binary_yn + word_mappings: word_mappings_c1234_binary_yn + + + # Model 4: FFN C123 answering + pipe6_c1234_answer_classifier: + priority: 6.3 + type: FeedForwardNetwork + hidden: *answer_classifier_hidden_sizes_val + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c1234_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c1234_binary_yn + + pipe6_c1234_nllloss: + priority: 6.4 + type: NLLLoss + targets_dim: 1 + streams: + predictions: pipe6_c1234_predictions + targets: answers_ids + loss: pipe6_c1234_loss + + pipe6_c1234_precision_recall: + priority: 6.5 + type: PrecisionRecallStatistics + use_word_mappings: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + predictions: pipe6_c1234_predictions + targets: answers_ids + globals: + word_mappings: word_mappings_c1234_binary_yn + statistics: + precision: pipe6_c1234_precision + recall: pipe6_c1234_recall + f1score: pipe6_c1234_f1score + + # C123 Predictions decoder. + pipe6_c1234_binary_yn_prediction_decoder: + priority: 6.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe6_c1234_predictions + outputs: predicted_answers + globals: + word_mappings: word_mappings_c1234_binary_yn + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. + viewer: + priority: 9.3 + type: StreamViewer + input_streams: + tokenized_questions, + category_names, predicted_category_names, + answers, predicted_answers + + +#: pipeline diff --git a/configs/vqa_med_2019/evaluation/example_frozen_if_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/frozen_if_ffn_c123_loss.yml similarity index 100% rename from configs/vqa_med_2019/evaluation/example_frozen_if_ffn_c123_loss.yml rename to configs/vqa_med_2019/evaluation/frozen_if_ffn_c123_loss.yml From afbbcb129f0c4af06466bed4116deb5bebbfbd26 Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 13:28:22 -0700 Subject: [PATCH 8/9] missing rnn --- ptp/components/models/attn_decoder_rnn..py | 242 +++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 ptp/components/models/attn_decoder_rnn..py diff --git a/ptp/components/models/attn_decoder_rnn..py b/ptp/components/models/attn_decoder_rnn..py new file mode 100644 index 0000000..32a2c14 --- /dev/null +++ b/ptp/components/models/attn_decoder_rnn..py @@ -0,0 +1,242 @@ +# Copyright (C) Alexis Asseman, IBM Corporation 2019 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__author__ = "Alexis Asseman" + +import torch + +from ptp.configuration.configuration_error import ConfigurationError +from ptp.components.models.model import Model +from ptp.data_types.data_definition import DataDefinition + + +class Attn_Decoder_RNN(Model): + """ + Single layer GRU decoder with attention: + Bahdanau, D., Cho, K., & Bengio, Y. (2014). Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473. + + Needs the full sequence of hidden states from the encoder as input, as well as the last hidden state from the encoder as input state. + + Code is based on https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html. + """ + def __init__(self, name, config): + """ + Initializes the model. + + :param config: Dictionary of parameters (read from configuration ``.yaml`` file). + :type config: ``ptp.configuration.ConfigInterface`` + """ + # Call constructors of parent classes. + Model.__init__(self, name, Attn_Decoder_RNN, config) + + # Get input/output mode + self.output_last_state = self.config["output_last_state"] + self.ffn_output = self.config["ffn_output"] + + # Get prediction mode from configuration. + self.prediction_mode = self.config["prediction_mode"] + if self.prediction_mode not in ['Dense','Last', 'None']: + raise ConfigurationError("Invalid 'prediction_mode' (current {}, available {})".format(self.prediction_mode, ['Dense','Last', 'None'])) + + self.autoregression_length = self.config["autoregression_length"] + + # Retrieve input size from global variables. + self.key_input_size = self.global_keys["input_size"] + self.input_size = self.globals["input_size"] + if type(self.input_size) == list: + if len(self.input_size) == 1: + self.input_size = self.input_size[0] + else: + raise ConfigurationError("RNN input size '{}' must be a single dimension (current {})".format(self.key_input_size, self.input_size)) + + # Retrieve output (prediction) size from global params. + self.prediction_size = self.globals["prediction_size"] + if type(self.prediction_size) == list: + if len(self.prediction_size) == 1: + self.prediction_size = self.prediction_size[0] + else: + raise ConfigurationError("RNN prediction size '{}' must be a single dimension (current {})".format(self.key_prediction_size, self.prediction_size)) + + # Retrieve hidden size from configuration. + self.hidden_size = self.config["hidden_size"] + if type(self.hidden_size) == list: + if len(self.hidden_size) == 1: + self.hidden_size = self.hidden_size[0] + else: + raise ConfigurationError("RNN hidden_size must be a single dimension (current {})".format(self.hidden_size)) + + # Get dropout rate value from config. + dropout_rate = self.config["dropout_rate"] + + # Create dropout layer. + self.dropout = torch.nn.Dropout(dropout_rate) + + # Create rnn cell: hardcoded one layer GRU. + self.rnn_cell = getattr(torch.nn, "GRU")(self.input_size, self.hidden_size, 1, dropout=dropout_rate, batch_first=True) + + # Create layers for the attention + self.attn = torch.nn.Linear(self.hidden_size * 2, self.autoregression_length) + self.attn_combine = torch.nn.Linear(self.hidden_size * 2, self.hidden_size) + + # Create the trainable initial input for the decoder (A trained token of sorts) + self.sos_token = torch.zeros(1, self.input_size) + torch.nn.init.xavier_uniform(self.sos_token) + self.sos_token = torch.nn.Parameter(self.sos_token, requires_grad=True) + + # Get key mappings. 
+ self.key_inputs = self.stream_keys["inputs"] + self.key_predictions = self.stream_keys["predictions"] + self.key_input_state = self.stream_keys["input_state"] + if self.output_last_state: + self.key_output_state = self.stream_keys["output_state"] + + self.logger.info("Initializing RNN with input size = {}, hidden size = {} and prediction size = {}".format(self.input_size, self.hidden_size, self.prediction_size)) + + # Create the output layer. + self.activation2output_layer = None + if(self.ffn_output): + self.activation2output_layer = torch.nn.Linear(self.hidden_size, self.prediction_size) + + # Create the final non-linearity. + self.use_logsoftmax = self.config["use_logsoftmax"] + if self.use_logsoftmax: + if self.prediction_mode == "Dense": + # Used then returning dense prediction, i.e. every output of unfolded model. + self.log_softmax = torch.nn.LogSoftmax(dim=2) + else: + # Used when returning only the last output. + self.log_softmax = torch.nn.LogSoftmax(dim=1) + + def activation2output(self, activations): + output = self.dropout(activations) + + if(self.ffn_output): + #output = activations.squeeze(1) + shape = activations.shape + + # Reshape to 2D tensor [BATCH_SIZE * SEQ_LEN x HIDDEN_SIZE] + output = output.contiguous().view(-1, shape[2]) + + # Propagate data through the output layer [BATCH_SIZE * SEQ_LEN x PREDICTION_SIZE] + output = self.activation2output_layer(output) + #output = output.unsqueeze(1) + + # Reshape back to 3D tensor [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE] + output = output.view(shape[0], shape[1], output.size(1)) + + return output + + + def input_data_definitions(self): + """ + Function returns a dictionary with definitions of input data that are required by the component. + + :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + d = {} + + d[self.key_inputs] = DataDefinition([-1, -1, self.hidden_size], [torch.Tensor], "Batch of encoder outputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]") + + # Input hidden state + d[self.key_input_state] = DataDefinition([-1, 1, self.hidden_size], [torch.Tensor], "Batch of RNN last hidden states passed from another RNN that will be used as initial [BATCH_SIZE x NUM_LAYERS x SEQ_LEN x HIDDEN_SIZE]") + + return d + + def output_data_definitions(self): + """ + Function returns a dictionary with definitions of output data produced the component. + + :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + d = {} + + if self.prediction_mode == "Dense": + d[self.key_predictions] = DataDefinition([-1, -1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]") + elif self.prediction_mode == "Last": # "Last" + # Only last prediction. + d[self.key_predictions] = DataDefinition([-1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]") + + # Output hidden state stream TODO: why do we need that? + if self.output_last_state: + d[self.key_output_state] = DataDefinition([-1, 1, self.hidden_size], [torch.Tensor], "Batch of RNN final hidden states [BATCH_SIZE x NUM_LAYERS x SEQ_LEN x HIDDEN_SIZE]") + + return d + + def forward(self, data_dict): + """ + Forward pass of the model. 
+ + :param data_dict: DataDict({'inputs', 'predictions ...}), where: + + - inputs: expected inputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE], + - predictions: returned output with predictions (log_probs) [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE] + """ + + inputs = data_dict[self.key_inputs] + batch_size = inputs.shape[0] + #print("{}: input shape: {}, device: {}\n".format(self.name, inputs.shape, inputs.device)) + + # Initialize hidden state from inputs - as last hidden state from external component. + hidden = data_dict[self.key_input_state] + # For RNNs (aside of LSTM): [BATCH_SIZE x NUM_LAYERS x HIDDEN_SIZE] -> [NUM_LAYERS x BATCH_SIZE x HIDDEN_SIZE] + hidden = hidden.transpose(0,1) + #print("{}: hidden shape: {}, device: {}\n".format(self.name, hidden.shape, hidden.device)) + + # List that will contain the output sequence + activations = [] + + # First input to the decoder - trainable "start of sequence" token + activations_partial = self.sos_token.expand(batch_size, -1).unsqueeze(1) + + # Feed back the outputs iteratively + for i in range(self.autoregression_length): + + # Do the attention thing + attn_weights = torch.nn.functional.softmax( + self.attn(torch.cat((activations_partial.transpose(0, 1), hidden), 2)), + dim=2 + ) + attn_applied = torch.bmm(attn_weights.transpose(0, 1), inputs) + activations_partial = torch.cat((activations_partial, attn_applied), 2) + activations_partial = self.attn_combine(activations_partial) + activations_partial = torch.nn.functional.relu(activations_partial) + + # Feed through the RNN + activations_partial, hidden = self.rnn_cell(activations_partial, hidden) + activations_partial = self.activation2output(activations_partial) + + # Add the single step output into list + if self.prediction_mode == "Dense": + activations += [activations_partial] + + # Reassemble all the outputs from list into an output tensor + if self.prediction_mode == "Dense": + outputs = torch.cat(activations, 1) + # Log softmax - along PREDICTION dim. + if self.use_logsoftmax: + outputs = self.log_softmax(outputs) + # Add predictions to datadict. + data_dict.extend({self.key_predictions: outputs}) + elif self.prediction_mode == "Last": + if self.use_logsoftmax: + outputs = self.log_softmax(activations_partial.squeeze(1)) + # Add predictions to datadict. + data_dict.extend({self.key_predictions: outputs}) + + # Output last hidden state, if requested + if self.output_last_state: + # For others: [NUM_LAYERS x BATCH_SIZE x HIDDEN_SIZE] -> [BATCH_SIZE x NUM_LAYERS x HIDDEN_SIZE] + hidden = hidden.transpose(0,1) + # Export last hidden state. + data_dict.extend({self.key_output_state: hidden}) From ed6819076f4840a3f577cbf28fd1609b63cff11c Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 13:41:59 -0700 Subject: [PATCH 9/9] removed doubled attn_decoder_rnn..py --- ptp/components/models/attn_decoder_rnn..py | 242 --------------------- 1 file changed, 242 deletions(-) delete mode 100644 ptp/components/models/attn_decoder_rnn..py diff --git a/ptp/components/models/attn_decoder_rnn..py b/ptp/components/models/attn_decoder_rnn..py deleted file mode 100644 index 32a2c14..0000000 --- a/ptp/components/models/attn_decoder_rnn..py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright (C) Alexis Asseman, IBM Corporation 2019 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__author__ = "Alexis Asseman" - -import torch - -from ptp.configuration.configuration_error import ConfigurationError -from ptp.components.models.model import Model -from ptp.data_types.data_definition import DataDefinition - - -class Attn_Decoder_RNN(Model): - """ - Single layer GRU decoder with attention: - Bahdanau, D., Cho, K., & Bengio, Y. (2014). Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473. - - Needs the full sequence of hidden states from the encoder as input, as well as the last hidden state from the encoder as input state. - - Code is based on https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html. - """ - def __init__(self, name, config): - """ - Initializes the model. - - :param config: Dictionary of parameters (read from configuration ``.yaml`` file). - :type config: ``ptp.configuration.ConfigInterface`` - """ - # Call constructors of parent classes. - Model.__init__(self, name, Attn_Decoder_RNN, config) - - # Get input/output mode - self.output_last_state = self.config["output_last_state"] - self.ffn_output = self.config["ffn_output"] - - # Get prediction mode from configuration. - self.prediction_mode = self.config["prediction_mode"] - if self.prediction_mode not in ['Dense','Last', 'None']: - raise ConfigurationError("Invalid 'prediction_mode' (current {}, available {})".format(self.prediction_mode, ['Dense','Last', 'None'])) - - self.autoregression_length = self.config["autoregression_length"] - - # Retrieve input size from global variables. - self.key_input_size = self.global_keys["input_size"] - self.input_size = self.globals["input_size"] - if type(self.input_size) == list: - if len(self.input_size) == 1: - self.input_size = self.input_size[0] - else: - raise ConfigurationError("RNN input size '{}' must be a single dimension (current {})".format(self.key_input_size, self.input_size)) - - # Retrieve output (prediction) size from global params. - self.prediction_size = self.globals["prediction_size"] - if type(self.prediction_size) == list: - if len(self.prediction_size) == 1: - self.prediction_size = self.prediction_size[0] - else: - raise ConfigurationError("RNN prediction size '{}' must be a single dimension (current {})".format(self.key_prediction_size, self.prediction_size)) - - # Retrieve hidden size from configuration. - self.hidden_size = self.config["hidden_size"] - if type(self.hidden_size) == list: - if len(self.hidden_size) == 1: - self.hidden_size = self.hidden_size[0] - else: - raise ConfigurationError("RNN hidden_size must be a single dimension (current {})".format(self.hidden_size)) - - # Get dropout rate value from config. - dropout_rate = self.config["dropout_rate"] - - # Create dropout layer. - self.dropout = torch.nn.Dropout(dropout_rate) - - # Create rnn cell: hardcoded one layer GRU. 
- self.rnn_cell = getattr(torch.nn, "GRU")(self.input_size, self.hidden_size, 1, dropout=dropout_rate, batch_first=True) - - # Create layers for the attention - self.attn = torch.nn.Linear(self.hidden_size * 2, self.autoregression_length) - self.attn_combine = torch.nn.Linear(self.hidden_size * 2, self.hidden_size) - - # Create the trainable initial input for the decoder (A trained token of sorts) - self.sos_token = torch.zeros(1, self.input_size) - torch.nn.init.xavier_uniform(self.sos_token) - self.sos_token = torch.nn.Parameter(self.sos_token, requires_grad=True) - - # Get key mappings. - self.key_inputs = self.stream_keys["inputs"] - self.key_predictions = self.stream_keys["predictions"] - self.key_input_state = self.stream_keys["input_state"] - if self.output_last_state: - self.key_output_state = self.stream_keys["output_state"] - - self.logger.info("Initializing RNN with input size = {}, hidden size = {} and prediction size = {}".format(self.input_size, self.hidden_size, self.prediction_size)) - - # Create the output layer. - self.activation2output_layer = None - if(self.ffn_output): - self.activation2output_layer = torch.nn.Linear(self.hidden_size, self.prediction_size) - - # Create the final non-linearity. - self.use_logsoftmax = self.config["use_logsoftmax"] - if self.use_logsoftmax: - if self.prediction_mode == "Dense": - # Used then returning dense prediction, i.e. every output of unfolded model. - self.log_softmax = torch.nn.LogSoftmax(dim=2) - else: - # Used when returning only the last output. - self.log_softmax = torch.nn.LogSoftmax(dim=1) - - def activation2output(self, activations): - output = self.dropout(activations) - - if(self.ffn_output): - #output = activations.squeeze(1) - shape = activations.shape - - # Reshape to 2D tensor [BATCH_SIZE * SEQ_LEN x HIDDEN_SIZE] - output = output.contiguous().view(-1, shape[2]) - - # Propagate data through the output layer [BATCH_SIZE * SEQ_LEN x PREDICTION_SIZE] - output = self.activation2output_layer(output) - #output = output.unsqueeze(1) - - # Reshape back to 3D tensor [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE] - output = output.view(shape[0], shape[1], output.size(1)) - - return output - - - def input_data_definitions(self): - """ - Function returns a dictionary with definitions of input data that are required by the component. - - :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`). - """ - d = {} - - d[self.key_inputs] = DataDefinition([-1, -1, self.hidden_size], [torch.Tensor], "Batch of encoder outputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]") - - # Input hidden state - d[self.key_input_state] = DataDefinition([-1, 1, self.hidden_size], [torch.Tensor], "Batch of RNN last hidden states passed from another RNN that will be used as initial [BATCH_SIZE x NUM_LAYERS x SEQ_LEN x HIDDEN_SIZE]") - - return d - - def output_data_definitions(self): - """ - Function returns a dictionary with definitions of output data produced the component. - - :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). - """ - d = {} - - if self.prediction_mode == "Dense": - d[self.key_predictions] = DataDefinition([-1, -1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]") - elif self.prediction_mode == "Last": # "Last" - # Only last prediction. 
- d[self.key_predictions] = DataDefinition([-1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]") - - # Output hidden state stream TODO: why do we need that? - if self.output_last_state: - d[self.key_output_state] = DataDefinition([-1, 1, self.hidden_size], [torch.Tensor], "Batch of RNN final hidden states [BATCH_SIZE x NUM_LAYERS x SEQ_LEN x HIDDEN_SIZE]") - - return d - - def forward(self, data_dict): - """ - Forward pass of the model. - - :param data_dict: DataDict({'inputs', 'predictions ...}), where: - - - inputs: expected inputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE], - - predictions: returned output with predictions (log_probs) [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE] - """ - - inputs = data_dict[self.key_inputs] - batch_size = inputs.shape[0] - #print("{}: input shape: {}, device: {}\n".format(self.name, inputs.shape, inputs.device)) - - # Initialize hidden state from inputs - as last hidden state from external component. - hidden = data_dict[self.key_input_state] - # For RNNs (aside of LSTM): [BATCH_SIZE x NUM_LAYERS x HIDDEN_SIZE] -> [NUM_LAYERS x BATCH_SIZE x HIDDEN_SIZE] - hidden = hidden.transpose(0,1) - #print("{}: hidden shape: {}, device: {}\n".format(self.name, hidden.shape, hidden.device)) - - # List that will contain the output sequence - activations = [] - - # First input to the decoder - trainable "start of sequence" token - activations_partial = self.sos_token.expand(batch_size, -1).unsqueeze(1) - - # Feed back the outputs iteratively - for i in range(self.autoregression_length): - - # Do the attention thing - attn_weights = torch.nn.functional.softmax( - self.attn(torch.cat((activations_partial.transpose(0, 1), hidden), 2)), - dim=2 - ) - attn_applied = torch.bmm(attn_weights.transpose(0, 1), inputs) - activations_partial = torch.cat((activations_partial, attn_applied), 2) - activations_partial = self.attn_combine(activations_partial) - activations_partial = torch.nn.functional.relu(activations_partial) - - # Feed through the RNN - activations_partial, hidden = self.rnn_cell(activations_partial, hidden) - activations_partial = self.activation2output(activations_partial) - - # Add the single step output into list - if self.prediction_mode == "Dense": - activations += [activations_partial] - - # Reassemble all the outputs from list into an output tensor - if self.prediction_mode == "Dense": - outputs = torch.cat(activations, 1) - # Log softmax - along PREDICTION dim. - if self.use_logsoftmax: - outputs = self.log_softmax(outputs) - # Add predictions to datadict. - data_dict.extend({self.key_predictions: outputs}) - elif self.prediction_mode == "Last": - if self.use_logsoftmax: - outputs = self.log_softmax(activations_partial.squeeze(1)) - # Add predictions to datadict. - data_dict.extend({self.key_predictions: outputs}) - - # Output last hidden state, if requested - if self.output_last_state: - # For others: [NUM_LAYERS x BATCH_SIZE x HIDDEN_SIZE] -> [BATCH_SIZE x NUM_LAYERS x HIDDEN_SIZE] - hidden = hidden.transpose(0,1) - # Export last hidden state. - data_dict.extend({self.key_output_state: hidden})
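For reference, the standalone sketch below (not part of the patch series) condenses the autoregressive loop of the GRU decoder with attention introduced in PATCH 8/9 (attn_decoder_rnn..py) into plain PyTorch. The class name AttnDecoderSketch and the toy sizes are hypothetical, and the feedback path is simplified: the raw GRU output is fed back at each step, while the projection to the prediction size is applied only when collecting outputs (the patch routes the feedback through activation2output instead).

# Standalone sketch with assumed toy sizes; mirrors the attention step and the
# "Dense" prediction mode of the decoder added in PATCH 8/9.
import torch
import torch.nn as nn
import torch.nn.functional as F


class AttnDecoderSketch(nn.Module):
    def __init__(self, input_size=64, hidden_size=64, prediction_size=10, max_len=5):
        super().__init__()
        self.max_len = max_len
        # Attention over a fixed-length sequence of encoder outputs: maps
        # [previous step input ; hidden state] to one weight per encoder step.
        self.attn = nn.Linear(hidden_size * 2, max_len)
        self.attn_combine = nn.Linear(hidden_size * 2, hidden_size)
        self.rnn = nn.GRU(input_size, hidden_size, num_layers=1, batch_first=True)
        self.out = nn.Linear(hidden_size, prediction_size)
        # Trainable "start of sequence" input, analogous to sos_token in the patch.
        self.sos = nn.Parameter(torch.zeros(1, input_size))
        nn.init.xavier_uniform_(self.sos)

    def forward(self, encoder_outputs, encoder_last_hidden):
        # encoder_outputs: [B x SEQ_LEN x HIDDEN], encoder_last_hidden: [1 x B x HIDDEN]
        batch_size = encoder_outputs.size(0)
        step_input = self.sos.expand(batch_size, -1).unsqueeze(1)  # [B x 1 x INPUT]
        hidden = encoder_last_hidden
        outputs = []
        for _ in range(self.max_len):
            # Attention weights from the previous step input and the current hidden state.
            attn_weights = F.softmax(
                self.attn(torch.cat((step_input.transpose(0, 1), hidden), dim=2)), dim=2
            )                                                                # [1 x B x SEQ_LEN]
            # Weighted sum of encoder outputs (the attention context).
            context = torch.bmm(attn_weights.transpose(0, 1), encoder_outputs)  # [B x 1 x HIDDEN]
            # Combine previous input with the context, then advance the GRU by one step.
            step_input = F.relu(self.attn_combine(torch.cat((step_input, context), dim=2)))
            step_input, hidden = self.rnn(step_input, hidden)
            outputs.append(self.out(step_input))                             # [B x 1 x PRED]
        # "Dense" predictions: every unfolded step, log-softmax over classes.
        return F.log_softmax(torch.cat(outputs, dim=1), dim=2)


# Toy run: batch of 2, encoder sequence of length 5, hidden size 64.
enc_out = torch.randn(2, 5, 64)
enc_hidden = torch.randn(1, 2, 64)
print(AttnDecoderSketch()(enc_out, enc_hidden).shape)  # torch.Size([2, 5, 10])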