Skip to content
This repository was archived by the owner on Jul 18, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
8e5922f
Merge pull request #2 from IBM/develop
tkornuta-ibm Apr 10, 2019
355106e
Merge pull request #4 from IBM/develop
tkornuta-ibm Apr 12, 2019
a9c8439
Merge branch 'master' of github.com-tkornut:IBM/pytorchpipe into proc…
tkornuta-ibm Apr 29, 2019
6f421bd
removed data augmentations from c2 configs
tkornuta-ibm Apr 29, 2019
8e7d08b
added using config name as pipeline name in trainer
tkornuta-ibm Apr 29, 2019
e6d0344
Merge branch 'develop' of github.com-tkornut:IBM/pytorchpipe into c12…
tkornuta-ibm Apr 29, 2019
94f1e63
resnet50 feature_maps mode and c2 configs using resnet50 with EWM/RN
tkornuta-ibm Apr 30, 2019
454893c
Merge branch 'develop' of github.com-tkornut:IBM/pytorchpipe into c12…
tkornuta-ibm Apr 30, 2019
5940c24
c123_binary_lstm_vgg16_cat_ffn_loss.yml
tkornuta-ibm Apr 30, 2019
2371628
c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml
tkornuta-ibm Apr 30, 2019
d5a11d0
123_no_binary_lstm_resnet50_is_cat_ffn_loss
tkornuta-ibm Apr 30, 2019
b7b619a
cleanup of c123_no_binary cat pipelines with different image encoders
tkornuta-ibm Apr 30, 2019
ef65ef6
c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml + cleanups of v2…
tkornuta-ibm Apr 30, 2019
43f83bc
c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
tkornuta-ibm Apr 30, 2019
3098a07
cleanup: priorities first
tkornuta-ibm Apr 30, 2019
0aaa1a5
trainer fix: using name of config file when pipeline name not present
tkornuta-ibm Apr 30, 2019
6ad9c28
cleanup and rename of simple vf configs
tkornuta-ibm Apr 30, 2019
d8c588c
lstm_vgg16_is_cat_ffn_only_yn_loss.yml
tkornuta-ibm Apr 30, 2019
529fb99
configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml
tkornuta-ibm Apr 30, 2019
1c4722a
extend_answers working on tokenized_answers returned from problem
tkornuta-ibm Apr 30, 2019
da3c2f8
default_extended_answers: predicted_answers
tkornuta-ibm Apr 30, 2019
32706a9
configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml
tkornuta-ibm Apr 30, 2019
5556abd
configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss…
tkornuta-ibm Apr 30, 2019
a3ff779
lstm_resnet50_is_cat_ffn_c123_no_binary_loss
tkornuta-ibm Apr 30, 2019
48c7287
lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml
tkornuta-ibm Apr 30, 2019
9f935ab
lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml
tkornuta-ibm Apr 30, 2019
7db986f
increased loss treshold: 1e-3
tkornuta-ibm Apr 30, 2019
39242f6
Add option to ignore words in BLEU
aasseman Apr 30, 2019
ecc69df
extend answers - added second exported that creates the submission file
tkornuta-ibm Apr 30, 2019
f0c037c
vqa attention
Apr 30, 2019
e486dc7
separator export added to stream_file_exporter
tkornuta-ibm Apr 30, 2019
61be38d
separator export fix
tkornuta-ibm Apr 30, 2019
ff9ce7d
Merge pull request #32 from aasseman/feat/bleu-ignore-words
tkornuta-ibm Apr 30, 2019
eb83d56
Merge pull request #28 from IBM/c123_pipelines
tkornuta-ibm Apr 30, 2019
6662e08
Merge branch 'vqa-attention' of github.com-tkornut:Drajan/pytorchpipe…
tkornuta-ibm Apr 30, 2019
3bae683
attention cleanups
tkornuta-ibm Apr 30, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion configs/default/components/models/sentence_embeddings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ source_vocabulary_files: ''
# Additional tokens that will be added to vocabulary (LOADED)
# This list can be extended, but <PAD> and <EOS> are special tokens.
# <PAD> is ALWAYS used for padding shorter sequences.
additional_tokens: '<PAD>,<EOS>'
additional_tokens: '<PAD>'

# Enable <EOS> (end of sequence) token.
eos_token: False
Expand Down
58 changes: 58 additions & 0 deletions configs/default/components/models/vqa/attention.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# This file defines the default values for the VQA_Attention model.

####################################################################
# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
####################################################################

# Dropout rate (LOADED)
# Default: 0 (means that it is turned off)
dropout_rate: 0

# Size of the latent space (LOADED)
latent_size: 100

# Number of attention heads (LOADED)
num_attention_heads: 2


streams:
  ####################################################################
  # 2. Keymappings associated with INPUT and OUTPUT streams.
  ####################################################################

  # Stream containing batch of encoded images (INPUT)
  feature_maps: feature_maps

  # Stream containing batch of encoded questions (INPUT)
  question_encodings: question_encodings

  # Stream containing outputs (OUTPUT)
  outputs: outputs

globals:
  ####################################################################
  # 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
  ####################################################################

  # Height of the features tensor (RETRIEVED)
  feature_maps_height: feature_maps_height

  # Width of the features tensor (RETRIEVED)
  feature_maps_width: feature_maps_width

  # Depth of the features tensor (RETRIEVED)
  feature_maps_depth: feature_maps_depth

  # Size of the question encodings input (RETRIEVED)
  question_encoding_size: question_encoding_size

  # Size of the output (RETRIEVED)
  output_size: output_size

####################################################################
# 4. Keymappings associated with GLOBAL variables that will be SET.
####################################################################

####################################################################
# 5. Keymappings associated with statistics that will be ADDED.
####################################################################
3 changes: 3 additions & 0 deletions configs/default/components/publishers/bleu_statistics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ use_prediction_distributions: True
# TODO!
#use_masking: False

# Ignored words - useful for ignoring special tokens
ignored_words: ["<PAD>", "<EOS>"]

# Weights of n-grams used when calculating the score.
weights: [0.25, 0.25, 0.25, 0.25]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ input_streams: ''
# Separator that will be placed between values (LOADED)
separator: ','

# Adds additional line to output file enabling Excel to use different separator while loading (LOADED)
export_separator_line_to_csv: False

# Name of the file containing output values (LOADED)
filename: 'outputs.txt'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ training:
problem:
batch_size: 48
# Apply all preprocessing/data augmentations.
image_preprocessing: all
image_preprocessing: normalize
# none | random_affine | random_horizontal_flip | normalize | all
question_preprocessing: all
question_preprocessing: lowercase,remove_punctuation,tokenize
# none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all
streams:
# Problem is returning tokenized questions.
Expand All @@ -24,7 +24,6 @@ validation:


pipeline:
name: c2_class_lstm_resnet152_ewm_cat_is

global_publisher:
priority: 0
Expand Down Expand Up @@ -96,6 +95,7 @@ pipeline:
type: FeedForwardNetwork
hidden_sizes: [100]
dropout_rate: 0.5
use_logsoftmax: False
streams:
inputs: element_wise_activations
predictions: question_image_activations
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ training:
problem:
batch_size: 32
# Apply all preprocessing/data augmentations.
image_preprocessing: all
image_preprocessing: normalize
# none | random_affine | random_horizontal_flip | normalize | all
question_preprocessing: all
question_preprocessing: lowercase,remove_punctuation,tokenize
# none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all
streams:
# Problem is returning tokenized questions.
Expand All @@ -24,7 +24,6 @@ validation:


pipeline:
name: c2_class_lstm_resnet152_rn_cat_is

global_publisher:
priority: 0
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Load config defining problems for training, validation and testing.
default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml

training:
  problem:
    batch_size: 48
    # Apply all preprocessing/data augmentations.
    question_preprocessing: lowercase,remove_punctuation,tokenize
    streams:
      # Problem is returning tokenized questions.
      questions: tokenized_questions

validation:
  problem:
    batch_size: 48
    # Apply all preprocessing/data augmentations.
    question_preprocessing: lowercase,remove_punctuation,tokenize
    streams:
      # Problem is returning tokenized questions.
      questions: tokenized_questions


pipeline:

  global_publisher:
    priority: 0
    type: GlobalVariablePublisher
    # Add input_size to globals.
    # NOTE(review): question_image_activation_size (300) does not appear to be
    # consumed by any component below — confirm before removing.
    keys: [question_encoder_output_size, attention_activation_size, question_image_activation_size]
    values: [100, 4196, 300]

  ################# PIPE 0: question #################

  # Model 1: Embeddings
  question_embeddings:
    priority: 1.2
    type: SentenceEmbeddings
    embeddings_size: 100
    pretrained_embeddings_file: glove.6B.100d.txt
    data_folder: ~/data/vqa-med
    word_mappings_file: questions.all.word.mappings.csv
    streams:
      inputs: tokenized_questions
      outputs: embedded_questions

  # Model 2: RNN
  question_lstm:
    priority: 1.3
    type: RecurrentNeuralNetwork
    cell_type: LSTM
    prediction_mode: Last
    use_logsoftmax: False
    initial_state: Trainable
    dropout_rate: 0.1
    hidden_size: 50
    streams:
      inputs: embedded_questions
      predictions: question_activations
    globals:
      input_size: embeddings_size
      prediction_size: question_encoder_output_size

  ################# PIPE 2: image #################
  # Image encoder returning feature maps (not a flat vector).
  image_encoder:
    priority: 3.1
    type: TorchVisionWrapper
    model_type: resnet50
    return_feature_maps: True
    streams:
      inputs: images
      outputs: feature_maps

  ################# PIPE 3: image-question fusion #################
  # Attention + FF.
  question_image_fusion:
    priority: 4.1
    type: VQA_Attention
    dropout_rate: 0.5
    latent_size: 100
    num_attention_heads: 2
    streams:
      # Fixed: the VQA_Attention component's image input stream is named
      # 'feature_maps' (see configs/default/components/models/vqa/attention.yml),
      # not 'image_encodings'.
      feature_maps: feature_maps
      question_encodings: question_activations
      outputs: attention_activations
    globals:
      question_encoding_size: question_encoder_output_size
      output_size: attention_activation_size

  classifier:
    priority: 5.1
    type: FeedForwardNetwork
    hidden_sizes: [100]
    dropout_rate: 0.5
    streams:
      inputs: attention_activations
    globals:
      input_size: attention_activation_size
      prediction_size: vocabulary_size_c2


#: pipeline
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Load config defining problems for training, validation and testing.
default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml

training:
  problem:
    batch_size: 48
    # Apply all preprocessing/data augmentations.
    question_preprocessing: lowercase,remove_punctuation,tokenize
    streams:
      # Problem is returning tokenized questions.
      questions: tokenized_questions

validation:
  problem:
    batch_size: 48
    # Apply all preprocessing/data augmentations.
    question_preprocessing: lowercase,remove_punctuation,tokenize
    streams:
      # Problem is returning tokenized questions.
      questions: tokenized_questions


pipeline:

  global_publisher:
    priority: 0
    type: GlobalVariablePublisher
    # Add input_size to globals.
    keys: [question_encoder_output_size, image_encoder_output_size, element_wise_activation_size, image_size_encoder_input_size, image_size_encoder_output_size]
    values: [100, 100, 100, 2, 10]

  ################# PIPE 0: question #################

  # Model 1: Embeddings
  question_embeddings:
    priority: 1.2
    type: SentenceEmbeddings
    embeddings_size: 100
    pretrained_embeddings_file: glove.6B.100d.txt
    data_folder: ~/data/vqa-med
    word_mappings_file: questions.all.word.mappings.csv
    streams:
      inputs: tokenized_questions
      outputs: embedded_questions

  # Model 2: RNN
  question_lstm:
    priority: 1.3
    type: RecurrentNeuralNetwork
    cell_type: LSTM
    prediction_mode: Last
    use_logsoftmax: False
    initial_state: Trainable
    dropout_rate: 0.1
    hidden_size: 50
    streams:
      inputs: embedded_questions
      predictions: question_activations
    globals:
      input_size: embeddings_size
      prediction_size: question_encoder_output_size

  ################# PIPE 2: image #################
  # Image encoder (flat activation vector output).
  image_encoder:
    priority: 3.1
    type: TorchVisionWrapper
    model_type: resnet50
    streams:
      inputs: images
      outputs: image_activations
    globals:
      output_size: image_encoder_output_size

  ################# PIPE 3: image-question fusion #################
  # Element wise multiplication + FF.
  question_image_fusion:
    priority: 4.1
    type: ElementWiseMultiplication
    dropout_rate: 0.5
    streams:
      image_encodings: image_activations
      question_encodings: question_activations
      outputs: element_wise_activations
    globals:
      image_encoding_size: image_encoder_output_size
      question_encoding_size: question_encoder_output_size
      output_size: element_wise_activation_size

  question_image_ffn:
    priority: 4.2
    type: FeedForwardNetwork
    hidden_sizes: [100]
    dropout_rate: 0.5
    use_logsoftmax: False
    streams:
      inputs: element_wise_activations
      predictions: question_image_activations
    globals:
      input_size: element_wise_activation_size
      prediction_size: element_wise_activation_size

  ################# PIPE 5: image-question-image size fusion + classification #################
  # Model - image size FFN.
  image_size_encoder:
    priority: 5.1
    type: FeedForwardNetwork
    streams:
      inputs: image_sizes
      predictions: image_size_activations
    globals:
      input_size: image_size_encoder_input_size
      prediction_size: image_size_encoder_output_size

  # 4th subpipeline: concatenation + FF.
  concat:
    priority: 5.2
    type: Concatenation
    input_streams: [question_image_activations, image_size_activations]
    # Concatenation
    dim: 1 # default
    input_dims: [[-1,100],[-1,10]]
    output_dims: [-1,110]
    streams:
      outputs: concatenated_activations
    globals:
      # Fixed spelling: was 'concatentated_activations_size' (kept consistent
      # with the retrieval in the classifier below).
      output_size: concatenated_activations_size


  classifier:
    priority: 5.3
    type: FeedForwardNetwork
    hidden_sizes: [100]
    dropout_rate: 0.5
    streams:
      inputs: concatenated_activations
    globals:
      input_size: concatenated_activations_size
      prediction_size: vocabulary_size_c2


#: pipeline
Loading