This repository was archived by the owner on Jul 18, 2024. It is now read-only.
Merged
22 commits
805429b
added logging log_dir at the end of training
tkornuta-ibm Apr 23, 2019
890465b
Merge branch 'develop' of github.com-tkornut:IBM/pytorchpipe into c2_…
tkornuta-ibm Apr 23, 2019
02df022
Initial version of RN
tkornuta-ibm Apr 23, 2019
b07d633
typo fix in config
tkornuta-ibm Apr 23, 2019
c1d471f
rn config batch size commented
tkornuta-ibm Apr 23, 2019
123187b
rn config batch size
tkornuta-ibm Apr 23, 2019
a4bf21e
Merge branch 'develop' of github.com-tkornut:IBM/pytorchpipe into c1_…
tkornuta-ibm Apr 23, 2019
2eabb0d
Modified settings for relational network pipe
tkornuta-ibm Apr 24, 2019
8cbbf99
changed settings (larger glove, dropout) for c2 ewm_size
tkornuta-ibm Apr 24, 2019
2724419
reverted some changes in c2: commented dropout in rnn
tkornuta-ibm Apr 25, 2019
41cb472
simple config for c4 ewm
tkornuta-ibm Apr 25, 2019
5fbe60d
Merge branch 'develop' of github.com-tkornut:IBM/pytorchpipe into c2_…
tkornuta-ibm Apr 25, 2019
f2876f9
merge with rnn
tkornuta-ibm Apr 25, 2019
d3d472b
Merge branch 'develop' into c1_attention
tkornuta-ibm Apr 25, 2019
5f57c7d
Added preprocessing function to vqa_med problem
tkornuta-ibm Apr 26, 2019
54cd135
Merge branch 'c1_attention' into c4_pipelines
tkornuta-ibm Apr 26, 2019
8191a14
Merge branch 'develop' of github.com-tkornut:IBM/pytorchpipe into c4_…
tkornuta-ibm Apr 26, 2019
2ca681b
Added various preprocessing to vqa_med problem
tkornuta-ibm Apr 26, 2019
409f993
Added preprocessing to sentence tokenizer, cleanups
tkornuta-ibm Apr 26, 2019
477eed4
C4: config for classification of answer depending on answer words
tkornuta-ibm Apr 26, 2019
ee8cad5
C4: config for classification of answer depending on answer words, dr…
tkornuta-ibm Apr 26, 2019
07270dc
comment
tkornuta-ibm Apr 26, 2019
55 changes: 55 additions & 0 deletions configs/default/components/models/vqa/relational_network.yml
@@ -0,0 +1,55 @@
# This file defines the default values for the RelationalNetwork model.

####################################################################
# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
####################################################################

# Dropout rate (LOADED)
# Default: 0 (means that it is turned off)
dropout_rate: 0

# Size of the output of g_theta network/output after concatenation (LOADED)
output_size: 256

streams:
  ####################################################################
  # 2. Keymappings associated with INPUT and OUTPUT streams.
  ####################################################################

  # Stream containing batch of encoded images (INPUT)
  feature_maps: feature_maps

  # Stream containing batch of encoded questions (INPUT)
  question_encodings: question_encodings

  # Stream containing outputs (OUTPUT)
  outputs: outputs

globals:
  ####################################################################
  # 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
  ####################################################################

  # Height of the features tensor (RETRIEVED)
  feature_maps_height: feature_maps_height

  # Width of the features tensor (RETRIEVED)
  feature_maps_width: feature_maps_width

  # Depth of the features tensor (RETRIEVED)
  feature_maps_depth: feature_maps_depth

  # Size of the question encodings input (RETRIEVED)
  question_encoding_size: question_encoding_size

  ####################################################################
  # 4. Keymappings associated with GLOBAL variables that will be SET.
  ####################################################################

  # Size of the output (SET)
  output_size: output_size

  ####################################################################
  # 5. Keymappings associated with statistics that will be ADDED.
  ####################################################################
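For orientation only, a pipeline section could override these defaults roughly as sketched below; this is not part of the PR, and the component name and the right-hand-side stream/global names are illustrative assumptions.

# Hypothetical pipeline entry; names on the right are examples, not taken from this PR.
question_image_fusion:
  priority: 4.1
  type: RelationalNetwork
  dropout_rate: 0.5
  output_size: 256
  streams:
    feature_maps: feature_maps
    question_encodings: question_activations
    outputs: fused_image_question_activations
  globals:
    question_encoding_size: question_encoder_output_size
    output_size: fused_image_question_activation_size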

@@ -15,10 +15,6 @@ split: training
 # Options: all | c1 | c2 | c3 | c4 (or any combination of the latter 4)
 categories: all
 
-# Removes punctuation (LOADED)
-# Options: none | questions | answers | all
-remove_punctuation: questions
-
 # Resize parameter (LOADED)
 # When present, resizes the images from original size to [height, width]
 # Depth remains set to 3.
@@ -28,9 +24,24 @@ remove_punctuation: questions
 # Problem will use those values to rescale the image_sizes to range (0, 1).
 scale_image_size: [2414, 2323]
 
-# Set augmentation parameter
-# Use random affine transformations (rotate, scale and translate)
-use_augmentation: False
+# Select applied image preprocessing/augmentations (LOADED)
+# Use one (or more) of the transformations:
+# none | random_affine | random_horizontal_flip | normalize | all
+# Accepted formats: a,b,c or [a,b,c]
+image_preprocessing: normalize
+
+# Select applied question preprocessing/augmentations (LOADED)
+# Use one (or more) of the transformations:
+# none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all
+# Accepted formats: a,b,c or [a,b,c]
+question_preprocessing: lowercase, remove_punctuation
+
+# Select applied answer preprocessing (LOADED)
+# Use one (or more) of the transformations:
+# none | lowercase | remove_punctuation | tokenize | all
+# Accepted formats: a,b,c or [a,b,c]
+answer_preprocessing: none
+
 
 streams:
   ####################################################################
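As a usage sketch (not part of the diff), the two accepted formats mentioned in the comments above should be interchangeable; the values below are arbitrary example overrides of the new keys in a problem section.

# Illustrative overrides only; the chosen options are examples.
image_preprocessing: [random_affine, random_horizontal_flip, normalize]
question_preprocessing: lowercase, remove_punctuation, tokenize
answer_preprocessing: [lowercase, remove_punctuation]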
9 changes: 9 additions & 0 deletions configs/default/components/text/sentence_tokenizer.yml
@@ -8,6 +8,15 @@
 # False: sentence -> list of strings, True: list of strings -> sentence.
 detokenize: False
 
+# Select applied preprocessing/augmentations (LOADED)
+# Use one (or more) of the transformations:
+# none | lowercase | remove_punctuation | all
+# Accepted formats: a,b,c or [a,b,c]
+preprocessing: none
+
+# List of characters to be removed
+remove_characters: ''
+
 streams:
   ####################################################################
   # 2. Keymappings associated with INPUT and OUTPUT streams.
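A minimal sketch of a pipeline component overriding these new tokenizer defaults; the component and stream names are illustrative, and the c4 configuration later in this PR contains a real instance of the same pattern.

# Hypothetical component entry; names are examples.
answer_tokenizer:
  type: SentenceTokenizer
  priority: 1.1
  preprocessing: lowercase,remove_punctuation
  remove_characters: [“,”,’]
  streams:
    inputs: answers
    outputs: tokenized_answer_words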
@@ -1,6 +1,14 @@
 # Load config defining problems for training, validation and testing.
 default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml
 
+# Training parameters:
+training:
+  problem:
+    batch_size: 64
+validation:
+  problem:
+    batch_size: 64
+
 pipeline:
   name: c2_classification_all_rnn_vgg16_ewm_size
 
@@ -24,8 +32,8 @@ pipeline:
   question_embeddings:
     priority: 1.2
     type: SentenceEmbeddings
-    embeddings_size: 50
-    pretrained_embeddings_file: glove.6B.50d.txt
+    embeddings_size: 100
+    pretrained_embeddings_file: glove.6B.100d.txt
     data_folder: ~/data/vqa-med
     word_mappings_file: questions.all.word.mappings.csv
     streams:
@@ -39,11 +47,9 @@ pipeline:
     cell_type: LSTM
     prediction_mode: Last
     use_logsoftmax: False
-<<<<<<< Updated upstream
-    initial_state_trainable: False
-=======
->>>>>>> Stashed changes
+    initial_state: Zero
     hidden_size: 50
+    #dropout_rate: 0.5
     streams:
       inputs: embedded_questions
       predictions: question_activations
@@ -120,7 +126,7 @@ pipeline:
   classifier:
     priority: 5.3
     type: FeedForwardNetwork
-    hidden_sizes: [110]
+    hidden_sizes: [100]
     dropout_rate: 0.5
     streams:
       inputs: concatenated_activations
@@ -72,7 +72,6 @@ pipeline:
   question_image_fusion:
     priority: 4.1
     type: MultimodalCompactBilinearPooling
-    dropout_rate: 0.5
     streams:
       image_encodings: image_activations
       question_encodings: question_activations
@@ -47,7 +47,7 @@ pipeline:
     cell_type: LSTM
     prediction_mode: Last
     use_logsoftmax: False
-    initial_state: Trainable
+    initial_state: Zero
     #dropout_rate: 0.5
     hidden_size: 50
     streams:
@@ -0,0 +1,63 @@
# Load config defining problems for training, validation and testing.
default_configs: vqa_med_2019/c4_classification/default_c4_classification.yml

# Training parameters:
training:
  problem:
    batch_size: 128
    remove_punctuation: all

# Validation parameters:
validation:
  problem:
    batch_size: 128
    remove_punctuation: all

pipeline:
  name: c4_word_answer_onehot_bow

  # Answer encoding.
  answer_tokenizer:
    type: SentenceTokenizer
    priority: 1.1
    preprocessing: lowercase,remove_punctuation
    remove_characters: [“,”,’]
    streams:
      inputs: answers
      outputs: tokenized_answer_words

  answer_onehot_encoder:
    type: SentenceOneHotEncoder
    priority: 1.2
    data_folder: ~/data/vqa-med
    word_mappings_file: answer_words.c4.preprocessed.word.mappings.csv
    export_word_mappings_to_globals: True
    streams:
      inputs: tokenized_answer_words
      outputs: encoded_answer_words
    globals:
      vocabulary_size: answer_words_vocabulary_size
      word_mappings: answer_words_word_mappings

  answer_bow_encoder:
    type: BOWEncoder
    priority: 1.3
    streams:
      inputs: encoded_answer_words
      outputs: bow_answer_words
    globals:
      bow_size: answer_words_vocabulary_size

  # Model.
  classifier:
    type: FeedForwardNetwork
    hidden_sizes: [500]
    dropout_rate: 0.5
    priority: 3
    streams:
      inputs: bow_answer_words
    globals:
      input_size: answer_words_vocabulary_size
      prediction_size: vocabulary_size_c4

#: pipeline
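To make the stream wiring above easier to follow, the intended data flow, reconstructed from the streams/globals mappings in this config, is roughly:

# answers                -> answer_tokenizer      -> tokenized_answer_words
# tokenized_answer_words -> answer_onehot_encoder -> encoded_answer_words
# encoded_answer_words   -> answer_bow_encoder    -> bow_answer_words
# bow_answer_words       -> classifier            -> predictions (size taken from vocabulary_size_c4)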
@@ -0,0 +1,98 @@
# Load config defining problems for training, validation and testing.
default_configs: vqa_med_2019/default_vqa_med_2019.yml

# Training parameters:
training:
  problem:
    batch_size: 64
    categories: C4
  sampler:
    name: WeightedRandomSampler
    weights: ~/data/vqa-med/answers.c4.weights.csv
  dataloader:
    num_workers: 4
  # Termination.
  terminal_conditions:
    loss_stop: 1.0e-2
    episode_limit: 10000
    epoch_limit: -1

# Validation parameters:
validation:
  problem:
    batch_size: 64
    categories: C4
  dataloader:
    num_workers: 4


pipeline:

  # Answer encoding.
  answer_indexer:
    type: LabelIndexer
    priority: 0.1
    data_folder: ~/data/vqa-med
    word_mappings_file: answers.c4.word.mappings.csv
    # Export mappings and size to globals.
    export_word_mappings_to_globals: True
    streams:
      inputs: answers
      outputs: answers_ids
    globals:
      vocabulary_size: vocabulary_size_c4
      word_mappings: word_mappings_c4


  # Predictions decoder.
  prediction_decoder:
    type: WordDecoder
    priority: 10.1
    # Use the same word mappings as label indexer.
    import_word_mappings_from_globals: True
    streams:
      inputs: predictions
      outputs: predicted_answers
    globals:
      vocabulary_size: vocabulary_size_c4
      word_mappings: word_mappings_c4

  # Loss
  nllloss:
    type: NLLLoss
    priority: 10.2
    targets_dim: 1
    streams:
      targets: answers_ids
      loss: loss

  # Statistics.
  batch_size:
    type: BatchSizeStatistics
    priority: 100.1

  #accuracy:
  #  type: AccuracyStatistics
  #  priority: 100.2
  #  streams:
  #    targets: answers_ids

  precision_recall:
    type: PrecisionRecallStatistics
    priority: 100.3
    use_word_mappings: True
    show_class_scores: True
    show_confusion_matrix: True
    streams:
      targets: answers_ids
    globals:
      word_mappings: word_mappings_c4
      num_classes: vocabulary_size_c4

  # Viewers.
  viewer:
    type: StreamViewer
    priority: 100.4
    input_streams: questions,category_names,answers,predicted_answers

#: pipeline
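Again reconstructed from the keymappings above (a summary, not part of the committed file), the shared c4 pipeline wires its streams roughly as follows:

# answers     -> answer_indexer     -> answers_ids   (word mappings exported as word_mappings_c4)
# predictions -> prediction_decoder -> predicted_answers
# nllloss compares predictions against answers_ids; precision_recall and the
# StreamViewer consume the same streams for reporting.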
2 changes: 2 additions & 0 deletions ptp/components/models/__init__.py
@@ -10,6 +10,7 @@
 
 from .vqa.element_wise_multiplication import ElementWiseMultiplication
 from .vqa.multimodal_compact_bilinear_pooling import MultimodalCompactBilinearPooling
+from .vqa.relational_network import RelationalNetwork
 
 __all__ = [
     'ConvNetEncoder',
@@ -23,4 +24,5 @@
     'Seq2Seq_RNN',
     'ElementWiseMultiplication',
     'MultimodalCompactBilinearPooling',
+    'RelationalNetwork',
     ]