Skip to content
This repository was archived by the owner on Jul 18, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions configs/default/components/models/attn_decoder_rnn..yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# This file defines the default values for the GRU decoder with attention.

####################################################################
# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
####################################################################

# Size of the hidden state (LOADED)
hidden_size: 100

# Whether to include the last hidden state in the outputs
output_last_state: false

# Type of recurrent cell (LOADED)
# -> Only GRU is supported

# Number of "stacked" layers (LOADED)
# -> Only a single layer is supported

# Dropout rate (LOADED)
# Default: 0 (means that it is turned off)
dropout_rate: 0

# Prediction mode (LOADED)
# Options:
#   * Dense (passes every activation through the output layer) |
#   * Last (passes only the last activation through the output layer) |
#   * None (all outputs are discarded)
prediction_mode: Dense

# Enable FFN layer at the output of the RNN (before eventual feedback in the case of autoregression).
# Useful if the raw outputs of the RNN are needed, e.g. for an attention encoder-decoder.
ffn_output: true

# Length of generated output sequence (LOADED)
# User must set it per task, as it is task specific.
autoregression_length: 10

# If true, output of the last layer will be additionally processed with Log Softmax (LOADED)
use_logsoftmax: true

streams:
  ####################################################################
  # 2. Keymappings associated with INPUT and OUTPUT streams.
  ####################################################################

  # Stream containing batch of encoder outputs (INPUT)
  inputs: inputs

  # Stream containing the initial state of the RNN (INPUT)
  # The stream will be actually created only if `initial_state: Input`
  # (NOTE(review): option name taken from this comment's original spelling "inital_state" —
  # confirm the exact key the component checks before relying on it.)
  input_state: input_state

  # Stream containing predictions (OUTPUT)
  predictions: predictions

  # Stream containing the final output state of the RNN (OUTPUT)
  # The stream will be actually created only if `output_last_state: true`
  output_state: output_state

globals:
  ####################################################################
  # 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
  ####################################################################

  # Size of the input (RETRIEVED)
  input_size: input_size

  # Size of the prediction (RETRIEVED)
  prediction_size: prediction_size

  ####################################################################
  # 4. Keymappings associated with GLOBAL variables that will be SET.
  ####################################################################

  ####################################################################
  # 5. Keymappings associated with statistics that will be ADDED.
  ####################################################################

151 changes: 151 additions & 0 deletions configs/vqa_med_2019/c4_classification/c4_frozen_if_gru_dec.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# Load config defining problems for training, validation and testing.
default_configs:
  vqa_med_2019/default_vqa_med_2019.yml,
  vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml,
  vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml

c4_hyperparameters:
  # In here I am putting some of the hyperparameters from spreadsheet.

  question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize
  # Accepted formats: a,b,c or [a,b,c]
  # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all

  image_preprocessing: &image_preprocessing normalize
  # Accepted formats: a,b,c or [a,b,c]
  # none | random_affine | random_horizontal_flip | normalize | all

  batch_size: &batch_size 256
  preload_images: &preload_images false
  num_workers: &num_workers 4

# Training parameters:
training:
  problem:
    batch_size: *batch_size
    categories: C4
    export_sample_weights: ~/data/vqa-med/answers.c4.weights.csv
    # Apply all preprocessing/data augmentations.
    question_preprocessing: *question_preprocessing
    image_preprocessing: *image_preprocessing
    # Preload images.
    preload_images: *preload_images
    streams:
      questions: tokenized_questions
  sampler:
    weights: ~/data/vqa-med/answers.c4.weights.csv
  # Use four workers for loading images.
  dataloader:
    num_workers: *num_workers

  # Optimizer parameters:
  optimizer:
    name: Adam
    lr: 0.0001

  # Terminal conditions:
  terminal_conditions:
    loss_stop: 1.0e-3
    episode_limit: 10000
    epoch_limit: -1

# Validation parameters:
validation:
  partial_validation_interval: 100
  problem:
    batch_size: *batch_size
    categories: C4
    # Apply all preprocessing/data augmentations.
    question_preprocessing: *question_preprocessing
    image_preprocessing: *image_preprocessing
    # Preload images: false, as we will need them only once, at the end.
    preload_images: false
    streams:
      questions: tokenized_questions
  dataloader:
    num_workers: 1


pipeline:

  ################# PIPE 6: C1 + C2 + C3 questions #################

  # Answer encoding.
  pipe6_c123_binary_yn_answer_indexer:
    priority: 6.2
    type: LabelIndexer
    data_folder: ~/data/vqa-med
    word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
    # Export mappings and size to globals.
    export_word_mappings_to_globals: true
    streams:
      inputs: answers
      outputs: answers_ids
    globals:
      vocabulary_size: vocabulary_size_c123_binary_yn
      word_mappings: word_mappings_c123_binary_yn


  # Model 4: FFN C123 answering
  pipe6_c123_binary_yn_answer_classifier:
    priority: 6.3
    type: FeedForwardNetwork
    # NOTE(review): anchor &answer_classifier_hidden_sizes_val is not defined in this
    # file, and YAML aliases do not resolve across files — confirm the loader
    # concatenates configs before parsing, or define the anchor here.
    hidden: *answer_classifier_hidden_sizes_val
    dropout_rate: 0.5
    streams:
      inputs: concatenated_activations
      predictions: pipe6_c123_predictions
    globals:
      input_size: concatenated_activations_size
      prediction_size: vocabulary_size_c123_binary_yn

  pipe6_c123_binary_yn_nllloss:
    priority: 6.4
    type: NLLLoss
    targets_dim: 1
    streams:
      predictions: pipe6_c123_predictions
      targets: answers_ids
      loss: pipe6_c123_loss

  pipe6_c123_binary_yn_precision_recall:
    priority: 6.5
    type: PrecisionRecallStatistics
    use_word_mappings: true
    show_class_scores: true
    #show_confusion_matrix: true
    streams:
      predictions: pipe6_c123_predictions
      targets: answers_ids
    globals:
      word_mappings: word_mappings_c123_binary_yn
    statistics:
      precision: pipe6_c123_precision
      recall: pipe6_c123_recall
      f1score: pipe6_c123_f1score

  # C123 Predictions decoder.
  # NOTE(review): section name says "pipe5" but priority (6.6) and streams belong to
  # pipe 6 — likely a copy-paste leftover; kept as-is in case other configs reference it.
  pipe5_c123_binary_yn_prediction_decoder:
    priority: 6.6
    type: WordDecoder
    # Use the same word mappings as label indexer.
    import_word_mappings_from_globals: true
    streams:
      inputs: pipe6_c123_predictions
      outputs: predicted_answers
    globals:
      word_mappings: word_mappings_c123_binary_yn

  ################# PIPE 9: MERGE ANSWERS #################

  # Viewers.
  viewer:
    priority: 9.3
    type: StreamViewer
    input_streams:
      tokenized_questions,
      category_names, predicted_category_names,
      answers, predicted_answers


#: pipeline
Loading