Skip to content
This repository was archived by the owner on Jul 18, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions configs/default/components/models/attn_decoder_rnn..yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# This file defines the default values for the GRU decoder with attention.

####################################################################
# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
####################################################################

# Size of the hidden state (LOADED)
hidden_size: 100

# Whether to include the last hidden state in the outputs
output_last_state: false

# Type of recurrent cell (LOADED)
# -> Only GRU is supported

# Number of "stacked" layers (LOADED)
# -> Only a single layer is supported

# Dropout rate (LOADED)
# Default: 0 (means that it is turned off)
dropout_rate: 0

# Prediction mode (LOADED)
# Options:
#   * Dense (passes every activation through the output layer) |
#   * Last (passes only the last activation through the output layer) |
#   * None (all outputs are discarded)
prediction_mode: Dense

# Enable FFN layer at the output of the RNN (before eventual feedback in the case of autoregression).
# Useful if the raw outputs of the RNN are needed, e.g. for an attention encoder-decoder.
ffn_output: true

# Length of generated output sequence (LOADED)
# User must set it per task, as it is task specific.
autoregression_length: 10

# If true, output of the last layer will be additionally processed with Log Softmax (LOADED)
use_logsoftmax: true

streams:
  ####################################################################
  # 2. Keymappings associated with INPUT and OUTPUT streams.
  ####################################################################

  # Stream containing batch of encoder outputs (INPUT)
  inputs: inputs

  # Stream containing the initial state of the RNN (INPUT)
  # The stream will be actually created only if `initial_state: Input`
  # (NOTE(review): option name taken from this comment's original spelling "inital_state" —
  # confirm the exact key the component checks before relying on it.)
  input_state: input_state

  # Stream containing predictions (OUTPUT)
  predictions: predictions

  # Stream containing the final output state of the RNN (OUTPUT)
  # The stream will be actually created only if `output_last_state: true`
  output_state: output_state

globals:
  ####################################################################
  # 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
  ####################################################################

  # Size of the input (RETRIEVED)
  input_size: input_size

  # Size of the prediction (RETRIEVED)
  prediction_size: prediction_size

  ####################################################################
  # 4. Keymappings associated with GLOBAL variables that will be SET.
  ####################################################################

  ####################################################################
  # 5. Keymappings associated with statistics that will be ADDED.
  ####################################################################

151 changes: 151 additions & 0 deletions configs/vqa_med_2019/c4_classification/c4_frozen_if_gru_dec.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# Load config defining problems for training, validation and testing.
default_configs:
  vqa_med_2019/default_vqa_med_2019.yml,
  vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml,
  vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml

c4_hyperparameters:
  # In here I am putting some of the hyperparameters from spreadsheet.

  question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize
  # Accepted formats: a,b,c or [a,b,c]
  # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all

  image_preprocessing: &image_preprocessing normalize
  # Accepted formats: a,b,c or [a,b,c]
  # none | random_affine | random_horizontal_flip | normalize | all

  batch_size: &batch_size 256
  preload_images: &preload_images false
  num_workers: &num_workers 4

# Training parameters:
training:
  problem:
    batch_size: *batch_size
    categories: C4
    export_sample_weights: ~/data/vqa-med/answers.c4.weights.csv
    # Apply all preprocessing/data augmentations.
    question_preprocessing: *question_preprocessing
    image_preprocessing: *image_preprocessing
    # Preload images.
    preload_images: *preload_images
    streams:
      questions: tokenized_questions
  sampler:
    weights: ~/data/vqa-med/answers.c4.weights.csv
  # Use four workers for loading images.
  dataloader:
    num_workers: *num_workers

  # Optimizer parameters:
  optimizer:
    name: Adam
    lr: 0.0001

  # Terminal conditions:
  terminal_conditions:
    loss_stop: 1.0e-3
    episode_limit: 10000
    epoch_limit: -1

# Validation parameters:
validation:
  partial_validation_interval: 100
  problem:
    batch_size: *batch_size
    categories: C4
    # Apply all preprocessing/data augmentations.
    question_preprocessing: *question_preprocessing
    image_preprocessing: *image_preprocessing
    # Preload images: false, as we will need them only once, at the end.
    preload_images: false
    streams:
      questions: tokenized_questions
  dataloader:
    num_workers: 1


pipeline:

  ################# PIPE 6: C1 + C2 + C3 questions #################

  # Answer encoding.
  pipe6_c123_binary_yn_answer_indexer:
    priority: 6.2
    type: LabelIndexer
    data_folder: ~/data/vqa-med
    word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
    # Export mappings and size to globals.
    export_word_mappings_to_globals: true
    streams:
      inputs: answers
      outputs: answers_ids
    globals:
      vocabulary_size: vocabulary_size_c123_binary_yn
      word_mappings: word_mappings_c123_binary_yn


  # Model 4: FFN C123 answering
  pipe6_c123_binary_yn_answer_classifier:
    priority: 6.3
    type: FeedForwardNetwork
    # NOTE(review): anchor &answer_classifier_hidden_sizes_val is not defined in this
    # file, and YAML aliases do not resolve across files — confirm the loader
    # concatenates configs before parsing, or define the anchor here.
    hidden: *answer_classifier_hidden_sizes_val
    dropout_rate: 0.5
    streams:
      inputs: concatenated_activations
      predictions: pipe6_c123_predictions
    globals:
      input_size: concatenated_activations_size
      prediction_size: vocabulary_size_c123_binary_yn

  pipe6_c123_binary_yn_nllloss:
    priority: 6.4
    type: NLLLoss
    targets_dim: 1
    streams:
      predictions: pipe6_c123_predictions
      targets: answers_ids
      loss: pipe6_c123_loss

  pipe6_c123_binary_yn_precision_recall:
    priority: 6.5
    type: PrecisionRecallStatistics
    use_word_mappings: true
    show_class_scores: true
    #show_confusion_matrix: true
    streams:
      predictions: pipe6_c123_predictions
      targets: answers_ids
    globals:
      word_mappings: word_mappings_c123_binary_yn
    statistics:
      precision: pipe6_c123_precision
      recall: pipe6_c123_recall
      f1score: pipe6_c123_f1score

  # C123 Predictions decoder.
  # NOTE(review): section name says "pipe5" but priority (6.6) and streams belong to
  # pipe 6 — likely a copy-paste leftover; kept as-is in case other configs reference it.
  pipe5_c123_binary_yn_prediction_decoder:
    priority: 6.6
    type: WordDecoder
    # Use the same word mappings as label indexer.
    import_word_mappings_from_globals: true
    streams:
      inputs: pipe6_c123_predictions
      outputs: predicted_answers
    globals:
      word_mappings: word_mappings_c123_binary_yn

  ################# PIPE 9: MERGE ANSWERS #################

  # Viewers.
  viewer:
    priority: 9.3
    type: StreamViewer
    input_streams:
      tokenized_questions,
      category_names, predicted_category_names,
      answers, predicted_answers


#: pipeline
Loading