From 1941d83dacca66dd0614bdf9b885f445b7f7589b Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 17 Apr 2019 13:02:24 -0700 Subject: [PATCH 1/9] Changed default out_of_vocabulary value to -100, which is used by PyTorch to indicate targets that will be omitted --- configs/default/components/masking/string_to_mask.yml | 7 +++++-- configs/default/components/text/label_indexer.yml | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/configs/default/components/masking/string_to_mask.yml b/configs/default/components/masking/string_to_mask.yml index bcc363d..2b74456 100644 --- a/configs/default/components/masking/string_to_mask.yml +++ b/configs/default/components/masking/string_to_mask.yml @@ -4,9 +4,12 @@ # 1. CONFIGURATION PARAMETERS that will be LOADED by the component. #################################################################### -# Value that will be used when word is out of vocavbulary (LOADED) +# Value that will be used when word is out of vocabulary (LOADED) # (Mask for that element will be 0 as well) -out_of_vocabulary_value: -1 +# -100 is the default value used by PyTorch loss functions to specify +# target values that will be ignored and do not contribute to the input gradient. 
+# (ignore_index=-100) +out_of_vocabulary_value: -100 streams: #################################################################### diff --git a/configs/default/components/text/label_indexer.yml b/configs/default/components/text/label_indexer.yml index 5b871e9..bfe9aa0 100644 --- a/configs/default/components/text/label_indexer.yml +++ b/configs/default/components/text/label_indexer.yml @@ -25,9 +25,12 @@ import_word_mappings_from_globals: False # Flag informing whether word mappings will be exported to globals (LOADED) export_word_mappings_to_globals: False -# Value that will be used when word is out of vocavbulary (LOADED) +# Value that will be used when word is out of vocabulary (LOADED) # (Mask for that element will be 0 as well) -out_of_vocabulary_value: -1 +# -100 is the default value used by PyTorch loss functions to specify +# target values that will be ignored and do not contribute to the input gradient. +# (ignore_index=-100) +out_of_vocabulary_value: -100 streams: #################################################################### From 349b5903a80634be5e7d5cecf7882437d35d98b1 Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 17 Apr 2019 13:02:46 -0700 Subject: [PATCH 2/9] c1 + c2 + Y/N multimodal config --- ...nn_shared_all_encoders_two_ffns_losses.yml | 2 +- ...nn_shared_all_encoders_two_ffns_losses.yml | 472 ++++++++++++++++++ 2 files changed, 473 insertions(+), 1 deletion(-) create mode 100644 configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml index 4ffc007..6996f91 100644 --- a/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml +++ b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml @@ -394,7 +394,7 @@ pipeline: # Viewers. 
viewer: type: StreamViewer - priority: 4.3 + priority: 7.3 input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_binary_masks,pipe6_binary_answers_ids,pipe6_binary_predictions, pipe7_merged_predictions diff --git a/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml new file mode 100644 index 0000000..efa6580 --- /dev/null +++ b/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml @@ -0,0 +1,472 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1,C3 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1_c3_binary_yn.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1,C3 + dataloader: + num_workers: 4 + + +pipeline: + name: c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_c3_word_to_ix, category_binary_word_to_ix] + values: [100, 2, 10, 100,{"C1": 0}, {"C3": 0}, {"BINARY": 0}] + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 0.1 + + # Questions encoding. 
+ pipe1_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: 
WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + #vocabulary_size: vocabulary_size_all_c1_c3_binary + word_mappings: word_mappings_all_c1_c3_binary + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. 
+ image_encoder: + type: TorchVisionWrapper + priority: 2.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 question ################# + + # Answer encoding for PIPE 5. + pipe5_c1_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_without_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_c1_answers_without_yn_ids + globals: + vocabulary_size: vocabulary_size_c1_without_yn + word_mappings: word_mappings_c1_without_yn + + # Sample masking based on categories. 
+ pipe5_c1_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c1_question_categories_indices # NOT USED + masks: pipe5_c1_masks + + # Model 4: FFN C1 answering + pipe5_c1_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_c1_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c1_without_yn + + pipe5_c1_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_c1_predictions + masks: pipe5_c1_masks + targets: pipe5_c1_answers_without_yn_ids + loss: pipe5_c1_loss + + pipe5_c1_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_c1_masks + predictions: pipe5_c1_predictions + targets: pipe5_c1_answers_without_yn_ids + globals: + word_mappings: word_mappings_c1_without_yn + #num_classes: vocabulary_size_c1_without_yn + statistics: + precision: pipe5_c1_precision + recall: pipe5_c1_recall + f1score: pipe5_c1_f1score + + ################# PIPE 6: C3 question ################# + + # Answer encoding for PIPE 6. + pipe6_c3_answer_indexer: + type: LabelIndexer + priority: 6.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c3.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe6_c3_answers_ids + globals: + vocabulary_size: vocabulary_size_c3 + word_mappings: word_mappings_c3 + + # Sample masking based on categories. 
+ pipe6_c3_string_to_mask: + priority: 6.2 + type: StringToMask + globals: + word_mappings: category_c3_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c3_question_categories_indices # NOT USED + masks: pipe6_c3_masks + + # Model 4: FFN C3 answering + pipe6_c3_ffn: + priority: 6.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c3_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c3 + + pipe6_c3_nllloss: + type: NLLLoss + priority: 6.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe6_c3_predictions + masks: pipe6_c3_masks + targets: pipe6_c3_answers_ids + loss: pipe6_c3_loss + + pipe6_c3_precision_recall: + type: PrecisionRecallStatistics + priority: 6.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe6_c3_masks + predictions: pipe6_c3_predictions + targets: pipe6_c3_answers_ids + globals: + word_mappings: word_mappings_c3 + #num_classes: vocabulary_size_c3 + statistics: + precision: pipe6_c3_precision + recall: pipe6_c3_recall + f1score: pipe6_c3_f1score + + ################# PIPE 7: BINARY question ################# + + # Answer encoding for pipe 7. + pipe7_binary_answer_indexer: + type: LabelIndexer + priority: 7.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe7_binary_answers_ids + globals: + vocabulary_size: vocabulary_size_binary_yn + word_mappings: word_mappings_binary_yn + + pipe7_binary_string_to_mask: + priority: 7.2 + type: StringToMask + globals: + word_mappings: category_binary_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_binary_question_categories_indices # NOT USED + masks: pipe7_binary_masks + + # Model 4: FFN binary (Y/N) answering + pipe7_binary_ffn: + priority: 7.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe7_binary_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_binary_yn + + pipe7_binary_nllloss: + type: NLLLoss + priority: 7.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe7_binary_predictions + masks: pipe7_binary_masks + targets: pipe7_binary_answers_ids + loss: pipe7_binary_loss + + pipe7_binary_precision_recall: + type: PrecisionRecallStatistics + priority: 7.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe7_binary_masks + predictions: pipe7_binary_predictions + targets: pipe7_binary_answers_ids + globals: + word_mappings: word_mappings_binary_yn + #num_classes: vocabulary_size_binary_yn + statistics: + precision: pipe7_binary_precision + recall: pipe7_binary_recall + f1score: pipe7_binary_f1score + + ################# PIPE 8: MERGE ANSWERS ################# + + # Merge predictions + pipe8_merged_predictions: + type: JoinMaskedPredictions + priority: 8.1 + # Names of used input streams. 
+ input_prediction_streams: [pipe5_c1_predictions, pipe6_c3_predictions, pipe7_binary_predictions] + input_mask_streams: [pipe5_c1_masks, pipe6_c3_masks, pipe7_binary_masks] + input_word_mappings: [word_mappings_c1_without_yn, word_mappings_c3, word_mappings_binary_yn] + globals: + output_word_mappings: word_mappings_all_c1_c3_binary + streams: + output_strings: pipe8_merged_predictions + output_indices: pipe8_merged_pred_indices + + # Statistics. + pipe8_merged_precision_recall: + type: PrecisionRecallStatistics + priority: 8.2 + # Use prediction indices instead of distributions. + use_prediction_distributions: False + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + globals: + word_mappings: word_mappings_all_c1_c3_binary + streams: + targets: all_answers_ids + predictions: pipe8_merged_pred_indices + statistics: + precision: pipe8_merged_precision + recall: pipe8_merged_recall + f1score: pipe8_merged_f1score + + # Viewers. + viewer: + type: StreamViewer + priority: 8.3 + input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_c3_masks,pipe6_c3_answers_ids,pipe6_c3_predictions, pipe7_binary_masks,pipe7_binary_answers_ids,pipe7_binary_predictions, pipe8_merged_predictions + + +#: pipeline From cadc3da8a4339e9ea25832f2fcdbd717f0e29e7c Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 17 Apr 2019 14:17:27 -0700 Subject: [PATCH 3/9] c2 multimodal model --- ...c2_classification_all_rnn_vgg16_concat.yml | 101 ++++++++++++++++++ .../default_c2_classification.yml | 91 ++++++++++++++++ 2 files changed, 192 insertions(+) create mode 100644 configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml create mode 100644 configs/vqa_med_2019/c2_classification/default_c2_classification.yml diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml 
b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml new file mode 100644 index 0000000..d4745b6 --- /dev/null +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml @@ -0,0 +1,101 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml + +pipeline: + name: vqa_med_c2_classification_all_rnn_vgg_concat + + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_embeddings_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size] + values: [100, 2, 10, 100] + + # First subpipeline: question. + # Questions encoding. + question_tokenizer: + type: SentenceTokenizer + priority: 1.1 + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: Embeddings + question_embeddings: + type: SentenceEmbeddings + priority: 1.2 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + priority: 1.3 + use_logsoftmax: False + initial_state_trainable: False + #num_layers: 5 + hidden_size: 50 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_embeddings_output_size + + # 2nd subpipeline: image size. + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 2.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + # 3rd subpipeline: image. + # Image encoder. 
+ image_encoder: + type: TorchVisionWrapper + priority: 3.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + # 4th subpipeline: concatenation + FF. + concat: + type: Concatenation + priority: 4.1 + input_streams: [question_activations,image_size_activations,image_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,10],[-1,100]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: output_size + + + classifier: + type: FeedForwardNetwork + hidden_sizes: [100] + priority: 4.2 + streams: + inputs: concatenated_activations + globals: + input_size: output_size + prediction_size: vocabulary_size_c2 + + + #: pipeline diff --git a/configs/vqa_med_2019/c2_classification/default_c2_classification.yml b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml new file mode 100644 index 0000000..3df45b4 --- /dev/null +++ b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml @@ -0,0 +1,91 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C2 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c2.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C2 + dataloader: + num_workers: 4 + + +pipeline: + + # Answer encoding. + answer_indexer: + type: LabelIndexer + priority: 0.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c2.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c2 + word_mappings: word_mappings_c2 + + + # Predictions decoder. 
+ prediction_decoder: + type: WordDecoder + priority: 10.1 + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: predictions + outputs: predicted_answers + globals: + vocabulary_size: vocabulary_size_c2 + word_mappings: word_mappings_c2 + + # Loss + nllloss: + type: NLLLoss + priority: 10.2 + targets_dim: 1 + streams: + targets: answers_ids + loss: loss + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 100.1 + + #accuracy: + # type: AccuracyStatistics + # priority: 100.2 + # streams: + # targets: answers_ids + + precision_recall: + type: PrecisionRecallStatistics + priority: 100.3 + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + streams: + targets: answers_ids + globals: + word_mappings: word_mappings_c2 + num_classes: vocabulary_size_c2 + + # Viewers. + viewer: + type: StreamViewer + priority: 100.4 + input_streams: questions,category_names,answers,predicted_answers + +#: pipeline From b9d3cd52441bb21e33f61e2ab1f3d68636369a90 Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 17 Apr 2019 14:20:16 -0700 Subject: [PATCH 4/9] rename c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses --- ...binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename configs/vqa_med_2019/vf/{c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml => c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml} (99%) diff --git a/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml similarity index 99% rename from configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml rename to configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml index efa6580..3b1d952 100644 --- 
a/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml +++ b/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml @@ -20,7 +20,7 @@ validation: pipeline: - name: c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses + name: c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses ################# PIPE 0: SHARED ################# From 5825c76ed645c5dbc2de3bbff2394cd5a78d4f9b Mon Sep 17 00:00:00 2001 From: tkornut Date: Thu, 18 Apr 2019 06:17:18 -0700 Subject: [PATCH 5/9] microupdate of wikitext lm rnn config --- configs/wikitext/wikitext_language_modeling_rnn.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/configs/wikitext/wikitext_language_modeling_rnn.yml b/configs/wikitext/wikitext_language_modeling_rnn.yml index 0d5a5c3..88274b9 100644 --- a/configs/wikitext/wikitext_language_modeling_rnn.yml +++ b/configs/wikitext/wikitext_language_modeling_rnn.yml @@ -72,7 +72,7 @@ pipeline: lstm: type: RecurrentNeuralNetwork priority: 3 - #initial_state_trainable: False + initial_state_trainable: False streams: inputs: embedded_sources globals: @@ -88,4 +88,10 @@ pipeline: targets: indexed_targets loss: loss + # Viewers. 
+ viewer: + type: StreamViewer + priority: 100.1 + input_streams: sources,indexed_targets,targets,predictions + #: pipeline From c7c183b7c73350f54463711e31b5cf54daf7e88c Mon Sep 17 00:00:00 2001 From: tkornut Date: Thu, 18 Apr 2019 06:41:06 -0700 Subject: [PATCH 6/9] c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml --- ...n_shared_all_encoders_four_ffns_losses.yml | 542 ++++++++++++++++++ 1 file changed, 542 insertions(+) create mode 100644 configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml new file mode 100644 index 0000000..065a14e --- /dev/null +++ b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml @@ -0,0 +1,542 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1,C2,C3 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1,C2,C3 + dataloader: + num_workers: 4 + + +pipeline: + name: c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_c2_word_to_ix, category_c3_word_to_ix, category_binary_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0}, {"C2": 0}, {"C3": 0}, {"BINARY": 0}] + + # Statistics. 
+ batch_size: + type: BatchSizeStatistics + priority: 0.1 + + # Questions encoding. + pipe1_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: 
num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + #vocabulary_size: vocabulary_size_all_c1_c2_c3_binary + word_mappings: word_mappings_all_c1_c2_c3_binary + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. 
+ image_encoder: + type: TorchVisionWrapper + priority: 2.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 question ################# + + # Answer encoding for PIPE 5. + pipe5_c1_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_without_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_c1_answers_without_yn_ids + globals: + vocabulary_size: vocabulary_size_c1_without_yn + word_mappings: word_mappings_c1_without_yn + + # Sample masking based on categories. 
+ pipe5_c1_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c1_question_categories_indices # NOT USED + masks: pipe5_c1_masks + + # Model 4: FFN C1 answering + pipe5_c1_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_c1_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c1_without_yn + + pipe5_c1_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_c1_predictions + masks: pipe5_c1_masks + targets: pipe5_c1_answers_without_yn_ids + loss: pipe5_c1_loss + + pipe5_c1_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_c1_masks + predictions: pipe5_c1_predictions + targets: pipe5_c1_answers_without_yn_ids + globals: + word_mappings: word_mappings_c1_without_yn + #num_classes: vocabulary_size_c1_without_yn + statistics: + precision: pipe5_c1_precision + recall: pipe5_c1_recall + f1score: pipe5_c1_f1score + + ################# PIPE 6: C2 question ################# + + # Answer encoding for PIPE 6. + pipe6_c2_answer_indexer: + type: LabelIndexer + priority: 6.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c2.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe6_c2_answers_ids + globals: + vocabulary_size: vocabulary_size_c2 + word_mappings: word_mappings_c2 + + # Sample masking based on categories. 
+ pipe6_c2_string_to_mask: + priority: 6.2 + type: StringToMask + globals: + word_mappings: category_c2_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c2_question_categories_indices # NOT USED + masks: pipe6_c2_masks + + # Model 4: FFN C2 answering + pipe6_c2_ffn: + priority: 6.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c2_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c2 + + pipe6_c2_nllloss: + type: NLLLoss + priority: 6.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe6_c2_predictions + masks: pipe6_c2_masks + targets: pipe6_c2_answers_ids + loss: pipe6_c2_loss + + pipe6_c2_precision_recall: + type: PrecisionRecallStatistics + priority: 6.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe6_c2_masks + predictions: pipe6_c2_predictions + targets: pipe6_c2_answers_ids + globals: + word_mappings: word_mappings_c2 + statistics: + precision: pipe6_c2_precision + recall: pipe6_c2_recall + f1score: pipe6_c2_f1score + + ################# PIPE 7: C3 question ################# + + # Answer encoding for PIPE 7. + pipe7_c3_answer_indexer: + type: LabelIndexer + priority: 7.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c3.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe7_c3_answers_ids + globals: + vocabulary_size: vocabulary_size_c3 + word_mappings: word_mappings_c3 + + # Sample masking based on predicted question categories.
+ pipe7_c3_string_to_mask: + priority: 7.2 + type: StringToMask + globals: + word_mappings: category_c3_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c3_question_categories_indices # NOT USED + masks: pipe7_c3_masks + + # Model 4: FFN C3 answering + pipe7_c3_ffn: + priority: 7.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe7_c3_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c3 + + pipe7_c3_nllloss: + type: NLLLoss + priority: 7.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe7_c3_predictions + masks: pipe7_c3_masks + targets: pipe7_c3_answers_ids + loss: pipe7_c3_loss + + pipe7_c3_precision_recall: + type: PrecisionRecallStatistics + priority: 7.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe7_c3_masks + predictions: pipe7_c3_predictions + targets: pipe7_c3_answers_ids + globals: + word_mappings: word_mappings_c3 + #num_classes: vocabulary_size_c3 + statistics: + precision: pipe7_c3_precision + recall: pipe7_c3_recall + f1score: pipe7_c3_f1score + + ################# PIPE 8: BINARY question ################# + + # Answer encoding for pipe 8. + pipe8_binary_answer_indexer: + type: LabelIndexer + priority: 8.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe8_binary_answers_ids + globals: + vocabulary_size: vocabulary_size_binary_yn + word_mappings: word_mappings_binary_yn + + pipe8_binary_string_to_mask: + priority: 8.2 + type: StringToMask + globals: + word_mappings: category_binary_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_binary_question_categories_indices # NOT USED + masks: pipe8_binary_masks + + # Model 4: FFN binary (Y/N) answering + pipe8_binary_ffn: + priority: 8.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe8_binary_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_binary_yn + + pipe8_binary_nllloss: + type: NLLLoss + priority: 8.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe8_binary_predictions + masks: pipe8_binary_masks + targets: pipe8_binary_answers_ids + loss: pipe8_binary_loss + + pipe8_binary_precision_recall: + type: PrecisionRecallStatistics + priority: 8.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe8_binary_masks + predictions: pipe8_binary_predictions + targets: pipe8_binary_answers_ids + globals: + word_mappings: word_mappings_binary_yn + #num_classes: vocabulary_size_binary_yn + statistics: + precision: pipe8_binary_precision + recall: pipe8_binary_recall + f1score: pipe8_binary_f1score + + ################# PIPE 9: MERGE ANSWERS ################# + + # Merge predictions. + pipe9_merged_predictions: + type: JoinMaskedPredictions + priority: 9.1 + # Names of used input streams. 
+ input_prediction_streams: [pipe5_c1_predictions, pipe6_c2_predictions, pipe7_c3_predictions, pipe8_binary_predictions] + input_mask_streams: [pipe5_c1_masks, pipe6_c2_masks, pipe7_c3_masks, pipe8_binary_masks] + input_word_mappings: [word_mappings_c1_without_yn, word_mappings_c2, word_mappings_c3, word_mappings_binary_yn] + globals: + output_word_mappings: word_mappings_all_c1_c2_c3_binary + streams: + output_strings: pipe9_merged_predictions + output_indices: pipe9_merged_pred_indices + + # Statistics. + pipe9_merged_precision_recall: + type: PrecisionRecallStatistics + priority: 9.2 + # Use prediction indices instead of distributions. + use_prediction_distributions: False + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + globals: + word_mappings: word_mappings_all_c1_c2_c3_binary + streams: + targets: all_answers_ids + predictions: pipe9_merged_pred_indices + statistics: + precision: pipe9_merged_precision + recall: pipe9_merged_recall + f1score: pipe9_merged_f1score + + # Viewers. 
+ viewer: + type: StreamViewer + priority: 9.3 + input_streams: questions,answers, category_names,pipe0_predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe7_c3_masks,pipe7_c3_answers_ids,pipe7_c3_predictions, pipe8_binary_masks,pipe8_binary_answers_ids,pipe8_binary_predictions, pipe9_merged_predictions + + +#: pipeline From a9199b5acfeb3c2543690e40934af59e1547ef96 Mon Sep 17 00:00:00 2001 From: tkornut Date: Thu, 18 Apr 2019 06:44:51 -0700 Subject: [PATCH 7/9] added viewing of streams related to C2 --- ...3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml index 065a14e..56ab04b 100644 --- a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml +++ b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml @@ -536,7 +536,7 @@ pipeline: viewer: type: StreamViewer priority: 9.3 - input_streams: questions,answers, category_names,pipe0_predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe7_c3_masks,pipe7_c3_answers_ids,pipe7_c3_predictions, pipe8_binary_masks,pipe8_binary_answers_ids,pipe8_binary_predictions, pipe9_merged_predictions + input_streams: questions,answers, category_names,pipe0_predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_c2_masks,pipe6_c2_answers_ids,pipe6_c2_predictions, pipe7_c3_masks,pipe7_c3_answers_ids,pipe7_c3_predictions, pipe8_binary_masks,pipe8_binary_answers_ids,pipe8_binary_predictions, pipe9_merged_predictions #: pipeline From 5967f1185f1e897122eca10cb2d842242aa94865 Mon Sep 17 00:00:00 2001 From: tkornut Date: Thu, 18 Apr 2019 06:54:44 -0700 Subject: 
[PATCH 8/9] one ffn --- ...t_rnn_shared_all_encoders_one_ffn_loss.yml | 298 ++++++++++++++++++ 1 file changed, 298 insertions(+) create mode 100644 configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml new file mode 100644 index 0000000..3c47597 --- /dev/null +++ b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml @@ -0,0 +1,298 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1,C2,C3 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1,C2,C3 + dataloader: + num_workers: 4 + + +pipeline: + name: c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add encoder sizes and the category-to-index mapping to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_binary_yn_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2, "BINARY": 3}] + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 0.1 + + # Questions encoding. 
+ pipe1_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings (loaded from the categorization checkpoint and frozen) + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN (loaded from the categorization checkpoint and frozen) + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category classifier (loaded from the categorization checkpoint and frozen) + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: 
WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings (separate component from pipe0_question_embeddings; no checkpoint is loaded here) + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN (separate component from pipe0_lstm; no checkpoint is loaded here) + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding (uses the joint C1/C2/C3/binary Y/N mappings file) + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + vocabulary_size: vocabulary_size_all_c1_c2_c3_binary + word_mappings: word_mappings_all_c1_c2_c3_binary + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. 
+ image_encoder: + type: TorchVisionWrapper + priority: 2.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size encoder (feed-forward network). + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation dimension and per-stream dimensions. + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] # 210 is the sum of the input widths 100, 100 and 10 + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 + C2 + C3 + Binary Y/N question ################# + + # Answer encoding for PIPE 5. + pipe5_all_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_all_answers_ids + globals: + vocabulary_size: vocabulary_size_c1_c2_c3_binary + word_mappings: word_mappings_all_c1_c2_c3_binary # NOTE(review): pipe1_all_answer_indexer exports the same global from the same file - confirm the duplicate export is intended + + # Sample masking based on predicted question categories.
+ pipe5_all_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_c2_c3_binary_yn_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c1_c2_c3_binary_by_question_categories_indices # NOT USED + masks: pipe5_all_masks + + # Model 4: FFN answering all question categories (C1, C2, C3, binary Y/N) + pipe5_all_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_all_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c1_c2_c3_binary + + pipe5_all_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_all_predictions + masks: pipe5_all_masks + targets: pipe5_all_answers_ids + loss: pipe5_all_loss + + pipe5_all_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_all_masks + predictions: pipe5_all_predictions + targets: pipe5_all_answers_ids + globals: + word_mappings: word_mappings_all_c1_c2_c3_binary + statistics: + precision: pipe5_all_precision + recall: pipe5_all_recall + f1score: pipe5_all_f1score + + + ################# PIPE 9: VIEWER ################# + + # Viewers. 
+ viewer: + type: StreamViewer + priority: 9.3 # after the pipe5 components (priorities 5.1-5.5) + input_streams: questions,answers, category_names,pipe0_predicted_question_categories_names, pipe5_all_masks,pipe5_all_answers_ids,pipe5_all_predictions + + +#: pipeline From 66b94c34028127bdee520116ac8ea160ed31ea00 Mon Sep 17 00:00:00 2001 From: tkornut Date: Thu, 18 Apr 2019 06:55:18 -0700 Subject: [PATCH 9/9] rename c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss --- ...1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename configs/vqa_med_2019/vf/{c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml => c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml} (99%) diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml b/configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml similarity index 99% rename from configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml rename to configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml index 3c47597..ef8f535 100644 --- a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml @@ -20,7 +20,7 @@ validation: pipeline: - name: c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss + name: c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss ################# PIPE 0: SHARED #################