From e380eb690a4bffbde9f3265ea259a5a80ffd7c9d Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 10:16:15 -0700 Subject: [PATCH 1/9] frozen pipeline for input fusion --- ...stm_resnet152_att_is_cat_ffn_c123_loss.yml | 2 +- ...stm_resnet152_mcb_is_cat_ffn_c123_loss.yml | 2 +- ...ve_lstm_vgg16_att_is_cat_ffn_c123_loss.yml | 6 +- .../c123_frozen_if_ffn_c123_loss.yml | 157 ++++++++++++++++++ ...nput_fusion_glove_lstm_vgg_att_is_cat.yml} | 85 ++++++++-- ...question_categorization_glove_rnn_ffn.yml} | 0 .../frozen_word_answer_glove_sum.yml | 0 7 files changed, 229 insertions(+), 23 deletions(-) create mode 100644 configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml rename configs/vqa_med_2019/{c4_classification/frozen_pipeline_input_fusion_glove_lstm_vgg_att_is_cat.yml => frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml} (65%) rename configs/vqa_med_2019/{question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml => frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml} (100%) create mode 100644 configs/vqa_med_2019/frozen_pipelines/frozen_word_answer_glove_sum.yml diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml index 232cbbe..a849432 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml @@ -1,5 +1,5 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml +default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_qc_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml index bca7a7f..87e8912 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml @@ -1,5 +1,5 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml +default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_qc_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml index 3f9aa05..586a990 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml @@ -40,9 +40,9 @@ hyperparameters: # Final classifier: FFN. 
answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500] - batch_size: &batch_size 100 - preload_images: &preload_images True - num_workers: &num_workers 1 + batch_size: &batch_size 300 + preload_images: &preload_images False + num_workers: &num_workers 4 # Training parameters: training: diff --git a/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml b/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml new file mode 100644 index 0000000..4a8783c --- /dev/null +++ b/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml @@ -0,0 +1,157 @@ +# Load config defining problems for training, validation and testing. +default_configs: + vqa_med_2019/default_vqa_med_2019.yml, + vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml, + vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml + +hyperparameters: + # In here I am putting some of the hyperparameters from spreadsheet. + + question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize + # Accepted formats: a,b,c or [a,b,c] + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + + image_preprocessing: &image_preprocessing normalize + # Accepted formats: a,b,c or [a,b,c] + # none | random_affine | random_horizontal_flip | normalize | all + + # Fusion II: (image + question) + image size (must be = question_image_fusion_size_val + image_size_encoder_output_size_val) + question_image_size_fusion_size_val: &question_image_size_fusion_size_val 1134 + + # Final classifier: FFN. + answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500] + + batch_size: &batch_size 256 + preload_images: &preload_images False + num_workers: &num_workers 4 + +# Training parameters: +training: + problem: + batch_size: *batch_size + categories: C1,C2,C3 + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images. + preload_images: *preload_images + streams: + questions: tokenized_questions + sampler: + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Use four workers for loading images. + dataloader: + num_workers: *num_workers + + # Optimizer parameters: + optimizer: + name: Adam + lr: 0.0001 + + # Terminal conditions: + terminal_conditions: + loss_stop: 1.0e-3 + episode_limit: 10000 + epoch_limit: -1 + +# Validation parameters: +validation: + partial_validation_interval: 100 + problem: + batch_size: *batch_size + categories: C1,C2,C3 + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images: false, as we will need them only once, at the end. + preload_images: false + streams: + questions: tokenized_questions + dataloader: + num_workers: 1 + + +pipeline: + + ################# PIPE 6: C1 + C2 + C3 questions ################# + + # Answer encoding. + pipe6_all_answer_indexer: + priority: 6.2 + type: LabelIndexer + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn + + + # Model 4: FFN C123 answering + pipe6_c123_answer_classifier: + priority: 6.3 + type: FeedForwardNetwork + hidden: *answer_classifier_hidden_sizes_val + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c123_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c123_binary_yn + + pipe6_c123_nllloss: + priority: 6.4 + type: NLLLoss + targets_dim: 1 + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + loss: pipe6_c123_loss + + pipe6_c123_precision_recall: + priority: 6.5 + type: PrecisionRecallStatistics + use_word_mappings: True + show_class_scores: True + #show_confusion_matrix: True + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + globals: + word_mappings: word_mappings_c123_binary_yn + statistics: + precision: pipe6_c123_precision + recall: pipe6_c123_recall + f1score: pipe6_c123_f1score + + # C123 Predictions decoder. + pipe5_c123_prediction_decoder: + priority: 6.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe6_c123_predictions + outputs: predicted_answers + globals: + word_mappings: word_mappings_c123_binary_yn + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. + viewer: + priority: 9.3 + type: StreamViewer + input_streams: + tokenized_questions, + category_names, predicted_category_names, + answers, predicted_answers + + +#: pipeline diff --git a/configs/vqa_med_2019/c4_classification/frozen_pipeline_input_fusion_glove_lstm_vgg_att_is_cat.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml similarity index 65% rename from configs/vqa_med_2019/c4_classification/frozen_pipeline_input_fusion_glove_lstm_vgg_att_is_cat.yml rename to configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml index 4d554d1..11a9fcb 100644 --- a/configs/vqa_med_2019/c4_classification/frozen_pipeline_input_fusion_glove_lstm_vgg_att_is_cat.yml +++ b/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml @@ -1,12 +1,13 @@ # Part of pipeline containing components constituting the "Inputs Fusion" pipeline. # Inputs: -# * -# * +# * tokenized_questions +# * images +# * image_sizes # Outputs: -# * -# * +# * concatenated_activations +# * concatenated_activations_size # "Inputs Fusion" # 0.: @@ -24,13 +25,49 @@ checkpoint: &checkpoint ~/image-clef-2019/experiments/c4_encoders/20190504_20244 # This one will be skipped, as this is C123 classifier! # + Model 'pipe6_c123_answer_classifier' [FeedForwardNetwork] params saved +pipe_if0_hyperparameters: + # In here I am putting some of the hyperparameters from spreadsheet. + + question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize + # Accepted formats: a,b,c or [a,b,c] + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + + image_preprocessing: &image_preprocessing normalize + # Accepted formats: a,b,c or [a,b,c] + # none | random_affine | random_horizontal_flip | normalize | all + + # Image encoder. 
+ image_encoder_model: &image_encoder_model vgg16 + # Options: vgg16 | densenet121 | resnet152 | resnet50 + #image_encoder_output_size_val: &image_encoder_output_size_val 100 + # INFO: this variable is not important, as we are using features in this pipeline!! + + # Question encoder. + question_encoder_embeddings: &question_encoder_embeddings glove.6B.50d.txt + # Options: '' | glove.6B.50d.txt | glove.6B.100d.txt | glove.6B.200d.txt | glove.6B.300d.txt | glove.42B.300d.txt | glove.840B.300d.txt | glove.twitter.27B.txt | mimic.fastText.no_clean.300d.pickled + question_encoder_embeddings_size_val: &question_encoder_embeddings_size_val 50 + question_encoder_lstm_size_val: &question_encoder_lstm_size_val 50 + question_encoder_output_size_val: &question_encoder_output_size_val 100 + + # Fusion I: image + question + question_image_fusion_type_val: &question_image_fusion_type VQA_Attention + # Options: ElementWiseMultiplication | VQA_Attention + #question_image_fusion_size_val: &question_image_fusion_size_val 1124 + # INFO: this variable is set by VQA_Attention component! + + # Image size encoder. + image_size_encoder_output_size_val: &image_size_encoder_output_size_val 10 + + # Fusion II: (image + question) + image size (must be = question_image_fusion_size_val + image_size_encoder_output_size_val) + question_image_size_fusion_size_val: &question_image_size_fusion_size_val 1134 + pipeline: ################# PIPE 0: SHARED ################# # Add global variables. - global_publisher: + pipe_if0_global_publisher: priority: 0.11 type: GlobalVariablePublisher # Add input_size to globals. @@ -38,7 +75,7 @@ pipeline: values: [*question_encoder_output_size_val, 2, *image_size_encoder_output_size_val] #, *image_encoder_output_size_val] #, *question_image_fusion_size_val] # Statistics. - batch_size: + pipe_if0_batch_size: priority: 0.12 type: BatchSizeStatistics @@ -61,7 +98,7 @@ pipeline: ################# PIPE 1: SHARED QUESTION ENCODER ################# # Model 1: question embeddings - pipe1_question_embeddings: + pipe_if1_question_embeddings: priority: 1.1 type: SentenceEmbeddings embeddings_size: *question_encoder_embeddings_size_val @@ -78,10 +115,10 @@ pipeline: inputs: tokenized_questions outputs: embedded_questions globals: - embeddings_size: pipe1_embeddings_size + embeddings_size: pipe_if1_embeddings_size # Model 2: question RNN - pipe1_lstm: + pipe_if1_lstm: priority: 1.2 type: RecurrentNeuralNetwork cell_type: LSTM @@ -92,20 +129,20 @@ pipeline: # LOAD AND FREEZE # load: file: *checkpoint - model: pipe1_question_embeddings + model: pipe1_lstm freeze: True ################### streams: inputs: embedded_questions predictions: question_activations globals: - input_size: pipe1_embeddings_size + input_size: pipe_if1_embeddings_size prediction_size: question_encoder_output_size ################# PIPE 2: SHARED IMAGE ENCODER ################# # Image encoder. - image_encoder: + pipe_if2_image_encoder: priority: 2.1 type: TorchVisionWrapper model: *image_encoder_model @@ -113,7 +150,7 @@ pipeline: # LOAD AND FREEZE # load: file: *checkpoint - model: pipe1_question_embeddings + model: image_encoder freeze: True ################### streams: @@ -123,14 +160,14 @@ pipeline: ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# # Model - image size classifier. 
- image_size_encoder: + pipe_if3_image_size_encoder: priority: 3.1 type: FeedForwardNetwork use_logsoftmax: False # LOAD AND FREEZE # load: file: *checkpoint - model: pipe1_question_embeddings + model: image_size_encoder freeze: True ################### streams: @@ -142,13 +179,19 @@ pipeline: ################# PIPE 4: image-question fusion ################# # Attention + FF. - question_image_fusion: + pipe_if4_question_image_fusion: priority: 4.1 type: *question_image_fusion_type dropout_rate: 0.5 # Attention params. latent_size: 100 num_attention_heads: 2 + # LOAD AND FREEZE # + load: + file: *checkpoint + model: question_image_fusion + freeze: True + ################### streams: image_encodings: feature_maps question_encodings: question_activations @@ -158,12 +201,18 @@ pipeline: output_size: fused_activation_size - question_image_ffn: + pipe_if4_question_image_ffn: priority: 4.2 type: FeedForwardNetwork #hidden_sizes: [*question_image_fusion_size_val] dropout_rate: 0.5 use_logsoftmax: False + # LOAD AND FREEZE # + load: + file: *checkpoint + model: question_image_ffn + freeze: True + ################### streams: inputs: fused_activations predictions: question_image_activations @@ -174,7 +223,7 @@ pipeline: ################# PIPE 5: image-question-image size fusion ################# # 5th subpipeline: concatenation - concat: + pipe_if5_concat: priority: 5.1 type: Concatenation input_streams: [question_image_activations,image_size_activations] diff --git a/configs/vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml similarity index 100% rename from configs/vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml rename to configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml diff --git a/configs/vqa_med_2019/frozen_pipelines/frozen_word_answer_glove_sum.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_word_answer_glove_sum.yml new file mode 100644 index 0000000..e69de29 From 2ef1df7c76444286883f066af968bf273377aa8c Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 10:20:15 -0700 Subject: [PATCH 2/9] clenup --- .../example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml | 3 ++- .../tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml | 3 ++- .../tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml | 3 ++- .../tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml | 3 ++- .../tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml | 3 ++- .../tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml | 3 ++- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml index b93eb56..dc34516 100644 --- a/configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml @@ -1,5 +1,6 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml +default_configs: + vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. 
diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml index a849432..2091e16 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml @@ -1,5 +1,6 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_qc_glove_rnn_ffn.yml +default_configs: + vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml index 87e8912..cd28ae0 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml @@ -1,5 +1,6 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_qc_glove_rnn_ffn.yml +default_configs: + vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml index 586a990..896b221 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml @@ -1,5 +1,6 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml +default_configs: + vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml index d718eeb..8f9d748 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml @@ -1,5 +1,6 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml +default_configs: + vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. 
diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml index 1b4363b..3ec7a77 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml @@ -1,5 +1,6 @@ # Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml +default_configs: + vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. From 479b479b374970ee92b788d819200206247d91fd Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 11:20:07 -0700 Subject: [PATCH 3/9] working on input_fusion_io --- .../c123_frozen_if_ffn_c123_loss.yml | 10 +- ...input_fusion_glove_lstm_vgg_att_is_cat.yml | 3 + ..._question_categorization_glove_rnn_ffn.yml | 7 +- .../input_fusion_processor_io.yml | 122 ++++++++++++++++++ ptp/workers/processor.py | 36 ++++-- 5 files changed, 157 insertions(+), 21 deletions(-) create mode 100644 configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml diff --git a/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml b/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml index 4a8783c..107f9ac 100644 --- a/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml @@ -77,7 +77,7 @@ pipeline: ################# PIPE 6: C1 + C2 + C3 questions ################# # Answer encoding. - pipe6_all_answer_indexer: + pipe6_c123_binary_yn_answer_indexer: priority: 6.2 type: LabelIndexer data_folder: ~/data/vqa-med @@ -93,7 +93,7 @@ pipeline: # Model 4: FFN C123 answering - pipe6_c123_answer_classifier: + pipe6_c123_binary_yn_answer_classifier: priority: 6.3 type: FeedForwardNetwork hidden: *answer_classifier_hidden_sizes_val @@ -105,7 +105,7 @@ pipeline: input_size: concatenated_activations_size prediction_size: vocabulary_size_c123_binary_yn - pipe6_c123_nllloss: + pipe6_c123_binary_yn_nllloss: priority: 6.4 type: NLLLoss targets_dim: 1 @@ -114,7 +114,7 @@ pipeline: targets: answers_ids loss: pipe6_c123_loss - pipe6_c123_precision_recall: + pipe6_c123_binary_yn_precision_recall: priority: 6.5 type: PrecisionRecallStatistics use_word_mappings: True @@ -131,7 +131,7 @@ pipeline: f1score: pipe6_c123_f1score # C123 Predictions decoder. - pipe5_c123_prediction_decoder: + pipe5_c123_binary_yn_prediction_decoder: priority: 6.6 type: WordDecoder # Use the same word mappings as label indexer. diff --git a/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml index 11a9fcb..5f1d4c5 100644 --- a/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml +++ b/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml @@ -1,12 +1,15 @@ # Part of pipeline containing components constituting the "Inputs Fusion" pipeline. 
# Inputs: +# streams: # * tokenized_questions # * images # * image_sizes # Outputs: +# streams: # * concatenated_activations +# globals: # * concatenated_activations_size # "Inputs Fusion" diff --git a/configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml index 6ca6986..c144f57 100644 --- a/configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml +++ b/configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml @@ -19,6 +19,7 @@ # 0.56: pipe_qc_category_accuracy # Loaded checkpoint: 20190505_130406 +checkpoint: &checkpoint ~/image-clef-2019/experiments/q_categorization/20190505_130406/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt pipeline: ################# PIPE: QUESTION CATEGORIZATION ################# @@ -37,7 +38,7 @@ pipeline: type: SentenceEmbeddings # LOAD AND FREEZE # load: - file: ~/image-clef-2019/experiments/q_categorization/20190505_130406/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + file: *checkpoint model: question_embeddings freeze: True ################### @@ -58,7 +59,7 @@ pipeline: cell_type: LSTM # LOAD AND FREEZE # load: - file: ~/image-clef-2019/experiments/q_categorization/20190505_130406/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + file: *checkpoint model: lstm freeze: True ################### @@ -78,7 +79,7 @@ pipeline: type: FeedForwardNetwork # LOAD AND FREEZE # load: - file: ~/image-clef-2019/experiments/q_categorization/20190505_130406/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + file: *checkpoint model: classifier freeze: True ################### diff --git a/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml b/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml new file mode 100644 index 0000000..c4615bd --- /dev/null +++ b/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml @@ -0,0 +1,122 @@ +# Load config defining problems for training, validation and testing. +default_configs: + vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml, + vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml + + +training_validation: + problem: + type: &p_type VQAMED2019 + data_folder: &data_folder ~/data/vqa-med + split: training_validation + categories: all + resize_image: &resize_image [224, 224] + batch_size: 64 + # Appy all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions + + dataloader: + # No sampler, process samples in the same order. + shuffle: false + # Use 1 worker, so batches will follow the samples order. + num_workers: 1 + + +hyperparams: + # Final classifier: FFN. + answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500] + + +# Add component for exporting answers to files. +pipeline: + name: input_fusion + + ################# PIPE 6: C1 + C2 + C3 questions ################# + + # Answer encoding. + pipe6_c123_binary_yn_answer_indexer: + priority: 6.2 + type: LabelIndexer + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn + + + # Model 4: FFN C123 answering + pipe6_c123_binary_yn_answer_classifier: + priority: 6.3 + type: FeedForwardNetwork + hidden: *answer_classifier_hidden_sizes_val + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c123_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c123_binary_yn + + pipe6_c123_binary_yn_nllloss: + priority: 6.4 + type: NLLLoss + targets_dim: 1 + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + loss: pipe6_c123_loss + + pipe6_c123_binary_yn_precision_recall: + priority: 6.5 + type: PrecisionRecallStatistics + use_word_mappings: True + show_class_scores: True + #show_confusion_matrix: True + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + globals: + word_mappings: word_mappings_c123_binary_yn + statistics: + precision: pipe6_c123_precision + recall: pipe6_c123_recall + f1score: pipe6_c123_f1score + + # C123 Predictions decoder. + pipe5_c123_binary_yn_prediction_decoder: + priority: 6.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe6_c123_predictions + outputs: predicted_answers + globals: + word_mappings: word_mappings_c123_binary_yn + + # Viewers. + viewer_extended: + priority: 100.4 + type: StreamViewer + sample_number: 0 + input_streams: + indices,image_ids,tokenized_questions, + category_names,predicted_categories, + answers,tokenized_answers,predicted_answers + + fused_inputs_exporter: + priority: 100.5 + type: StreamFileExporter + separator: '|' + filename: 'fused_inputs.csv' + export_separator_line_to_csv: False + input_streams: + indices + diff --git a/ptp/workers/processor.py b/ptp/workers/processor.py index b5afa68..c85cfbc 100644 --- a/ptp/workers/processor.py +++ b/ptp/workers/processor.py @@ -86,20 +86,24 @@ def setup_global_experiment(self): self.logger.error("Cannot use GPU as there are no CUDA-compatible devices present in the system!") exit(-1) + # Config that will be used. + abs_root_configs = None # Check if checkpoint file was indicated. - if chkpt_file == "": - print('Please pass path to and name of the file containing pipeline to be loaded as --load parameter') - exit(-2) + if chkpt_file != "": + #print('Please pass path to and name of the file containing pipeline to be loaded as --load parameter') + #exit(-2) - # Check if file with model exists. - if not path.isfile(chkpt_file): - print('Checkpoint file {} does not exist'.format(chkpt_file)) - exit(-3) + # Check if file with model exists. + if not path.isfile(chkpt_file): + print('Checkpoint file {} does not exist'.format(chkpt_file)) + exit(-3) - # Extract path. - self.abs_path, _ = path.split(path.dirname(path.expanduser(chkpt_file))) - print(self.abs_path) + # Extract path. + self.abs_path, _ = path.split(path.dirname(path.expanduser(chkpt_file))) + + # Use the "default" config. + abs_root_configs = [path.join(self.abs_path, 'training_configuration.yml')] # Check if config file was indicated by the user. if self.app_state.args.config != '': @@ -107,9 +111,15 @@ def setup_global_experiment(self): root_configs = self.app_state.args.config.replace(" ", "").split(',') # If there are - expand them to absolute paths. 
abs_root_configs = [path.expanduser(config) for config in root_configs] - else: - # Use the "default one". - abs_root_configs = [path.join(self.abs_path, 'training_configuration.yml')] + + # Use path to experiments. + self.abs_path = path.expanduser(self.app_state.args.expdir) + + + if abs_root_configs is None: + print('Please indicate configuration file to be used (--config) and/or pass path to and name of the file containing pipeline to be loaded (--load)') + exit(-2) + # Get the list of configurations which need to be loaded. configs_to_load = config_parsing.recurrent_config_parse(abs_root_configs, [], self.app_state.absolute_config_path) From 1786f165e17542045af32bd538bea40bfdea61e6 Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 11:35:15 -0700 Subject: [PATCH 4/9] exporting of fused inputs --- .../input_fusion_processor_io.yml | 61 ++----------------- ptp/workers/processor.py | 9 ++- 2 files changed, 13 insertions(+), 57 deletions(-) diff --git a/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml b/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml index c4615bd..76392a6 100644 --- a/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml +++ b/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml @@ -31,7 +31,7 @@ hyperparams: # Add component for exporting answers to files. pipeline: - name: input_fusion + name: input_fusion_processor_io ################# PIPE 6: C1 + C2 + C3 questions ################# @@ -51,56 +51,6 @@ pipeline: word_mappings: word_mappings_c123_binary_yn - # Model 4: FFN C123 answering - pipe6_c123_binary_yn_answer_classifier: - priority: 6.3 - type: FeedForwardNetwork - hidden: *answer_classifier_hidden_sizes_val - dropout_rate: 0.5 - streams: - inputs: concatenated_activations - predictions: pipe6_c123_predictions - globals: - input_size: concatenated_activations_size - prediction_size: vocabulary_size_c123_binary_yn - - pipe6_c123_binary_yn_nllloss: - priority: 6.4 - type: NLLLoss - targets_dim: 1 - streams: - predictions: pipe6_c123_predictions - targets: answers_ids - loss: pipe6_c123_loss - - pipe6_c123_binary_yn_precision_recall: - priority: 6.5 - type: PrecisionRecallStatistics - use_word_mappings: True - show_class_scores: True - #show_confusion_matrix: True - streams: - predictions: pipe6_c123_predictions - targets: answers_ids - globals: - word_mappings: word_mappings_c123_binary_yn - statistics: - precision: pipe6_c123_precision - recall: pipe6_c123_recall - f1score: pipe6_c123_f1score - - # C123 Predictions decoder. - pipe5_c123_binary_yn_prediction_decoder: - priority: 6.6 - type: WordDecoder - # Use the same word mappings as label indexer. - import_word_mappings_from_globals: True - streams: - inputs: pipe6_c123_predictions - outputs: predicted_answers - globals: - word_mappings: word_mappings_c123_binary_yn - # Viewers. 
viewer_extended: priority: 100.4 @@ -108,15 +58,16 @@ pipeline: sample_number: 0 input_streams: indices,image_ids,tokenized_questions, - category_names,predicted_categories, - answers,tokenized_answers,predicted_answers + concatenated_activations_size, + category,names, + answers fused_inputs_exporter: priority: 100.5 type: StreamFileExporter separator: '|' filename: 'fused_inputs.csv' - export_separator_line_to_csv: False + export_separator_line_to_csv: True input_streams: - indices + indices, concatenated_activations diff --git a/ptp/workers/processor.py b/ptp/workers/processor.py index c85cfbc..0abadc7 100644 --- a/ptp/workers/processor.py +++ b/ptp/workers/processor.py @@ -112,8 +112,13 @@ def setup_global_experiment(self): # If there are - expand them to absolute paths. abs_root_configs = [path.expanduser(config) for config in root_configs] - # Use path to experiments. - self.abs_path = path.expanduser(self.app_state.args.expdir) + # Using name of the first configuration file from command line. + basename = path.basename(root_configs[0]) + # Take config filename without extension. + pipeline_name = path.splitext(basename)[0] + + # Use path to experiments + pipeline. + self.abs_path = path.join(path.expanduser(self.app_state.args.expdir), pipeline_name) if abs_root_configs is None: From e4168d2e37f75643897d6a0cc54ba035583389ba Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 11:43:47 -0700 Subject: [PATCH 5/9] config cleanups --- .../c4_frozen_if_ffn_c4_loss.yml | 151 ++++++++++++++++++ .../example_frozen_if_ffn_c123_loss.yml} | 5 +- ...input_fusion_glove_lstm_vgg_att_is_cat.yml | 37 +---- .../input_fusion_processor_io.yml | 2 +- 4 files changed, 156 insertions(+), 39 deletions(-) create mode 100644 configs/vqa_med_2019/c4_classification/c4_frozen_if_ffn_c4_loss.yml rename configs/vqa_med_2019/{frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml => evaluation/example_frozen_if_ffn_c123_loss.yml} (95%) diff --git a/configs/vqa_med_2019/c4_classification/c4_frozen_if_ffn_c4_loss.yml b/configs/vqa_med_2019/c4_classification/c4_frozen_if_ffn_c4_loss.yml new file mode 100644 index 0000000..732366a --- /dev/null +++ b/configs/vqa_med_2019/c4_classification/c4_frozen_if_ffn_c4_loss.yml @@ -0,0 +1,151 @@ +# Load config defining problems for training, validation and testing. +default_configs: + vqa_med_2019/default_vqa_med_2019.yml, + vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml, + vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml + +c4_hyperparameters: + # In here I am putting some of the hyperparameters from spreadsheet. + + question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize + # Accepted formats: a,b,c or [a,b,c] + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + + image_preprocessing: &image_preprocessing normalize + # Accepted formats: a,b,c or [a,b,c] + # none | random_affine | random_horizontal_flip | normalize | all + + batch_size: &batch_size 256 + preload_images: &preload_images False + num_workers: &num_workers 4 + +# Training parameters: +training: + problem: + batch_size: *batch_size + categories: C4 + export_sample_weights: ~/data/vqa-med/answers.c4.weights.csv + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images. 
+ preload_images: *preload_images + streams: + questions: tokenized_questions + sampler: + weights: ~/data/vqa-med/answers.c4.weights.csv + # Use four workers for loading images. + dataloader: + num_workers: *num_workers + + # Optimizer parameters: + optimizer: + name: Adam + lr: 0.0001 + + # Terminal conditions: + terminal_conditions: + loss_stop: 1.0e-3 + episode_limit: 10000 + epoch_limit: -1 + +# Validation parameters: +validation: + partial_validation_interval: 100 + problem: + batch_size: *batch_size + categories: C4 + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images: false, as we will need them only once, at the end. + preload_images: false + streams: + questions: tokenized_questions + dataloader: + num_workers: 1 + + +pipeline: + + ################# PIPE 6: C1 + C2 + C3 questions ################# + + # Answer encoding. + pipe6_c123_binary_yn_answer_indexer: + priority: 6.2 + type: LabelIndexer + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn + + + # Model 4: FFN C123 answering + pipe6_c123_binary_yn_answer_classifier: + priority: 6.3 + type: FeedForwardNetwork + hidden: *answer_classifier_hidden_sizes_val + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c123_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c123_binary_yn + + pipe6_c123_binary_yn_nllloss: + priority: 6.4 + type: NLLLoss + targets_dim: 1 + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + loss: pipe6_c123_loss + + pipe6_c123_binary_yn_precision_recall: + priority: 6.5 + type: PrecisionRecallStatistics + use_word_mappings: True + show_class_scores: True + #show_confusion_matrix: True + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + globals: + word_mappings: word_mappings_c123_binary_yn + statistics: + precision: pipe6_c123_precision + recall: pipe6_c123_recall + f1score: pipe6_c123_f1score + + # C123 Predictions decoder. + pipe5_c123_binary_yn_prediction_decoder: + priority: 6.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe6_c123_predictions + outputs: predicted_answers + globals: + word_mappings: word_mappings_c123_binary_yn + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. 
+ viewer: + priority: 9.3 + type: StreamViewer + input_streams: + tokenized_questions, + category_names, predicted_category_names, + answers, predicted_answers + + +#: pipeline diff --git a/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/example_frozen_if_ffn_c123_loss.yml similarity index 95% rename from configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml rename to configs/vqa_med_2019/evaluation/example_frozen_if_ffn_c123_loss.yml index 107f9ac..e82be5a 100644 --- a/configs/vqa_med_2019/frozen_pipelines/c123_frozen_if_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/example_frozen_if_ffn_c123_loss.yml @@ -4,7 +4,7 @@ default_configs: vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml, vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml -hyperparameters: +c123_hyperparameters: # In here I am putting some of the hyperparameters from spreadsheet. question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize @@ -15,9 +15,6 @@ hyperparameters: # Accepted formats: a,b,c or [a,b,c] # none | random_affine | random_horizontal_flip | normalize | all - # Fusion II: (image + question) + image size (must be = question_image_fusion_size_val + image_size_encoder_output_size_val) - question_image_size_fusion_size_val: &question_image_size_fusion_size_val 1134 - # Final classifier: FFN. answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500] diff --git a/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml index 5f1d4c5..c3ae040 100644 --- a/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml +++ b/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml @@ -29,21 +29,10 @@ checkpoint: &checkpoint ~/image-clef-2019/experiments/c4_encoders/20190504_20244 # + Model 'pipe6_c123_answer_classifier' [FeedForwardNetwork] params saved pipe_if0_hyperparameters: - # In here I am putting some of the hyperparameters from spreadsheet. - - question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize - # Accepted formats: a,b,c or [a,b,c] - # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all - - image_preprocessing: &image_preprocessing normalize - # Accepted formats: a,b,c or [a,b,c] - # none | random_affine | random_horizontal_flip | normalize | all + # WARNING: as we are loading the pretrained pipeline, all those values must stay! # Image encoder. image_encoder_model: &image_encoder_model vgg16 - # Options: vgg16 | densenet121 | resnet152 | resnet50 - #image_encoder_output_size_val: &image_encoder_output_size_val 100 - # INFO: this variable is not important, as we are using features in this pipeline!! # Question encoder. question_encoder_embeddings: &question_encoder_embeddings glove.6B.50d.txt @@ -54,9 +43,6 @@ pipe_if0_hyperparameters: # Fusion I: image + question question_image_fusion_type_val: &question_image_fusion_type VQA_Attention - # Options: ElementWiseMultiplication | VQA_Attention - #question_image_fusion_size_val: &question_image_fusion_size_val 1124 - # INFO: this variable is set by VQA_Attention component! # Image size encoder. 
image_size_encoder_output_size_val: &image_size_encoder_output_size_val 10 @@ -74,30 +60,14 @@ pipeline: priority: 0.11 type: GlobalVariablePublisher # Add input_size to globals. - keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size] #, image_encoder_output_size] #, fused_activation_size] - values: [*question_encoder_output_size_val, 2, *image_size_encoder_output_size_val] #, *image_encoder_output_size_val] #, *question_image_fusion_size_val] + keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size] + values: [*question_encoder_output_size_val, 2, *image_size_encoder_output_size_val] # Statistics. pipe_if0_batch_size: priority: 0.12 type: BatchSizeStatistics - # Answer encoding. - #pipe1_all_answer_indexer: - # priority: 0.13 - # type: LabelIndexer - # data_folder: ~/data/vqa-med - # word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv - # # Export mappings and size to globals. - # export_word_mappings_to_globals: True - # streams: - # inputs: answers - # outputs: answers_ids - # globals: - # vocabulary_size: vocabulary_size_c123_binary_yn - # word_mappings: word_mappings_c123_binary_yn - - ################# PIPE 1: SHARED QUESTION ENCODER ################# # Model 1: question embeddings @@ -207,7 +177,6 @@ pipeline: pipe_if4_question_image_ffn: priority: 4.2 type: FeedForwardNetwork - #hidden_sizes: [*question_image_fusion_size_val] dropout_rate: 0.5 use_logsoftmax: False # LOAD AND FREEZE # diff --git a/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml b/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml index 76392a6..0f8754d 100644 --- a/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml +++ b/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml @@ -69,5 +69,5 @@ pipeline: filename: 'fused_inputs.csv' export_separator_line_to_csv: True input_streams: - indices, concatenated_activations + indices #, concatenated_activations From 1d0cdc234d9a7af7c14219e6d33f912267c4657c Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 11:53:40 -0700 Subject: [PATCH 6/9] cleanups in c4 configs --- ...n_c4_loss.yml => c4_frozen_if_gru_dec.yml} | 0 .../c4_lstm_vgg16_ewm_cat_is_attdec.yml | 236 ------------------ 2 files changed, 236 deletions(-) rename configs/vqa_med_2019/c4_classification/{c4_frozen_if_ffn_c4_loss.yml => c4_frozen_if_gru_dec.yml} (100%) delete mode 100644 configs/vqa_med_2019/c4_classification/c4_lstm_vgg16_ewm_cat_is_attdec.yml diff --git a/configs/vqa_med_2019/c4_classification/c4_frozen_if_ffn_c4_loss.yml b/configs/vqa_med_2019/c4_classification/c4_frozen_if_gru_dec.yml similarity index 100% rename from configs/vqa_med_2019/c4_classification/c4_frozen_if_ffn_c4_loss.yml rename to configs/vqa_med_2019/c4_classification/c4_frozen_if_gru_dec.yml diff --git a/configs/vqa_med_2019/c4_classification/c4_lstm_vgg16_ewm_cat_is_attdec.yml b/configs/vqa_med_2019/c4_classification/c4_lstm_vgg16_ewm_cat_is_attdec.yml deleted file mode 100644 index ee05864..0000000 --- a/configs/vqa_med_2019/c4_classification/c4_lstm_vgg16_ewm_cat_is_attdec.yml +++ /dev/null @@ -1,236 +0,0 @@ -# Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/default_vqa_med_2019.yml - -# Training parameters: -training: - problem: - batch_size: 32 # 200 requires to use 4 GPUs! 
- categories: C4 - question_preprocessing: lowercase, remove_punctuation, tokenize #, random_remove_stop_words #,random_shuffle_words - answer_preprocessing: lowercase, remove_punctuation, tokenize - export_sample_weights: ~/data/vqa-med/answers.c4.weights.csv - sampler: - weights: ~/data/vqa-med/answers.c4.weights.csv - dataloader: - num_workers: 4 - # Termination. - terminal_conditions: - loss_stop: 1.0e-2 - episode_limit: 1000000 - epoch_limit: -1 - -# Validation parameters: -validation: - problem: - batch_size: 32 - categories: C4 - question_preprocessing: lowercase, remove_punctuation, tokenize - answer_preprocessing: lowercase, remove_punctuation, tokenize - dataloader: - num_workers: 4 - -pipeline: - - ################# PIPE 1: SHARED ################# - - global_publisher: - priority: 1.0 - type: GlobalVariablePublisher - # Add input_size to globals. - keys: [question_encoder_output_size, image_encoder_output_size, element_wise_activation_size,image_size_encoder_input_size, image_size_encoder_output_size] - values: [100, 500, 100, 2, 10] - - # Answer encoding. - answer_indexer: - priority: 1.1 - type: SentenceIndexer - data_folder: ~/data/vqa-med - word_mappings_file: answer_words.c4.preprocessed.word.mappings.csv - # Export answer word mappings to globals. - export_word_mappings_to_globals: True - export_pad_mapping_to_globals: True - additional_tokens: , - # Add token at the end of sentence. - eos_token: True - fixed_padding: 10 # The longest question! max is 19! - streams: - inputs: answers - outputs: indexed_answers - globals: - vocabulary_size: ans_vocabulary_size - word_mappings: ans_word_mappings - pad_index: ans_pad_index - - - ################# PIPE 2: SHARED QUESTION ENCODER ################# - - # Question embeddings - question_embeddings: - priority: 2.0 - type: SentenceEmbeddings - embeddings_size: 50 - pretrained_embeddings_file: glove.6B.50d.txt - data_folder: ~/data/vqa-med - word_mappings_file: questions.all.word.mappings.csv - fixed_padding: 10 # The longest question! max is 19! - additional_tokens: , - streams: - inputs: questions - outputs: embedded_questions - - # Single layer GRU Encoder - question_encoder: - priority: 2.1 - type: RecurrentNeuralNetwork - # Do not wrap that model with DataDictParallel! - #parallelize: False - cell_type: GRU - initial_state: Trainable - hidden_size: 50 - num_layers: 1 - # We will project outputs that should reassemble outputs of answer word embeddings. - use_logsoftmax: False - output_last_state: True - prediction_mode: Dense - #ffn_output: False - #dropout_rate: 0.1 - streams: - inputs: embedded_questions - predictions: s2s_encoder_output - output_state: s2s_state_output - globals: - input_size: embeddings_size - prediction_size: question_encoder_output_size - - ################# PIPE 2: SHARED IMAGE ENCODER ################# - - # Image encoder. - image_encoder: - priority: 2.0 - type: TorchVisionWrapper - model_type: vgg16 - streams: - inputs: images - outputs: image_activations - globals: - output_size: image_encoder_output_size - - - - question_hidden_state_reshaper: - priority: 3.01 - type: ReshapeTensor - input_dims: [-1, 1, 100] - output_dims: [-1, 100] - streams: - inputs: s2s_state_output - outputs: s2s_state_output_reshaped - globals: - output_size: s2s_state_output_reshaped_size - - # Element wise multiplication + FF. 
- question_image_fusion: - priority: 3.1 - type: ElementWiseMultiplication - dropout_rate: 0.5 - streams: - image_encodings: image_activations - question_encodings: s2s_state_output_reshaped - outputs: element_wise_activations - globals: - image_encoding_size: image_encoder_output_size - question_encoding_size: question_encoder_output_size - output_size: element_wise_activation_size - - question_image_to_answer_space_projection_ffn: - # Role of this component is to "project" output of fusion component to "answer space". - priority: 3.2 - type: FeedForwardNetwork - hidden_sizes: [100] - dropout_rate: 0.5 - # Output should not go throught softmax! - use_logsoftmax: False - streams: - inputs: element_wise_activations - predictions: question_image_activations - globals: - input_size: element_wise_activation_size - prediction_size: ans_vocabulary_size - - projected_question_image_reshaper: - priority: 3.3 - type: ReshapeTensor - input_dims: [-1, 2088] - output_dims: [-1, 1, 2088] - streams: - inputs: question_image_activations - outputs: question_image_activations_reshaped - globals: - output_size: question_image_activations_reshaped_size - - # Single layer GRU Decoder with attention - decoder: - type: Attn_Decoder_RNN - priority: 4 - hidden_size: 100 - # Output layer is softmax layer, projecting "1-hot like word encodings". - use_logsoftmax: True - autoregression_length: 10 # Current implementation requires this value to be equal to fixed_padding in SentenceEmbeddings/Indexer... - prediction_mode: Dense - dropout_rate: 0.1 - streams: - inputs: s2s_encoder_output - predictions: predictions - input_state: question_image_activations_reshaped - globals: - input_size: ans_vocabulary_size - prediction_size: ans_vocabulary_size - - s# Loss - nllloss: - type: NLLLoss - priority: 6 - num_targets_dims: 2 - streams: - targets: indexed_answers - loss: loss - globals: - ignore_index: ans_pad_index - - # Prediction decoding. - prediction_decoder: - priority: 10 - type: SentenceIndexer - # Reverse mode. - reverse: True - # Use distributions as inputs. - use_input_distributions: True - data_folder: ~/data/vqa-med - import_word_mappings_from_globals: True - globals: - word_mappings: ans_word_mappings - streams: - inputs: predictions - outputs: prediction_sentences - - # Statistics. - batch_size: - type: BatchSizeStatistics - priority: 100.0 - - bleu: - type: BLEUStatistics - priority: 100.2 - globals: - word_mappings: ans_word_mappings - streams: - targets: indexed_answers - - - # Viewers. 
- viewer: - type: StreamViewer - priority: 100.3 - input_streams: questions,answers,indexed_answers,prediction_sentences - -#: pipeline From 6c35bc193ab7c1c9b56cc9c475e87009952600fa Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 13:23:58 -0700 Subject: [PATCH 7/9] reverted changes on gru decoder with attention, adde config for training c124 using pretrained input fusion pipeline --- .../components/models/attn_decoder_rnn..yml | 78 +++++++++ .../evaluation/frozen_if_ffn_c1234_loss.yml | 154 ++++++++++++++++++ ...3_loss.yml => frozen_if_ffn_c123_loss.yml} | 0 3 files changed, 232 insertions(+) create mode 100644 configs/default/components/models/attn_decoder_rnn..yml create mode 100644 configs/vqa_med_2019/evaluation/frozen_if_ffn_c1234_loss.yml rename configs/vqa_med_2019/evaluation/{example_frozen_if_ffn_c123_loss.yml => frozen_if_ffn_c123_loss.yml} (100%) diff --git a/configs/default/components/models/attn_decoder_rnn..yml b/configs/default/components/models/attn_decoder_rnn..yml new file mode 100644 index 0000000..f676809 --- /dev/null +++ b/configs/default/components/models/attn_decoder_rnn..yml @@ -0,0 +1,78 @@ +# This file defines the default values for the GRU decoder with attention. + +#################################################################### +# 1. CONFIGURATION PARAMETERS that will be LOADED by the component. +#################################################################### + +# Size of the hidden state (LOADED) +hidden_size: 100 + +# Wether to include the last hidden state in the outputs +output_last_state: False + +# Type of recurrent cell (LOADED) +# -> Only GRU is supported + +# Number of "stacked" layers (LOADED) +# -> Only a single layer is supported + +# Dropout rate (LOADED) +# Default: 0 (means that it is turned off) +dropout_rate: 0 + +# Prediction mode (LOADED) +# Options: +# * Dense (passes every activation through output layer) | +# * Last (passes only the last activation though output layer) | +# * None (all outputs are discarded) +prediction_mode: Dense + +# Enable FFN layer at the output of the RNN (before eventual feed back in the case of autoregression). +# Useful if the raw outputs of the RNN are needed, for attention encoder-decoder for example. +ffn_output: True + +# Length of generated output sequence (LOADED) +# User must set it per task, as it is task specific. +autoregression_length: 10 + +# If true, output of the last layer will be additionally processed with Log Softmax (LOADED) +use_logsoftmax: True + +streams: + #################################################################### + # 2. Keymappings associated with INPUT and OUTPUT streams. + #################################################################### + + # Stream containing batch of encoder outputs (INPUT) + inputs: inputs + + # Stream containing the inital state of the RNN (INPUT) + # The stream will be actually created only if `inital_state: Input` + input_state: input_state + + # Stream containing predictions (OUTPUT) + predictions: predictions + + # Stream containing the final output state of the RNN (output) + # The stream will be actually created only if `output_last_state: True` + output_state: output_state + +globals: + #################################################################### + # 3. Keymappings of variables that will be RETRIEVED from GLOBALS. 
+ #################################################################### + + # Size of the input (RETRIEVED) + input_size: input_size + + # Size of the prediction (RETRIEVED) + prediction_size: prediction_size + + #################################################################### + # 4. Keymappings associated with GLOBAL variables that will be SET. + #################################################################### + + #################################################################### + # 5. Keymappings associated with statistics that will be ADDED. + #################################################################### + diff --git a/configs/vqa_med_2019/evaluation/frozen_if_ffn_c1234_loss.yml b/configs/vqa_med_2019/evaluation/frozen_if_ffn_c1234_loss.yml new file mode 100644 index 0000000..9407c13 --- /dev/null +++ b/configs/vqa_med_2019/evaluation/frozen_if_ffn_c1234_loss.yml @@ -0,0 +1,154 @@ +# Load config defining problems for training, validation and testing. +default_configs: + vqa_med_2019/default_vqa_med_2019.yml, + vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml + #vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml + +c123_hyperparameters: + # In here I am putting some of the hyperparameters from spreadsheet. + + question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize + # Accepted formats: a,b,c or [a,b,c] + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + + image_preprocessing: &image_preprocessing normalize + # Accepted formats: a,b,c or [a,b,c] + # none | random_affine | random_horizontal_flip | normalize | all + + # Final classifier: FFN. + answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500,500] + + batch_size: &batch_size 256 + preload_images: &preload_images False + num_workers: &num_workers 4 + +# Training parameters: +training: + problem: + batch_size: *batch_size + categories: all + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_c4_binary_yn.weights.csv + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images. + preload_images: *preload_images + streams: + questions: tokenized_questions + sampler: + weights: ~/data/vqa-med/answers.c1_c2_c3_c4_binary_yn.weights.csv + # Use four workers for loading images. + dataloader: + num_workers: *num_workers + + # Optimizer parameters: + optimizer: + name: Adam + lr: 0.0001 + + # Terminal conditions: + terminal_conditions: + loss_stop: 1.0e-3 + episode_limit: 10000 + epoch_limit: -1 + +# Validation parameters: +validation: + partial_validation_interval: 100 + problem: + batch_size: *batch_size + categories: all + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images: false, as we will need them only once, at the end. + preload_images: false + streams: + questions: tokenized_questions + dataloader: + num_workers: 1 + + +pipeline: + + ################# PIPE 6: C1 + C2 + C3 questions ################# + + # Answer encoding. + pipe6_c1234_answer_indexer: + priority: 6.2 + type: LabelIndexer + data_folder: ~/data/vqa-med + word_mappings_file: answers.all.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c1234_binary_yn + word_mappings: word_mappings_c1234_binary_yn + + + # Model 4: FFN C123 answering + pipe6_c1234_answer_classifier: + priority: 6.3 + type: FeedForwardNetwork + hidden: *answer_classifier_hidden_sizes_val + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c1234_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c1234_binary_yn + + pipe6_c1234_nllloss: + priority: 6.4 + type: NLLLoss + targets_dim: 1 + streams: + predictions: pipe6_c1234_predictions + targets: answers_ids + loss: pipe6_c1234_loss + + pipe6_c1234_precision_recall: + priority: 6.5 + type: PrecisionRecallStatistics + use_word_mappings: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + predictions: pipe6_c1234_predictions + targets: answers_ids + globals: + word_mappings: word_mappings_c1234_binary_yn + statistics: + precision: pipe6_c1234_precision + recall: pipe6_c1234_recall + f1score: pipe6_c1234_f1score + + # C123 Predictions decoder. + pipe6_c1234_binary_yn_prediction_decoder: + priority: 6.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe6_c1234_predictions + outputs: predicted_answers + globals: + word_mappings: word_mappings_c1234_binary_yn + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. + viewer: + priority: 9.3 + type: StreamViewer + input_streams: + tokenized_questions, + category_names, predicted_category_names, + answers, predicted_answers + + +#: pipeline diff --git a/configs/vqa_med_2019/evaluation/example_frozen_if_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/frozen_if_ffn_c123_loss.yml similarity index 100% rename from configs/vqa_med_2019/evaluation/example_frozen_if_ffn_c123_loss.yml rename to configs/vqa_med_2019/evaluation/frozen_if_ffn_c123_loss.yml From afbbcb129f0c4af06466bed4116deb5bebbfbd26 Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 13:28:22 -0700 Subject: [PATCH 8/9] missing rnn --- ptp/components/models/attn_decoder_rnn..py | 242 +++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 ptp/components/models/attn_decoder_rnn..py diff --git a/ptp/components/models/attn_decoder_rnn..py b/ptp/components/models/attn_decoder_rnn..py new file mode 100644 index 0000000..32a2c14 --- /dev/null +++ b/ptp/components/models/attn_decoder_rnn..py @@ -0,0 +1,242 @@ +# Copyright (C) Alexis Asseman, IBM Corporation 2019 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__author__ = "Alexis Asseman" + +import torch + +from ptp.configuration.configuration_error import ConfigurationError +from ptp.components.models.model import Model +from ptp.data_types.data_definition import DataDefinition + + +class Attn_Decoder_RNN(Model): + """ + Single layer GRU decoder with attention: + Bahdanau, D., Cho, K., & Bengio, Y. (2014). Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473. + + Needs the full sequence of hidden states from the encoder as input, as well as the last hidden state from the encoder as input state. + + Code is based on https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html. + """ + def __init__(self, name, config): + """ + Initializes the model. + + :param config: Dictionary of parameters (read from configuration ``.yaml`` file). + :type config: ``ptp.configuration.ConfigInterface`` + """ + # Call constructors of parent classes. + Model.__init__(self, name, Attn_Decoder_RNN, config) + + # Get input/output mode + self.output_last_state = self.config["output_last_state"] + self.ffn_output = self.config["ffn_output"] + + # Get prediction mode from configuration. + self.prediction_mode = self.config["prediction_mode"] + if self.prediction_mode not in ['Dense','Last', 'None']: + raise ConfigurationError("Invalid 'prediction_mode' (current {}, available {})".format(self.prediction_mode, ['Dense','Last', 'None'])) + + self.autoregression_length = self.config["autoregression_length"] + + # Retrieve input size from global variables. + self.key_input_size = self.global_keys["input_size"] + self.input_size = self.globals["input_size"] + if type(self.input_size) == list: + if len(self.input_size) == 1: + self.input_size = self.input_size[0] + else: + raise ConfigurationError("RNN input size '{}' must be a single dimension (current {})".format(self.key_input_size, self.input_size)) + + # Retrieve output (prediction) size from global params. + self.prediction_size = self.globals["prediction_size"] + if type(self.prediction_size) == list: + if len(self.prediction_size) == 1: + self.prediction_size = self.prediction_size[0] + else: + raise ConfigurationError("RNN prediction size '{}' must be a single dimension (current {})".format(self.key_prediction_size, self.prediction_size)) + + # Retrieve hidden size from configuration. + self.hidden_size = self.config["hidden_size"] + if type(self.hidden_size) == list: + if len(self.hidden_size) == 1: + self.hidden_size = self.hidden_size[0] + else: + raise ConfigurationError("RNN hidden_size must be a single dimension (current {})".format(self.hidden_size)) + + # Get dropout rate value from config. + dropout_rate = self.config["dropout_rate"] + + # Create dropout layer. + self.dropout = torch.nn.Dropout(dropout_rate) + + # Create rnn cell: hardcoded one layer GRU. + self.rnn_cell = getattr(torch.nn, "GRU")(self.input_size, self.hidden_size, 1, dropout=dropout_rate, batch_first=True) + + # Create layers for the attention + self.attn = torch.nn.Linear(self.hidden_size * 2, self.autoregression_length) + self.attn_combine = torch.nn.Linear(self.hidden_size * 2, self.hidden_size) + + # Create the trainable initial input for the decoder (A trained token of sorts) + self.sos_token = torch.zeros(1, self.input_size) + torch.nn.init.xavier_uniform(self.sos_token) + self.sos_token = torch.nn.Parameter(self.sos_token, requires_grad=True) + + # Get key mappings. 
+ self.key_inputs = self.stream_keys["inputs"] + self.key_predictions = self.stream_keys["predictions"] + self.key_input_state = self.stream_keys["input_state"] + if self.output_last_state: + self.key_output_state = self.stream_keys["output_state"] + + self.logger.info("Initializing RNN with input size = {}, hidden size = {} and prediction size = {}".format(self.input_size, self.hidden_size, self.prediction_size)) + + # Create the output layer. + self.activation2output_layer = None + if(self.ffn_output): + self.activation2output_layer = torch.nn.Linear(self.hidden_size, self.prediction_size) + + # Create the final non-linearity. + self.use_logsoftmax = self.config["use_logsoftmax"] + if self.use_logsoftmax: + if self.prediction_mode == "Dense": + # Used then returning dense prediction, i.e. every output of unfolded model. + self.log_softmax = torch.nn.LogSoftmax(dim=2) + else: + # Used when returning only the last output. + self.log_softmax = torch.nn.LogSoftmax(dim=1) + + def activation2output(self, activations): + output = self.dropout(activations) + + if(self.ffn_output): + #output = activations.squeeze(1) + shape = activations.shape + + # Reshape to 2D tensor [BATCH_SIZE * SEQ_LEN x HIDDEN_SIZE] + output = output.contiguous().view(-1, shape[2]) + + # Propagate data through the output layer [BATCH_SIZE * SEQ_LEN x PREDICTION_SIZE] + output = self.activation2output_layer(output) + #output = output.unsqueeze(1) + + # Reshape back to 3D tensor [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE] + output = output.view(shape[0], shape[1], output.size(1)) + + return output + + + def input_data_definitions(self): + """ + Function returns a dictionary with definitions of input data that are required by the component. + + :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + d = {} + + d[self.key_inputs] = DataDefinition([-1, -1, self.hidden_size], [torch.Tensor], "Batch of encoder outputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]") + + # Input hidden state + d[self.key_input_state] = DataDefinition([-1, 1, self.hidden_size], [torch.Tensor], "Batch of RNN last hidden states passed from another RNN that will be used as initial [BATCH_SIZE x NUM_LAYERS x SEQ_LEN x HIDDEN_SIZE]") + + return d + + def output_data_definitions(self): + """ + Function returns a dictionary with definitions of output data produced the component. + + :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + d = {} + + if self.prediction_mode == "Dense": + d[self.key_predictions] = DataDefinition([-1, -1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]") + elif self.prediction_mode == "Last": # "Last" + # Only last prediction. + d[self.key_predictions] = DataDefinition([-1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]") + + # Output hidden state stream TODO: why do we need that? + if self.output_last_state: + d[self.key_output_state] = DataDefinition([-1, 1, self.hidden_size], [torch.Tensor], "Batch of RNN final hidden states [BATCH_SIZE x NUM_LAYERS x SEQ_LEN x HIDDEN_SIZE]") + + return d + + def forward(self, data_dict): + """ + Forward pass of the model. 
+ + :param data_dict: DataDict({'inputs', 'predictions ...}), where: + + - inputs: expected inputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE], + - predictions: returned output with predictions (log_probs) [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE] + """ + + inputs = data_dict[self.key_inputs] + batch_size = inputs.shape[0] + #print("{}: input shape: {}, device: {}\n".format(self.name, inputs.shape, inputs.device)) + + # Initialize hidden state from inputs - as last hidden state from external component. + hidden = data_dict[self.key_input_state] + # For RNNs (aside of LSTM): [BATCH_SIZE x NUM_LAYERS x HIDDEN_SIZE] -> [NUM_LAYERS x BATCH_SIZE x HIDDEN_SIZE] + hidden = hidden.transpose(0,1) + #print("{}: hidden shape: {}, device: {}\n".format(self.name, hidden.shape, hidden.device)) + + # List that will contain the output sequence + activations = [] + + # First input to the decoder - trainable "start of sequence" token + activations_partial = self.sos_token.expand(batch_size, -1).unsqueeze(1) + + # Feed back the outputs iteratively + for i in range(self.autoregression_length): + + # Do the attention thing + attn_weights = torch.nn.functional.softmax( + self.attn(torch.cat((activations_partial.transpose(0, 1), hidden), 2)), + dim=2 + ) + attn_applied = torch.bmm(attn_weights.transpose(0, 1), inputs) + activations_partial = torch.cat((activations_partial, attn_applied), 2) + activations_partial = self.attn_combine(activations_partial) + activations_partial = torch.nn.functional.relu(activations_partial) + + # Feed through the RNN + activations_partial, hidden = self.rnn_cell(activations_partial, hidden) + activations_partial = self.activation2output(activations_partial) + + # Add the single step output into list + if self.prediction_mode == "Dense": + activations += [activations_partial] + + # Reassemble all the outputs from list into an output tensor + if self.prediction_mode == "Dense": + outputs = torch.cat(activations, 1) + # Log softmax - along PREDICTION dim. + if self.use_logsoftmax: + outputs = self.log_softmax(outputs) + # Add predictions to datadict. + data_dict.extend({self.key_predictions: outputs}) + elif self.prediction_mode == "Last": + if self.use_logsoftmax: + outputs = self.log_softmax(activations_partial.squeeze(1)) + # Add predictions to datadict. + data_dict.extend({self.key_predictions: outputs}) + + # Output last hidden state, if requested + if self.output_last_state: + # For others: [NUM_LAYERS x BATCH_SIZE x HIDDEN_SIZE] -> [BATCH_SIZE x NUM_LAYERS x HIDDEN_SIZE] + hidden = hidden.transpose(0,1) + # Export last hidden state. + data_dict.extend({self.key_output_state: hidden}) From ed6819076f4840a3f577cbf28fd1609b63cff11c Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Mon, 6 May 2019 13:41:59 -0700 Subject: [PATCH 9/9] removed doubled attn_decoder_rnn..py --- ptp/components/models/attn_decoder_rnn..py | 242 --------------------- 1 file changed, 242 deletions(-) delete mode 100644 ptp/components/models/attn_decoder_rnn..py diff --git a/ptp/components/models/attn_decoder_rnn..py b/ptp/components/models/attn_decoder_rnn..py deleted file mode 100644 index 32a2c14..0000000 --- a/ptp/components/models/attn_decoder_rnn..py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright (C) Alexis Asseman, IBM Corporation 2019 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__author__ = "Alexis Asseman" - -import torch - -from ptp.configuration.configuration_error import ConfigurationError -from ptp.components.models.model import Model -from ptp.data_types.data_definition import DataDefinition - - -class Attn_Decoder_RNN(Model): - """ - Single layer GRU decoder with attention: - Bahdanau, D., Cho, K., & Bengio, Y. (2014). Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473. - - Needs the full sequence of hidden states from the encoder as input, as well as the last hidden state from the encoder as input state. - - Code is based on https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html. - """ - def __init__(self, name, config): - """ - Initializes the model. - - :param config: Dictionary of parameters (read from configuration ``.yaml`` file). - :type config: ``ptp.configuration.ConfigInterface`` - """ - # Call constructors of parent classes. - Model.__init__(self, name, Attn_Decoder_RNN, config) - - # Get input/output mode - self.output_last_state = self.config["output_last_state"] - self.ffn_output = self.config["ffn_output"] - - # Get prediction mode from configuration. - self.prediction_mode = self.config["prediction_mode"] - if self.prediction_mode not in ['Dense','Last', 'None']: - raise ConfigurationError("Invalid 'prediction_mode' (current {}, available {})".format(self.prediction_mode, ['Dense','Last', 'None'])) - - self.autoregression_length = self.config["autoregression_length"] - - # Retrieve input size from global variables. - self.key_input_size = self.global_keys["input_size"] - self.input_size = self.globals["input_size"] - if type(self.input_size) == list: - if len(self.input_size) == 1: - self.input_size = self.input_size[0] - else: - raise ConfigurationError("RNN input size '{}' must be a single dimension (current {})".format(self.key_input_size, self.input_size)) - - # Retrieve output (prediction) size from global params. - self.prediction_size = self.globals["prediction_size"] - if type(self.prediction_size) == list: - if len(self.prediction_size) == 1: - self.prediction_size = self.prediction_size[0] - else: - raise ConfigurationError("RNN prediction size '{}' must be a single dimension (current {})".format(self.key_prediction_size, self.prediction_size)) - - # Retrieve hidden size from configuration. - self.hidden_size = self.config["hidden_size"] - if type(self.hidden_size) == list: - if len(self.hidden_size) == 1: - self.hidden_size = self.hidden_size[0] - else: - raise ConfigurationError("RNN hidden_size must be a single dimension (current {})".format(self.hidden_size)) - - # Get dropout rate value from config. - dropout_rate = self.config["dropout_rate"] - - # Create dropout layer. - self.dropout = torch.nn.Dropout(dropout_rate) - - # Create rnn cell: hardcoded one layer GRU. 
- self.rnn_cell = getattr(torch.nn, "GRU")(self.input_size, self.hidden_size, 1, dropout=dropout_rate, batch_first=True) - - # Create layers for the attention - self.attn = torch.nn.Linear(self.hidden_size * 2, self.autoregression_length) - self.attn_combine = torch.nn.Linear(self.hidden_size * 2, self.hidden_size) - - # Create the trainable initial input for the decoder (A trained token of sorts) - self.sos_token = torch.zeros(1, self.input_size) - torch.nn.init.xavier_uniform(self.sos_token) - self.sos_token = torch.nn.Parameter(self.sos_token, requires_grad=True) - - # Get key mappings. - self.key_inputs = self.stream_keys["inputs"] - self.key_predictions = self.stream_keys["predictions"] - self.key_input_state = self.stream_keys["input_state"] - if self.output_last_state: - self.key_output_state = self.stream_keys["output_state"] - - self.logger.info("Initializing RNN with input size = {}, hidden size = {} and prediction size = {}".format(self.input_size, self.hidden_size, self.prediction_size)) - - # Create the output layer. - self.activation2output_layer = None - if(self.ffn_output): - self.activation2output_layer = torch.nn.Linear(self.hidden_size, self.prediction_size) - - # Create the final non-linearity. - self.use_logsoftmax = self.config["use_logsoftmax"] - if self.use_logsoftmax: - if self.prediction_mode == "Dense": - # Used then returning dense prediction, i.e. every output of unfolded model. - self.log_softmax = torch.nn.LogSoftmax(dim=2) - else: - # Used when returning only the last output. - self.log_softmax = torch.nn.LogSoftmax(dim=1) - - def activation2output(self, activations): - output = self.dropout(activations) - - if(self.ffn_output): - #output = activations.squeeze(1) - shape = activations.shape - - # Reshape to 2D tensor [BATCH_SIZE * SEQ_LEN x HIDDEN_SIZE] - output = output.contiguous().view(-1, shape[2]) - - # Propagate data through the output layer [BATCH_SIZE * SEQ_LEN x PREDICTION_SIZE] - output = self.activation2output_layer(output) - #output = output.unsqueeze(1) - - # Reshape back to 3D tensor [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE] - output = output.view(shape[0], shape[1], output.size(1)) - - return output - - - def input_data_definitions(self): - """ - Function returns a dictionary with definitions of input data that are required by the component. - - :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`). - """ - d = {} - - d[self.key_inputs] = DataDefinition([-1, -1, self.hidden_size], [torch.Tensor], "Batch of encoder outputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]") - - # Input hidden state - d[self.key_input_state] = DataDefinition([-1, 1, self.hidden_size], [torch.Tensor], "Batch of RNN last hidden states passed from another RNN that will be used as initial [BATCH_SIZE x NUM_LAYERS x SEQ_LEN x HIDDEN_SIZE]") - - return d - - def output_data_definitions(self): - """ - Function returns a dictionary with definitions of output data produced the component. - - :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). - """ - d = {} - - if self.prediction_mode == "Dense": - d[self.key_predictions] = DataDefinition([-1, -1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]") - elif self.prediction_mode == "Last": # "Last" - # Only last prediction. 
- d[self.key_predictions] = DataDefinition([-1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]") - - # Output hidden state stream TODO: why do we need that? - if self.output_last_state: - d[self.key_output_state] = DataDefinition([-1, 1, self.hidden_size], [torch.Tensor], "Batch of RNN final hidden states [BATCH_SIZE x NUM_LAYERS x SEQ_LEN x HIDDEN_SIZE]") - - return d - - def forward(self, data_dict): - """ - Forward pass of the model. - - :param data_dict: DataDict({'inputs', 'predictions ...}), where: - - - inputs: expected inputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE], - - predictions: returned output with predictions (log_probs) [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE] - """ - - inputs = data_dict[self.key_inputs] - batch_size = inputs.shape[0] - #print("{}: input shape: {}, device: {}\n".format(self.name, inputs.shape, inputs.device)) - - # Initialize hidden state from inputs - as last hidden state from external component. - hidden = data_dict[self.key_input_state] - # For RNNs (aside of LSTM): [BATCH_SIZE x NUM_LAYERS x HIDDEN_SIZE] -> [NUM_LAYERS x BATCH_SIZE x HIDDEN_SIZE] - hidden = hidden.transpose(0,1) - #print("{}: hidden shape: {}, device: {}\n".format(self.name, hidden.shape, hidden.device)) - - # List that will contain the output sequence - activations = [] - - # First input to the decoder - trainable "start of sequence" token - activations_partial = self.sos_token.expand(batch_size, -1).unsqueeze(1) - - # Feed back the outputs iteratively - for i in range(self.autoregression_length): - - # Do the attention thing - attn_weights = torch.nn.functional.softmax( - self.attn(torch.cat((activations_partial.transpose(0, 1), hidden), 2)), - dim=2 - ) - attn_applied = torch.bmm(attn_weights.transpose(0, 1), inputs) - activations_partial = torch.cat((activations_partial, attn_applied), 2) - activations_partial = self.attn_combine(activations_partial) - activations_partial = torch.nn.functional.relu(activations_partial) - - # Feed through the RNN - activations_partial, hidden = self.rnn_cell(activations_partial, hidden) - activations_partial = self.activation2output(activations_partial) - - # Add the single step output into list - if self.prediction_mode == "Dense": - activations += [activations_partial] - - # Reassemble all the outputs from list into an output tensor - if self.prediction_mode == "Dense": - outputs = torch.cat(activations, 1) - # Log softmax - along PREDICTION dim. - if self.use_logsoftmax: - outputs = self.log_softmax(outputs) - # Add predictions to datadict. - data_dict.extend({self.key_predictions: outputs}) - elif self.prediction_mode == "Last": - if self.use_logsoftmax: - outputs = self.log_softmax(activations_partial.squeeze(1)) - # Add predictions to datadict. - data_dict.extend({self.key_predictions: outputs}) - - # Output last hidden state, if requested - if self.output_last_state: - # For others: [NUM_LAYERS x BATCH_SIZE x HIDDEN_SIZE] -> [BATCH_SIZE x NUM_LAYERS x HIDDEN_SIZE] - hidden = hidden.transpose(0,1) - # Export last hidden state. - data_dict.extend({self.key_output_state: hidden})
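For reference, the standalone sketch below (not part of the patch series) condenses the autoregressive loop of the GRU decoder with attention introduced in PATCH 8/9 (attn_decoder_rnn..py) into plain PyTorch. The class name AttnDecoderSketch and the toy sizes are hypothetical, and the feedback path is simplified: the raw GRU output is fed back at each step, while the projection to the prediction size is applied only when collecting outputs (the patch routes the feedback through activation2output instead).

# Standalone sketch with assumed toy sizes; mirrors the attention step and the
# "Dense" prediction mode of the decoder added in PATCH 8/9.
import torch
import torch.nn as nn
import torch.nn.functional as F


class AttnDecoderSketch(nn.Module):
    def __init__(self, input_size=64, hidden_size=64, prediction_size=10, max_len=5):
        super().__init__()
        self.max_len = max_len
        # Attention over a fixed-length sequence of encoder outputs: maps
        # [previous step input ; hidden state] to one weight per encoder step.
        self.attn = nn.Linear(hidden_size * 2, max_len)
        self.attn_combine = nn.Linear(hidden_size * 2, hidden_size)
        self.rnn = nn.GRU(input_size, hidden_size, num_layers=1, batch_first=True)
        self.out = nn.Linear(hidden_size, prediction_size)
        # Trainable "start of sequence" input, analogous to sos_token in the patch.
        self.sos = nn.Parameter(torch.zeros(1, input_size))
        nn.init.xavier_uniform_(self.sos)

    def forward(self, encoder_outputs, encoder_last_hidden):
        # encoder_outputs: [B x SEQ_LEN x HIDDEN], encoder_last_hidden: [1 x B x HIDDEN]
        batch_size = encoder_outputs.size(0)
        step_input = self.sos.expand(batch_size, -1).unsqueeze(1)  # [B x 1 x INPUT]
        hidden = encoder_last_hidden
        outputs = []
        for _ in range(self.max_len):
            # Attention weights from the previous step input and the current hidden state.
            attn_weights = F.softmax(
                self.attn(torch.cat((step_input.transpose(0, 1), hidden), dim=2)), dim=2
            )                                                                # [1 x B x SEQ_LEN]
            # Weighted sum of encoder outputs (the attention context).
            context = torch.bmm(attn_weights.transpose(0, 1), encoder_outputs)  # [B x 1 x HIDDEN]
            # Combine previous input with the context, then advance the GRU by one step.
            step_input = F.relu(self.attn_combine(torch.cat((step_input, context), dim=2)))
            step_input, hidden = self.rnn(step_input, hidden)
            outputs.append(self.out(step_input))                             # [B x 1 x PRED]
        # "Dense" predictions: every unfolded step, log-softmax over classes.
        return F.log_softmax(torch.cat(outputs, dim=1), dim=2)


# Toy run: batch of 2, encoder sequence of length 5, hidden size 64.
enc_out = torch.randn(2, 5, 64)
enc_hidden = torch.randn(1, 2, 64)
print(AttnDecoderSketch()(enc_out, enc_hidden).shape)  # torch.Size([2, 5, 10])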