From 6f421bd97d34ab6d97f4ef024cabd5ffb4f32e53 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 16:31:22 -0700 Subject: [PATCH 01/28] removed data augmentations from c2 configs --- .../c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml | 4 ++-- .../c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml | 4 ++-- .../vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml | 4 ++-- .../c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml index 0bce435..86a7779 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml @@ -5,9 +5,9 @@ training: problem: batch_size: 48 # Appy all preprocessing/data augmentations. - image_preprocessing: all + image_preprocessing: normalize # none | random_affine | random_horizontal_flip | normalize | all - question_preprocessing: all + question_preprocessing: lowercase,remove_punctuation,tokenize # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all streams: # Problem is returning tokenized questions. diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml index d9020d2..7f13e15 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml @@ -5,9 +5,9 @@ training: problem: batch_size: 32 # Appy all preprocessing/data augmentations. - image_preprocessing: all + image_preprocessing: normalize # none | random_affine | random_horizontal_flip | normalize | all - question_preprocessing: all + question_preprocessing: lowercase,remove_punctuation,tokenize # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all streams: # Problem is returning tokenized questions. diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml index 4991b84..0680cac 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml @@ -4,9 +4,9 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml training: problem: # Appy all preprocessing/data augmentations. - image_preprocessing: all + image_preprocessing: normalize # none | random_affine | random_horizontal_flip | normalize | all - question_preprocessing: all + question_preprocessing: lowercase,remove_punctuation,tokenize # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all streams: # Problem is returning tokenized questions. 
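Note on the switches being pinned down in these hunks: image_preprocessing and question_preprocessing accept the comma-separated options listed in the inline comments. As a rough illustration only, such a specification can be resolved into a torchvision transform chain along the following lines; the helper name and normalization constants are assumptions for this sketch, not code from this repository.

    from torchvision import transforms

    def build_image_transforms(spec):
        # Map the option names from the config comment to torchvision transforms.
        available = {
            "random_affine": transforms.RandomAffine(degrees=15),
            "random_horizontal_flip": transforms.RandomHorizontalFlip(),
            "normalize": transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225]),
        }
        if spec == "none":
            names = []
        elif spec == "all":
            names = list(available)
        else:
            names = spec.split(",")
        # ToTensor comes first, since Normalize operates on tensors.
        return transforms.Compose([transforms.ToTensor()] +
                                  [available[n] for n in names])

    # e.g. build_image_transforms("normalize") mirrors the setting chosen above.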
diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml index c97870b..cc9a025 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml @@ -4,9 +4,9 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml training: problem: # Appy all preprocessing/data augmentations. - image_preprocessing: all + image_preprocessing: normalize # none | random_affine | random_horizontal_flip | normalize | all - question_preprocessing: all + question_preprocessing: lowercase,remove_punctuation,tokenize # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all streams: # Problem is returning tokenized questions. From 8e7d08b6c859addc01a9fb11a34125cec30718b1 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 16:56:52 -0700 Subject: [PATCH 02/28] added using config name as pipeline name in trainer --- .../c2_class_lstm_resnet152_ewm_cat_is.yml | 1 - .../c2_class_lstm_resnet152_rn_cat_is.yml | 1 - .../c2_class_lstm_vgg16_rn.yml | 1 - .../c2_class_lstm_vgg16_rn_cat_is.yml | 1 - ...c2_classification_all_rnn_vgg16_concat.yml | 1 - .../c2_classification_all_rnn_vgg16_ewm.yml | 1 - ..._classification_all_rnn_vgg16_ewm_size.yml | 1 - .../c2_classification_all_rnn_vgg16_mcb.yml | 1 - .../c2_word_answer_onehot_bow.yml | 1 - ptp/workers/processor.py | 14 +++--- ptp/workers/trainer.py | 47 +++++++------------ 11 files changed, 25 insertions(+), 45 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml index 86a7779..8558fd7 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml @@ -24,7 +24,6 @@ validation: pipeline: - name: c2_class_lstm_resnet152_ewm_cat_is global_publisher: priority: 0 diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml index 7f13e15..1a1a05c 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_rn_cat_is.yml @@ -24,7 +24,6 @@ validation: pipeline: - name: c2_class_lstm_resnet152_rn_cat_is global_publisher: priority: 0 diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml index 0680cac..14e4de2 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml @@ -22,7 +22,6 @@ validation: pipeline: - name: c2_class_lstm_vgg16_rn global_publisher: priority: 0 diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml index cc9a025..22c25e4 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn_cat_is.yml @@ -22,7 +22,6 @@ validation: pipeline: - name: c2_class_lstm_vgg16_rn_cat_is global_publisher: priority: 0 diff --git 
a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml index d3aa792..51fba8d 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml @@ -2,7 +2,6 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml pipeline: - name: vqa_med_c2_classification_all_rnn_vgg_concat global_publisher: priority: 0 diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm.yml index 84c8bf8..5447526 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm.yml @@ -2,7 +2,6 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml pipeline: - name: c2_classification_all_rnn_vgg16_ewm global_publisher: priority: 0 diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml index 7db3a3c..d34899b 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml @@ -2,7 +2,6 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml pipeline: - name: c2_classification_all_rnn_vgg16_ewm_size global_publisher: priority: 0 diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml index cabc1dc..d28a24f 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml @@ -2,7 +2,6 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml pipeline: - name: c2_classification_all_rnn_vgg16_mcb global_publisher: priority: 0 diff --git a/configs/vqa_med_2019/c2_classification/c2_word_answer_onehot_bow.yml b/configs/vqa_med_2019/c2_classification/c2_word_answer_onehot_bow.yml index 73dcce7..2d28708 100644 --- a/configs/vqa_med_2019/c2_classification/c2_word_answer_onehot_bow.yml +++ b/configs/vqa_med_2019/c2_classification/c2_word_answer_onehot_bow.yml @@ -14,7 +14,6 @@ validation: batch_size: 128 pipeline: - name: c2_word_answer_onehot_bow # Answer encoding. answer_tokenizer: diff --git a/ptp/workers/processor.py b/ptp/workers/processor.py index bfc29dc..b5afa68 100644 --- a/ptp/workers/processor.py +++ b/ptp/workers/processor.py @@ -17,7 +17,7 @@ __author__ = "Tomasz Kornuta, Vincent Marois, Younes Bouhadjar" -import os +from os import path,makedirs import torch from time import sleep from datetime import datetime @@ -93,12 +93,12 @@ def setup_global_experiment(self): exit(-2) # Check if file with model exists. - if not os.path.isfile(chkpt_file): + if not path.isfile(chkpt_file): print('Checkpoint file {} does not exist'.format(chkpt_file)) exit(-3) # Extract path. - self.abs_path, _ = os.path.split(os.path.dirname(os.path.expanduser(chkpt_file))) + self.abs_path, _ = path.split(path.dirname(path.expanduser(chkpt_file))) print(self.abs_path) # Check if config file was indicated by the user. 
@@ -106,10 +106,10 @@ def setup_global_experiment(self): # Split and make them absolute. root_configs = self.app_state.args.config.replace(" ", "").split(',') # If there are - expand them to absolute paths. - abs_root_configs = [os.path.expanduser(config) for config in root_configs] + abs_root_configs = [path.expanduser(config) for config in root_configs] else: # Use the "default one". - abs_root_configs = [os.path.join(self.abs_path, 'training_configuration.yml')] + abs_root_configs = [path.join(self.abs_path, 'training_configuration.yml')] # Get the list of configurations which need to be loaded. configs_to_load = config_parsing.recurrent_config_parse(abs_root_configs, [], self.app_state.absolute_config_path) @@ -160,7 +160,7 @@ def setup_individual_experiment(self): self.app_state.log_dir = self.abs_path + '/' + time_str + '/' # Lowercase dir. self.app_state.log_dir = self.app_state.log_dir.lower() - os.makedirs(self.app_state.log_dir, exist_ok=False) + makedirs(self.app_state.log_dir, exist_ok=False) except FileExistsError: sleep(1) else: @@ -254,7 +254,7 @@ def setup_individual_experiment(self): pipeline_name = "" # Try to load the model. if pipeline_name != "": - if os.path.isfile(pipeline_name): + if path.isfile(pipeline_name): # Load parameters from checkpoint. self.pipeline.load(pipeline_name) else: diff --git a/ptp/workers/trainer.py b/ptp/workers/trainer.py index c3a583c..9a93591 100644 --- a/ptp/workers/trainer.py +++ b/ptp/workers/trainer.py @@ -17,7 +17,7 @@ __author__ = "Vincent Marois, Tomasz Kornuta" -import os +from os import path,makedirs import yaml import torch from time import sleep @@ -84,38 +84,25 @@ def setup_experiment(self): - Calls base class setup_experiment to parse the command line arguments, - - Loads the config file(s): + - Loads the config file(s) - >>> configs_to_load = self.recurrent_config_parse(flags.config, []) + - Set up the log directory path - - Set up the log directory path: + - Add a ``FileHandler`` to the logger - >>> os.makedirs(self.app_state.log_dir, exist_ok=False) - - - Add a ``FileHandler`` to the logger: - - >>> self.add_file_handler_to_logger(self.log_file) - - - Set random seeds: - - >>> self.set_random_seeds(self.config['training'], 'training') + - Set random seeds - Creates the pipeline consisting of many components - Creates training problem manager - - Handles curriculum learning if indicated: + - Handles curriculum learning if indicated - >>> if 'curriculum_learning' in self.config['training']: - >>> ... + - Creates validation problem manager - - Creates training problem manager - - - Set optimizer: + - Set optimizer - >>> self.optimizer = getattr(torch.optim, optimizer_name) - - - Performs testing of compatibility of both training and validation pipelines. + - Performs testing of compatibility of both training and validation problems and created pipeline. """ # Call base method to parse all command line arguments and add default sections. @@ -134,7 +121,7 @@ def setup_experiment(self): # Split and make them absolute. root_configs = self.app_state.args.config.replace(" ", "").split(',') # If there are - expand them to absolute paths. - abs_root_configs = [os.path.expanduser(config) for config in root_configs] + abs_root_configs = [path.expanduser(config) for config in root_configs] # Get the list of configurations which need to be loaded. 
configs_to_load = config_parse.recurrent_config_parse(abs_root_configs, [], self.app_state.absolute_config_path) @@ -168,8 +155,10 @@ def setup_experiment(self): try: pipeline_name = self.config['pipeline']['name'] except KeyError: - print("Error: Couldn't retrieve the pipeline 'name' from the loaded configuration") - exit(-1) + # Use the name of the first configuration file from the command line. + pipeline_name = path.basename(root_configs[0]) + # Set the pipeline name, so that the processor can use it afterwards. + self.config['pipeline'].add_config_params({'name': pipeline_name}) # Prepare the output path for logging while True: # Dirty fix: if log_dir already exists, wait for 1 second and try again @@ -177,10 +166,10 @@ def setup_experiment(self): time_str = '{0:%Y%m%d_%H%M%S}'.format(datetime.now()) if self.app_state.args.savetag != '': time_str = time_str + "_" + self.app_state.args.savetag - self.app_state.log_dir = os.path.expanduser(self.app_state.args.expdir) + '/' + training_problem_type + '/' + pipeline_name + '/' + time_str + '/' + self.app_state.log_dir = path.expanduser(self.app_state.args.expdir) + '/' + training_problem_type + '/' + pipeline_name + '/' + time_str + '/' # Lowercase dir. self.app_state.log_dir = self.app_state.log_dir.lower() - os.makedirs(self.app_state.log_dir, exist_ok=False) + makedirs(self.app_state.log_dir, exist_ok=False) except FileExistsError: sleep(1) else: @@ -199,7 +188,7 @@ def setup_experiment(self): # Models dir. self.checkpoint_dir = self.app_state.log_dir + 'checkpoints/' - os.makedirs(self.checkpoint_dir, exist_ok=False) + makedirs(self.checkpoint_dir, exist_ok=False) # Set random seeds in the training section. self.set_random_seeds('training', self.config['training']) @@ -283,7 +272,7 @@ def setup_experiment(self): pipeline_name = "" # Try to load the model. if pipeline_name != "": - if os.path.isfile(pipeline_name): + if path.isfile(pipeline_name): # Load parameters from checkpoint. self.pipeline.load(pipeline_name) else: From 94f1e63ea20ff155d5d44208ed848d4e16e7c288 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 17:34:40 -0700 Subject: [PATCH 03/28] resnet50 feature_maps mode and c2 configs using resnet50 with EWM/RN --- .../c2_class_lstm_resnet50_ewm_cat_is.yml | 144 ++++++++++++++++++ .../c2_class_lstm_resnet50_rn_cat_is.yml | 141 +++++++++++++++++ ptp/components/models/torch_vision_wrapper.py | 21 ++- 3 files changed, 302 insertions(+), 4 deletions(-) create mode 100644 configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml create mode 100644 configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_rn_cat_is.yml diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml new file mode 100644 index 0000000..4834d40 --- /dev/null +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml @@ -0,0 +1,144 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml + +training: + problem: + batch_size: 48 + # Apply all preprocessing/data augmentations. + image_preprocessing: normalize + # none | random_affine | random_horizontal_flip | normalize | all + question_preprocessing: lowercase,remove_punctuation,tokenize + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + streams: + # Problem is returning tokenized questions. 
+ questions: tokenized_questions + +validation: + problem: + batch_size: 48 + question_preprocessing: lowercase,remove_punctuation,tokenize + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + streams: + # Problem is returning tokenized questions. + questions: tokenized_questions + + +pipeline: + + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [question_encoder_output_size, image_encoder_output_size, element_wise_activation_size,image_size_encoder_input_size, image_size_encoder_output_size] + values: [100, 100, 100, 2, 10] + + ################# PIPE 0: question ################# + + # Model 1: Embeddings + question_embeddings: + priority: 1.2 + type: SentenceEmbeddings + embeddings_size: 100 + pretrained_embeddings_file: glove.6B.100d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + priority: 1.3 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + use_logsoftmax: False + initial_state: Trainable + dropout_rate: 0.1 + hidden_size: 50 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + ################# PIPE 2: image ################# + # Image encoder. + image_encoder: + priority: 3.1 + type: TorchVisionWrapper + model_type: resnet50 + streams: + inputs: images + outputs: image_activations + globals: + output_size: image_encoder_output_size + + ################# PIPE 3: image-question fusion ################# + # Element wise multiplication + FF. + question_image_fusion: + priority: 4.1 + type: ElementWiseMultiplication + dropout_rate: 0.5 + streams: + image_encodings: image_activations + question_encodings: question_activations + outputs: element_wise_activations + globals: + image_encoding_size: image_encoder_output_size + question_encoding_size: question_encoder_output_size + output_size: element_wise_activation_size + + question_image_ffn: + priority: 4.2 + type: FeedForwardNetwork + hidden_sizes: [100] + dropout_rate: 0.5 + streams: + inputs: element_wise_activations + predictions: question_image_activations + globals: + input_size: element_wise_activation_size + prediction_size: element_wise_activation_size + + ################# PIPE 5: image-question-image size fusion + classification ################# + # Model - image size FFN. + image_size_encoder: + priority: 5.1 + type: FeedForwardNetwork + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + # 4th subpipeline: concatenation + FF. 
+ concat: + priority: 5.2 + type: Concatenation + input_streams: [question_image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,10]] + output_dims: [-1,110] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + classifier: + priority: 5.3 + type: FeedForwardNetwork + hidden_sizes: [100] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c2 + + + #: pipeline diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_rn_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_rn_cat_is.yml new file mode 100644 index 0000000..51dd275 --- /dev/null +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_rn_cat_is.yml @@ -0,0 +1,141 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml + +training: + problem: + batch_size: 32 + # Apply all preprocessing/data augmentations. + image_preprocessing: normalize + # none | random_affine | random_horizontal_flip | normalize | all + question_preprocessing: lowercase,remove_punctuation,tokenize + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + streams: + # Problem is returning tokenized questions. + questions: tokenized_questions + +validation: + problem: + batch_size: 32 + question_preprocessing: lowercase,remove_punctuation,tokenize + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + streams: + # Problem is returning tokenized questions. + questions: tokenized_questions + + +pipeline: + + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [question_encoder_output_size,rn_activation_size,image_size_encoder_input_size, image_size_encoder_output_size] + values: [100, 100, 2, 10] + + ################# PIPE 0: question ################# + + # Model 1: Embeddings + question_embeddings: + priority: 1.2 + type: SentenceEmbeddings + embeddings_size: 100 + pretrained_embeddings_file: glove.6B.100d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + priority: 1.3 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + use_logsoftmax: False + initial_state: Trainable + dropout_rate: 0.1 + hidden_size: 50 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + ################# PIPE 2: image ################# + # Image encoder. + image_encoder: + priority: 3.1 + type: TorchVisionWrapper + model_type: resnet50 + return_feature_maps: True + streams: + inputs: images + outputs: feature_maps + + ################# PIPE 3: Fusion: Relational Network ################# + # Object-object relations. 
+ question_image_fusion: + priority: 4.1 + type: RelationalNetwork + dropout_rate: 0.5 + g_theta_sizes: [512, 256] + streams: + question_encodings: question_activations + outputs: fused_image_question_activations + globals: + question_encoding_size: question_encoder_output_size + output_size: fused_image_question_activation_size + + question_image_ffn: + priority: 4.2 + type: FeedForwardNetwork + hidden_sizes: [128,100] + dropout_rate: 0.5 + streams: + inputs: fused_image_question_activations + predictions: rn_activation + globals: + input_size: fused_image_question_activation_size + prediction_size: rn_activation_size + + + ################# PIPE 5: image-question-image size fusion + classification ################# + # Model - image size FFN. + image_size_encoder: + priority: 5.1 + type: FeedForwardNetwork + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + # 6th subpipeline: concatenation + FF. + concat: + priority: 5.2 + type: Concatenation + input_streams: [rn_activation,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,10]] + output_dims: [-1,110] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + classifier: + priority: 5.3 + type: FeedForwardNetwork + hidden_sizes: [100] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c2 + + #: pipeline diff --git a/ptp/components/models/torch_vision_wrapper.py b/ptp/components/models/torch_vision_wrapper.py index 3419bfb..92e4cf9 100644 --- a/ptp/components/models/torch_vision_wrapper.py +++ b/ptp/components/models/torch_vision_wrapper.py @@ -119,11 +119,24 @@ def __init__(self, name, config): self.model = models.resnet50(pretrained=pretrained) if self.return_feature_maps: - raise ConfigurationError("'resnet50' doesn't support 'return_feature_maps' mode (yet)") + # Get all modules excluding the last two (avgpool and fc) + modules=list(self.model.children())[:-2] + self.model=torch.nn.Sequential(*modules) - # Use the whole model, but cut/reshape only the last layer. - self.output_size = self.globals["output_size"] - self.model.fc = torch.nn.Linear(2048, self.output_size) + # Height of the returned features tensor (SET) + self.feature_maps_height = 7 + self.globals["feature_maps_height"] = self.feature_maps_height + # Width of the returned features tensor (SET) + self.feature_maps_width = 7 + self.globals["feature_maps_width"] = self.feature_maps_width + # Depth of the returned features tensor (SET) + self.feature_maps_depth = 2048 + self.globals["feature_maps_depth"] = self.feature_maps_depth + + else: + # Use the whole model, but cut/reshape only the last layer. + self.output_size = self.globals["output_size"] + self.model.fc = torch.nn.Linear(2048, self.output_size) def input_data_definitions(self): From 5940c24da769fcee6f758e7210804434f35450f4 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 19:40:20 -0700 Subject: [PATCH 04/28] c123_binary_lstm_vgg16_cat_ffn_loss.yml --- .../components/models/sentence_embeddings.yml | 2 +- .../c2_class_lstm_resnet50_ewm_cat_is.yml | 5 +- .../default_c2_classification.yml | 2 +- ...> c123_binary_lstm_vgg16_cat_ffn_loss.yml} | 13 +- ..._binary_lstm_resnet152_is_cat_ffn_loss.yml | 291 ++++++++++++++++++ 5 files changed, 296 insertions(+), 17 deletions(-) rename configs/vqa_med_2019/vf/{c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml => c123_binary_lstm_vgg16_cat_ffn_loss.yml} (95%) create mode 100644 configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml diff --git a/configs/default/components/models/sentence_embeddings.yml b/configs/default/components/models/sentence_embeddings.yml index 0056849..5feccd7 100644 --- a/configs/default/components/models/sentence_embeddings.yml +++ b/configs/default/components/models/sentence_embeddings.yml @@ -13,7 +13,7 @@ source_vocabulary_files: '' # Additional tokens that will be added to vocabulary (LOADED) # This list can be extended, but <PAD> and <UNK> are special tokens. # <PAD> is ALWAYS used for padding shorter sequences. -additional_tokens: ',' +additional_tokens: '' # Enable <EOS> (end of sequence) token. eos_token: False diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml index 4834d40..eab27f4 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml @@ -5,10 +5,7 @@ training: problem: batch_size: 48 # Apply all preprocessing/data augmentations. - image_preprocessing: normalize - # none | random_affine | random_horizontal_flip | normalize | all question_preprocessing: lowercase,remove_punctuation,tokenize - # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all streams: # Problem is returning tokenized questions. questions: tokenized_questions validation: problem: batch_size: 48 + # Apply all preprocessing/data augmentations. question_preprocessing: lowercase,remove_punctuation,tokenize streams: # Problem is returning tokenized questions. 
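The feature-map mode added to the wrapper above simply truncates resnet50 after its last convolutional block, which is where the 7x7x2048 values published to globals come from. A standalone sketch of the same truncation, assuming the standard 224x224 input resolution:

    import torch
    from torchvision import models

    # Keep everything up to and including layer4; drop avgpool and fc.
    resnet = models.resnet50(pretrained=False)
    backbone = torch.nn.Sequential(*list(resnet.children())[:-2])

    with torch.no_grad():
        maps = backbone(torch.zeros(1, 3, 224, 224))
    print(maps.shape)  # torch.Size([1, 2048, 7, 7]): depth 2048, height 7, width 7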
questions: tokenized_questions diff --git a/configs/vqa_med_2019/c2_classification/default_c2_classification.yml b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml index 68f5880..b4b08d0 100644 --- a/configs/vqa_med_2019/c2_classification/default_c2_classification.yml +++ b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml @@ -82,6 +82,6 @@ pipeline: viewer: type: StreamViewer priority: 100.4 - input_streams: questions,category_names,answers,predicted_answers + input_streams: questions,tokenized_questions,category_names,answers,predicted_answers #: pipeline diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml similarity index 95% rename from configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml rename to configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml index 94af6aa..bdc0488 100644 --- a/configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml @@ -5,9 +5,9 @@ default_configs: vqa_med_2019/default_vqa_med_2019.yml training: problem: categories: C1,C2,C3 - export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3.weights.csv sampler: - weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + weights: ~/data/vqa-med/answers.c1_c2_c3.weights.csv # Validation parameters: validation: @@ -16,7 +16,6 @@ validation: pipeline: - name: c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss ################# PIPE 0: SHARED ################# @@ -33,14 +32,6 @@ pipeline: type: BatchSizeStatistics priority: 0.1 - # Questions encoding. - pipe1_question_tokenizer: - priority: 0.2 - type: SentenceTokenizer - streams: - inputs: questions - outputs: tokenized_questions ################# PIPE 0: CATEGORY ################# # Model 1: question embeddings diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml new file mode 100644 index 0000000..d2fb6d4 --- /dev/null +++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml @@ -0,0 +1,291 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1,C2,C3 + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Apply all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions + sampler: + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + +# Validation parameters: +validation: + problem: + categories: C1,C2,C3 + # Apply all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions + + +pipeline: + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_no_binary_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2}] + + # Statistics. 
+ batch_size: + type: BatchSizeStatistics + priority: 0.1 + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + vocabulary_size: vocabulary_size_all_c123 + word_mappings: word_mappings_all_c123 + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. + image_encoder: + type: TorchVisionWrapper + priority: 2.1 + streams: + inputs: images + outputs: image_activations + globals: + output_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 + C2 + C3 questions ################# + + # Answer encoding for PIPE 5. + pipe5_c123_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_c123_answers_ids + globals: + vocabulary_size: vocabulary_size_c123 + word_mappings: word_mappings_all_c123 + + # Sample masking based on categories. + pipe5_c123_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_c2_c3_no_binary_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c123_by_question_categories_indices # NOT USED + masks: pipe5_c123_masks + + # Model 4: FFN C1 answering + pipe5_c123_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_c123_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c123 + + pipe5_c123_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_c123_predictions + masks: pipe5_c123_masks + targets: pipe5_c123_answers_ids + loss: pipe5_c123_loss + + pipe5_c123_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_c123_masks + predictions: pipe5_c123_predictions + targets: pipe5_c123_answers_ids + globals: + word_mappings: word_mappings_all_c123 + statistics: + precision: pipe5_c123_precision + recall: pipe5_c123_recall + f1score: pipe5_c123_f1score + + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. 
+ viewer: + type: StreamViewer + priority: 9.3 + input_streams: tokenized_questions,answers, category_names,predicted_question_categories_names, pipe5_c123_masks,pipe5_c123_answers_without_yn_ids,pipe5_c123_predictions + + +#: pipeline From 2371628fb6ecd81e4ba634c1d030d7ab4e5c5eed Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 19:46:20 -0700 Subject: [PATCH 05/28] c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml --- .../vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml index d2fb6d4..6e2c828 100644 --- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml @@ -172,6 +172,7 @@ pipeline: # Image encoder. image_encoder: type: TorchVisionWrapper + model_type: resnet152 priority: 2.1 streams: inputs: images outputs: image_activations globals: output_size: image_encoder_output_size From d5a11d0ea04a66999e397dcd4c62285db830284e Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 19:51:06 -0700 Subject: [PATCH 06/28] c123_no_binary_lstm_resnet50_is_cat_ffn_loss --- ...o_binary_lstm_resnet50_is_cat_ffn_loss.yml | 292 ++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml new file mode 100644 index 0000000..e95750a --- /dev/null +++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml @@ -0,0 +1,292 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1,C2,C3 + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Apply all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions + sampler: + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + +# Validation parameters: +validation: + problem: + categories: C1,C2,C3 + # Apply all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions + + +pipeline: + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_no_binary_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2}] + + # Statistics. 
+ batch_size: + type: BatchSizeStatistics + priority: 0.1 + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + vocabulary_size: vocabulary_size_all_c123 + word_mappings: word_mappings_all_c123 + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. + image_encoder: + type: TorchVisionWrapper + model_type: resnet50 + priority: 2.1 + streams: + inputs: images + outputs: image_activations + globals: + output_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 + C2 + C3 questions ################# + + # Answer encoding for PIPE 5. + pipe5_c123_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_c123_answers_ids + globals: + vocabulary_size: vocabulary_size_c123 + word_mappings: word_mappings_all_c123 + + # Sample masking based on categories. + pipe5_c123_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_c2_c3_no_binary_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c123_by_question_categories_indices # NOT USED + masks: pipe5_c123_masks + + # Model 4: FFN C1 answering + pipe5_c123_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_c123_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c123 + + pipe5_c123_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_c123_predictions + masks: pipe5_c123_masks + targets: pipe5_c123_answers_ids + loss: pipe5_c123_loss + + pipe5_c123_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_c123_masks + predictions: pipe5_c123_predictions + targets: pipe5_c123_answers_ids + globals: + word_mappings: word_mappings_all_c123 + statistics: + precision: pipe5_c123_precision + recall: pipe5_c123_recall + f1score: pipe5_c123_f1score + + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. 
+ viewer: + type: StreamViewer + priority: 9.3 + input_streams: tokenized_questions,answers, category_names,predicted_question_categories_names, pipe5_c123_masks,pipe5_c123_answers_without_yn_ids,pipe5_c123_predictions + + +#: pipeline From b7b619a88167e0b8525b4da620e1386dcc629baa Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 20:23:11 -0700 Subject: [PATCH 07/28] cleanup of c123_no_binary cat pipelines with different image encoders --- .../c123_binary_lstm_vgg16_cat_ffn_loss.yml | 93 ++++++++++++------- ..._binary_lstm_resnet152_is_cat_ffn_loss.yml | 20 +++- ...o_binary_lstm_resnet50_is_cat_ffn_loss.yml | 20 +++- 3 files changed, 94 insertions(+), 39 deletions(-) diff --git a/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml index bdc0488..3d5b25a 100644 --- a/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml @@ -5,14 +5,22 @@ default_configs: vqa_med_2019/default_vqa_med_2019.yml training: problem: categories: C1,C2,C3 - export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3.weights.csv + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Apply all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions sampler: - weights: ~/data/vqa-med/answers.c1_c2_c3.weights.csv + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv # Validation parameters: validation: problem: categories: C1,C2,C3 + # Apply all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions pipeline: @@ -24,8 +32,8 @@ pipeline: type: GlobalVariablePublisher priority: 0 # Add input_size to globals. - keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_binary_yn_word_to_ix] - values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2, "BINARY": 3}] + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_no_binary_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2}] # Statistics. batch_size: type: BatchSizeStatistics priority: 0.1 @@ -114,8 +122,6 @@ pipeline: statistics: accuracy: categorization_accuracy - - ################# PIPE 1: SHARED QUESTION ENCODER ################# # Model 1: question embeddings @@ -158,14 +164,15 @@ pipeline: inputs: answers outputs: all_answers_ids globals: - vocabulary_size: vocabulary_size_all_c1_c2_c3_binary - word_mappings: word_mappings_all_c1_c2_c3_binary + vocabulary_size: vocabulary_size_all_c123 + word_mappings: word_mappings_all_c123 ################# PIPE 2: SHARED IMAGE ENCODER ################# # Image encoder. image_encoder: type: TorchVisionWrapper + model_type: vgg16 priority: 2.1 streams: inputs: images @@ -202,10 +209,10 @@ pipeline: output_size: concatenated_activations_size - ################# PIPE 5: C1 + C2 + C2 + Binary Y/N question ################# + ################# PIPE 5: C1 + C2 + C3 questions ################# # Answer encoding for PIPE 5. 
- pipe5_all_answer_indexer: + pipe5_c123_answer_indexer: type: LabelIndexer priority: 5.1 data_folder: ~/data/vqa-med @@ -214,72 +221,88 @@ pipeline: export_word_mappings_to_globals: True streams: inputs: answers - outputs: pipe5_all_answers_ids + outputs: pipe5_c123_answers_ids globals: - vocabulary_size: vocabulary_size_c1_c2_c3_binary - word_mappings: word_mappings_all_c1_c2_c3_binary + vocabulary_size: vocabulary_size_c123 + word_mappings: word_mappings_all_c123 # Sample masking based on categories. - pipe5_all_string_to_mask: + pipe5_c123_string_to_mask: priority: 5.2 type: StringToMask globals: - word_mappings: category_c1_c2_c3_binary_yn_word_to_ix + word_mappings: category_c1_c2_c3_no_binary_word_to_ix streams: strings: pipe0_predicted_question_categories_names - string_indices: predicted_c1_c2_c3_binary_by_question_categories_indices # NOT USED - masks: pipe5_all_masks + string_indices: predicted_c123_by_question_categories_indices # NOT USED + masks: pipe5_c123_masks # Model 4: FFN C1 answering - pipe5_all_ffn: + pipe5_c123_ffn: priority: 5.3 type: FeedForwardNetwork hidden: [50] dropout_rate: 0.5 streams: inputs: concatenated_activations - predictions: pipe5_all_predictions + predictions: pipe5_c123_predictions globals: input_size: concatenated_activations_size - prediction_size: vocabulary_size_c1_c2_c3_binary + prediction_size: vocabulary_size_c123 - pipe5_all_nllloss: + pipe5_c123_nllloss: type: NLLLoss priority: 5.4 targets_dim: 1 use_masking: True streams: - predictions: pipe5_all_predictions - masks: pipe5_all_masks - targets: pipe5_all_answers_ids - loss: pipe5_all_loss + predictions: pipe5_c123_predictions + masks: pipe5_c123_masks + targets: pipe5_c123_answers_ids + loss: pipe5_c123_loss - pipe5_all_precision_recall: + pipe5_c123_precision_recall: type: PrecisionRecallStatistics priority: 5.5 use_word_mappings: True use_masking: True - #show_class_scores: True + show_class_scores: True #show_confusion_matrix: True streams: - masks: pipe5_all_masks - predictions: pipe5_all_predictions - targets: pipe5_all_answers_ids + masks: pipe5_c123_masks + predictions: pipe5_c123_predictions + targets: pipe5_c123_answers_ids globals: - word_mappings: word_mappings_all_c1_c2_c3_binary + word_mappings: word_mappings_all_c123 statistics: - precision: pipe5_all_precision - recall: pipe5_all_recall - f1score: pipe5_all_f1score + precision: pipe5_c123_precision + recall: pipe5_c123_recall + f1score: pipe5_c123_f1score + # C123 Predictions decoder. + pipe5_prediction_decoder: + type: WordDecoder + priority: 5.6 + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe5_c123_predictions + outputs: pipe5_c123_predicted_answers + globals: + word_mappings: word_mappings_all_c123 ################# PIPE 9: MERGE ANSWERS ################# + # Viewers. 
viewer: type: StreamViewer priority: 9.3 - input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_all_masks,pipe5_all_answers_without_yn_ids,pipe5_all_predictions + input_streams: + tokenized_questions, category_names, + pipe0_predicted_question_categories_names, + pipe5_c123_masks, + answers, pipe5_c123_predicted_answers #: pipeline diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml index 6e2c828..1de281c 100644 --- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml @@ -266,7 +266,7 @@ pipeline: priority: 5.5 use_word_mappings: True use_masking: True - #show_class_scores: True + show_class_scores: True #show_confusion_matrix: True streams: masks: pipe5_c123_masks @@ -279,14 +279,30 @@ pipeline: recall: pipe5_c123_recall f1score: pipe5_c123_f1score + # C123 Predictions decoder. + pipe5_prediction_decoder: + type: WordDecoder + priority: 5.6 + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe5_c123_predictions + outputs: pipe5_c123_predicted_answers + globals: + word_mappings: word_mappings_all_c123 ################# PIPE 9: MERGE ANSWERS ################# + # Viewers. viewer: type: StreamViewer priority: 9.3 - input_streams: tokenized_questions,answers, category_names,predicted_question_categories_names, pipe5_c123_masks,pipe5_c123_answers_without_yn_ids,pipe5_c123_predictions + input_streams: + tokenized_questions, category_names, + pipe0_predicted_question_categories_names, + pipe5_c123_masks, + answers, pipe5_c123_predicted_answers #: pipeline diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml index e95750a..287b2a8 100644 --- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml @@ -266,7 +266,7 @@ pipeline: priority: 5.5 use_word_mappings: True use_masking: True - #show_class_scores: True + show_class_scores: True #show_confusion_matrix: True streams: masks: pipe5_c123_masks @@ -279,14 +279,30 @@ pipeline: recall: pipe5_c123_recall f1score: pipe5_c123_f1score + # C123 Predictions decoder. + pipe5_prediction_decoder: + type: WordDecoder + priority: 5.6 + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe5_c123_predictions + outputs: pipe5_c123_predicted_answers + globals: + word_mappings: word_mappings_all_c123 ################# PIPE 9: MERGE ANSWERS ################# + # Viewers. 
viewer:
 type: StreamViewer
 priority: 9.3
- input_streams: tokenized_questions,answers, category_names,predicted_question_categories_names, pipe5_c123_masks,pipe5_c123_answers_without_yn_ids,pipe5_c123_predictions
+ input_streams:
+ tokenized_questions, category_names,
+ pipe0_predicted_question_categories_names,
+ pipe5_c123_masks,
+ answers, pipe5_c123_predicted_answers
#: pipeline
From ef65ef62390eb73e41ab3c08c3ba172708699259 Mon Sep 17 00:00:00 2001
From: tkornut
Date: Mon, 29 Apr 2019 21:35:48 -0700
Subject: [PATCH 08/28] c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml + cleanups of v2 c123 pipelines
---
 .../c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml | 1 +
 .../c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml | 1 +
 .../c2_classification_all_rnn_vgg16_ewm_size.yml | 1 +
 .../vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml | 1 +
 .../vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml | 1 +
 5 files changed, 5 insertions(+)
diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml
index 8558fd7..b27aea1 100644
--- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml
+++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet152_ewm_cat_is.yml
@@ -95,6 +95,7 @@ pipeline:
 type: FeedForwardNetwork
 hidden_sizes: [100]
 dropout_rate: 0.5
+ use_logsoftmax: False
 streams:
 inputs: element_wise_activations
 predictions: question_image_activations
diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml
index eab27f4..2db4248 100644
--- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml
+++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_ewm_cat_is.yml
@@ -92,6 +92,7 @@ pipeline:
 type: FeedForwardNetwork
 hidden_sizes: [100]
 dropout_rate: 0.5
+ use_logsoftmax: False
 streams:
 inputs: element_wise_activations
 predictions: question_image_activations
diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml
index d34899b..1a1f774 100644
--- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml
+++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml
@@ -79,6 +79,7 @@ pipeline:
 type: FeedForwardNetwork
 hidden_sizes: [100]
 dropout_rate: 0.5
+ use_logsoftmax: False
 streams:
 inputs: element_wise_activations
 predictions: question_image_activations
diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml
index 1de281c..9b86692 100644
--- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml
+++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml
@@ -186,6 +186,7 @@ pipeline:
 image_size_encoder:
 type: FeedForwardNetwork
 priority: 3.1
+ use_logsoftmax: False
 streams:
 inputs: image_sizes
 predictions: image_size_activations
diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml
index e95750a..287b2a8 100644
--- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml
+++
b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml
@@ -186,6 +186,7 @@ pipeline:
 image_size_encoder:
 type: FeedForwardNetwork
 priority: 3.1
+ use_logsoftmax: False
 streams:
 inputs: image_sizes
 predictions: image_size_activations
From 43f83bc2361f697bb90c3db9832e3bbc23258dae Mon Sep 17 00:00:00 2001
From: tkornut
Date: Mon, 29 Apr 2019 21:40:12 -0700
Subject: [PATCH 09/28] c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
---
 ...nary_lstm_resnet50_ewm_is_cat_ffn_loss.yml | 336 ++++++++++++++++++
 1 file changed, 336 insertions(+)
 create mode 100644 configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
new file mode 100644
index 0000000..fa58b97
--- /dev/null
+++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
@@ -0,0 +1,336 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+ problem:
+ categories: C1,C2,C3
+ export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+ # Apply all preprocessing/data augmentations.
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+ sampler:
+ weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+
+# Validation parameters:
+validation:
+ problem:
+ categories: C1,C2,C3
+ # Apply all preprocessing/data augmentations.
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+
+
+pipeline:
+
+ ################# PIPE 0: SHARED #################
+
+ # Add global variables.
+ global_publisher:
+ type: GlobalVariablePublisher
+ priority: 0
+ # Add input_size to globals.
+ keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, element_wise_activation_size, category_c1_c2_c3_no_binary_word_to_ix]
+ values: [100, 2, 10, 100, 100, {"C1": 0, "C2": 1, "C3": 2}]
+
+ # Statistics.
+ batch_size: + type: BatchSizeStatistics + priority: 0.1 + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_question_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_encoder_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. 
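+ # Illustrative note (assumed LabelIndexer semantics; the CSV rows are hypothetical):
+ # if answers.c1_c2_c3_binary_yn.word.mappings.csv contains rows such as "yes,0",
+ # "no,1", "axial,2", the indexer turns each string in the "answers" stream into its
+ # integer id, and the flag below additionally publishes the mapping and its size
+ # under the global names listed in the "globals" section.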
+ export_word_mappings_to_globals: True
+ streams:
+ inputs: answers
+ outputs: all_answers_ids
+ globals:
+ vocabulary_size: vocabulary_size_all_c123
+ word_mappings: word_mappings_all_c123
+
+ ################# PIPE 2: SHARED IMAGE ENCODER #################
+
+ # Image encoder.
+ image_encoder:
+ type: TorchVisionWrapper
+ model: vgg16
+ priority: 2.1
+ streams:
+ inputs: images
+ outputs: image_activations
+ globals:
+ output_size: image_encoder_output_size
+
+ ################# PIPE 3: SHARED IMAGE SIZE ENCODER #################
+
+ # Model - image size classifier.
+ image_size_encoder:
+ type: FeedForwardNetwork
+ priority: 3.1
+ use_logsoftmax: False
+ streams:
+ inputs: image_sizes
+ predictions: image_size_activations
+ globals:
+ input_size: image_size_encoder_input_size
+ prediction_size: image_size_encoder_output_size
+
+ ################# PIPE 4: image-question fusion #################
+ # Element wise multiplication + FF.
+ question_image_fusion:
+ priority: 4.1
+ type: ElementWiseMultiplication
+ dropout_rate: 0.5
+ streams:
+ image_encodings: image_activations
+ question_encodings: question_activations
+ outputs: element_wise_activations
+ globals:
+ image_encoding_size: image_encoder_output_size
+ question_encoding_size: question_encoder_output_size
+ output_size: element_wise_activation_size
+
+ question_image_ffn:
+ priority: 4.2
+ type: FeedForwardNetwork
+ hidden_sizes: [100]
+ dropout_rate: 0.5
+ use_logsoftmax: False
+ streams:
+ inputs: element_wise_activations
+ predictions: question_image_activations
+ globals:
+ input_size: element_wise_activation_size
+ prediction_size: element_wise_activation_size
+
+ ################# PIPE 5: image-question-image size fusion #################
+
+ # 5th subpipeline: concatenation
+ concat:
+ priority: 5.1
+ type: Concatenation
+ input_streams: [question_image_activations,image_size_activations]
+ # Concatenation
+ dim: 1 # default
+ input_dims: [[-1,100],[-1,10]]
+ output_dims: [-1,110]
+ streams:
+ outputs: concatenated_activations
+ globals:
+ output_size: concatenated_activations_size
+
+ ################# PIPE 6: C1 + C2 + C3 questions #################
+
+ # Answer encoding for PIPE 6.
+ pipe6_c123_answer_indexer:
+ type: LabelIndexer
+ priority: 6.1
+ data_folder: ~/data/vqa-med
+ word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
+ # Export mappings and size to globals.
+ export_word_mappings_to_globals: True
+ streams:
+ inputs: answers
+ outputs: pipe6_c123_answers_ids
+ globals:
+ vocabulary_size: vocabulary_size_c123
+ word_mappings: word_mappings_all_c123
+
+ # Sample masking based on categories.
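+ # Illustrative note (assumed StringToMask semantics): with the published mapping
+ # {"C1": 0, "C2": 1, "C3": 2}, a batch of predicted category names
+ # ["C1", "BINARY", "C3"] would yield masks [1, 0, 1]; samples whose predicted
+ # category is not in the mapping are masked out, so the NLLLoss and
+ # PrecisionRecallStatistics components below (use_masking: True) are computed
+ # only over C1/C2/C3 samples.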
+ pipe6_c123_string_to_mask:
+ priority: 6.2
+ type: StringToMask
+ globals:
+ word_mappings: category_c1_c2_c3_no_binary_word_to_ix
+ streams:
+ strings: pipe0_predicted_question_categories_names
+ string_indices: predicted_c123_by_question_categories_indices # NOT USED
+ masks: pipe6_c123_masks
+
+ # Model 4: FFN C123 answering
+ pipe6_c123_answer_classifier:
+ priority: 6.3
+ type: FeedForwardNetwork
+ hidden: [100]
+ dropout_rate: 0.5
+ streams:
+ inputs: concatenated_activations
+ predictions: pipe6_c123_predictions
+ globals:
+ input_size: concatenated_activations_size
+ prediction_size: vocabulary_size_c123
+
+ pipe6_c123_nllloss:
+ type: NLLLoss
+ priority: 6.4
+ targets_dim: 1
+ use_masking: True
+ streams:
+ predictions: pipe6_c123_predictions
+ masks: pipe6_c123_masks
+ targets: pipe6_c123_answers_ids
+ loss: pipe6_c123_loss
+
+ pipe6_c123_precision_recall:
+ type: PrecisionRecallStatistics
+ priority: 6.5
+ use_word_mappings: True
+ use_masking: True
+ show_class_scores: True
+ #show_confusion_matrix: True
+ streams:
+ masks: pipe6_c123_masks
+ predictions: pipe6_c123_predictions
+ targets: pipe6_c123_answers_ids
+ globals:
+ word_mappings: word_mappings_all_c123
+ statistics:
+ precision: pipe6_c123_precision
+ recall: pipe6_c123_recall
+ f1score: pipe6_c123_f1score
+
+ # C123 Predictions decoder.
+ pipe6_c123_prediction_decoder:
+ type: WordDecoder
+ priority: 6.6
+ # Use the same word mappings as label indexer.
+ import_word_mappings_from_globals: True
+ streams:
+ inputs: pipe6_c123_predictions
+ outputs: pipe6_c123_predicted_answers
+ globals:
+ word_mappings: word_mappings_all_c123
+
+ ################# PIPE 9: MERGE ANSWERS #################
+
+ # Viewers.
+ viewer:
+ type: StreamViewer
+ priority: 9.3
+ input_streams:
+ tokenized_questions, category_names,
+ pipe0_predicted_question_categories_names,
+ pipe6_c123_masks,
+ answers, pipe6_c123_predicted_answers
+
+
+#: pipeline
From 3098a07f0ca3c7370a987a16dfa0f7a83eecc5a3 Mon Sep 17 00:00:00 2001
From: tkornut
Date: Mon, 29 Apr 2019 21:49:01 -0700
Subject: [PATCH 10/28] cleanup: priorities first
---
 ...nary_lstm_resnet50_ewm_is_cat_ffn_loss.yml | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
index fa58b97..dcc89a3 100644
--- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
+++ b/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
@@ -29,23 +29,23 @@ pipeline:
 # Add global variables.
 global_publisher:
- type: GlobalVariablePublisher
- priority: 0
+ priority: 0
+ type: GlobalVariablePublisher
 # Add input_size to globals.
 keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, element_wise_activation_size, category_c1_c2_c3_no_binary_word_to_ix]
 values: [100, 2, 10, 100, 100, {"C1": 0, "C2": 1, "C3": 2}]
 # Statistics.
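 # Note: the reordering in this patch is purely cosmetic; every component block now
 # lists "priority" before "type", i.e. a sketch of the resulting convention:
 #   batch_size:
 #     priority: 0.1
 #     type: BatchSizeStatistics
 # Behaviour is unchanged, since the priority values themselves are untouched.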
 batch_size:
- type: BatchSizeStatistics
- priority: 0.1
+ priority: 0.1
+ type: BatchSizeStatistics
 ################# PIPE 0: CATEGORY #################
 # Model 1: question embeddings
 pipe0_question_embeddings:
- type: SentenceEmbeddings
- priority: 0.3
+ priority: 0.3
+ type: SentenceEmbeddings
 # LOAD AND FREEZE #
 load:
 file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
 model: question_embeddings
@@ -114,8 +114,8 @@ pipeline:
 word_mappings: category_word_mappings
 pipe0_category_accuracy:
- type: AccuracyStatistics
- priority: 0.7
+ priority: 0.7
+ type: AccuracyStatistics
 streams:
 targets: category_ids
 predictions: pipe0_predicted_question_categories_preds
@@ -126,8 +126,8 @@ pipeline:
 # Model 1: question embeddings
 pipe1_question_embeddings:
- type: SentenceEmbeddings
- priority: 1.1
+ priority: 1.1
+ type: SentenceEmbeddings
 embeddings_size: 50
 pretrained_embeddings_file: glove.6B.50d.txt
 data_folder: ~/data/vqa-med
@@ -154,8 +154,8 @@ pipeline:
 # Answer encoding
 pipe1_all_answer_indexer:
- type: LabelIndexer
- priority: 1.3
+ priority: 1.3
+ type: LabelIndexer
 data_folder: ~/data/vqa-med
 word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
 # Export mappings and size to globals.
@@ -171,9 +171,9 @@ pipeline:
 # Image encoder.
 image_encoder:
+ priority: 2.1
 type: TorchVisionWrapper
 model: vgg16
- priority: 2.1
 streams:
 inputs: images
 outputs: image_activations
@@ -184,8 +184,8 @@ pipeline:
 # Model - image size classifier.
 image_size_encoder:
- type: FeedForwardNetwork
- priority: 3.1
+ priority: 3.1
+ type: FeedForwardNetwork
 use_logsoftmax: False
 streams:
 inputs: image_sizes
 predictions: image_size_activations
@@ -242,8 +242,8 @@ pipeline:
 # Answer encoding for PIPE 6.
 pipe6_c123_answer_indexer:
- type: LabelIndexer
- priority: 6.1
+ priority: 6.1
+ type: LabelIndexer
 data_folder: ~/data/vqa-med
 word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
 # Export mappings and size to globals.
@@ -280,8 +280,8 @@ pipeline:
 prediction_size: vocabulary_size_c123
 pipe6_c123_nllloss:
- type: NLLLoss
- priority: 6.4
+ priority: 6.4
+ type: NLLLoss
 targets_dim: 1
 use_masking: True
 streams:
@@ -291,8 +291,8 @@ pipeline:
 loss: pipe6_c123_loss
 pipe6_c123_precision_recall:
- type: PrecisionRecallStatistics
- priority: 6.5
+ priority: 6.5
+ type: PrecisionRecallStatistics
 use_word_mappings: True
 use_masking: True
 show_class_scores: True
@@ -310,8 +310,8 @@ pipeline:
 # C123 Predictions decoder.
 pipe6_c123_prediction_decoder:
- type: WordDecoder
- priority: 6.6
+ priority: 6.6
+ type: WordDecoder
 # Use the same word mappings as label indexer.
 import_word_mappings_from_globals: True
 streams:
@@ -324,8 +324,8 @@ pipeline:
 # Viewers.
 viewer:
- type: StreamViewer
- priority: 9.3
+ priority: 9.3
+ type: StreamViewer
 input_streams:
 tokenized_questions, category_names,
 pipe0_predicted_question_categories_names,
 pipe6_c123_masks,
 answers, pipe6_c123_predicted_answers
From 0aaa1a527676bc6ab562f117dde2c7b8dda8f577 Mon Sep 17 00:00:00 2001
From: tkornut
Date: Mon, 29 Apr 2019 22:23:05 -0700
Subject: [PATCH 11/28] trainer fix: using name of config file when pipeline name not present
---
 ptp/workers/trainer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/ptp/workers/trainer.py b/ptp/workers/trainer.py
index 9a93591..1182574 100644
--- a/ptp/workers/trainer.py
+++ b/ptp/workers/trainer.py
@@ -156,7 +156,9 @@ def setup_experiment(self):
 pipeline_name = self.config['pipeline']['name']
 except KeyError:
 # Using name of the first configuration file from command line.
- pipeline_name = path.basename(root_configs[0])
+ basename = path.basename(root_configs[0])
+ # Take config filename without extension.
+ pipeline_name = path.splitext(basename)[0]
 # Set pipeline name, so processor can use it afterwards.
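+ # Illustrative example (Python standard library behaviour; the file name is
+ # just an example from this repo):
+ #   path.basename("configs/vqa_med_2019/c2_classification/c2_class_lstm_vgg16_rn.yml")
+ #     -> "c2_class_lstm_vgg16_rn.yml"
+ #   path.splitext("c2_class_lstm_vgg16_rn.yml")[0]
+ #     -> "c2_class_lstm_vgg16_rn"
+ # i.e. the pipeline is named after the first config file, minus directory and extension.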
self.config['pipeline'].add_config_params({'name': pipeline_name}) From 6ad9c28ae2975d3536dd88f6f3e8c6dd62d191b0 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 23:32:07 -0700 Subject: [PATCH 12/28] cleanup and rename of simple vf configs --- ...net152_is_cat_ffn_c123_no_binary_loss.yml} | 20 +- ..._ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml} | 28 +- ...t50_ewm_is_cat_ffn_c123_no_binary_loss.yml | 334 ++++++++++++++++++ ...snet50_is_cat_ffn_c123_no_binary_loss.yml} | 20 +- ..._vgg16_is_cat_ffn_c123_binary_yn_loss.yml} | 21 +- 5 files changed, 377 insertions(+), 46 deletions(-) rename configs/vqa_med_2019/vf/{c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml => lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml} (94%) rename configs/vqa_med_2019/vf/{c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml => lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml} (94%) create mode 100644 configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml rename configs/vqa_med_2019/vf/{c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml => lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml} (94%) rename configs/vqa_med_2019/vf/{c123_binary_lstm_vgg16_cat_ffn_loss.yml => lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml} (94%) diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml similarity index 94% rename from configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml rename to configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml index 9b86692..b52cf92 100644 --- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml @@ -32,7 +32,7 @@ pipeline: type: GlobalVariablePublisher priority: 0 # Add input_size to globals. - keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_no_binary_word_to_ix] + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c123_without_yn_word_to_ix] values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2}] # Statistics. @@ -74,7 +74,6 @@ pipeline: prediction_mode: Last initial_state: Trainable use_logsoftmax: False - dropout_rate: 0.5 streams: inputs: pipe0_embedded_questions predictions: pipe0_questions_activations @@ -144,7 +143,6 @@ pipeline: prediction_mode: Last initial_state: Trainable use_logsoftmax: False - dropout_rate: 0.5 streams: inputs: embedded_questions predictions: questions_activations @@ -164,8 +162,8 @@ pipeline: inputs: answers outputs: all_answers_ids globals: - vocabulary_size: vocabulary_size_all_c123 - word_mappings: word_mappings_all_c123 + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn ################# PIPE 2: SHARED IMAGE ENCODER ################# @@ -224,15 +222,15 @@ pipeline: inputs: answers outputs: pipe5_c123_answers_ids globals: - vocabulary_size: vocabulary_size_c123 - word_mappings: word_mappings_all_c123 + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn # Sample masking based on categories. 
pipe5_c123_string_to_mask: priority: 5.2 type: StringToMask globals: - word_mappings: category_c1_c2_c3_no_binary_word_to_ix + word_mappings: category_c123_without_yn_word_to_ix streams: strings: pipe0_predicted_question_categories_names string_indices: predicted_c123_by_question_categories_indices # NOT USED @@ -249,7 +247,7 @@ pipeline: predictions: pipe5_c123_predictions globals: input_size: concatenated_activations_size - prediction_size: vocabulary_size_c123 + prediction_size: vocabulary_size_c123_without_yn pipe5_c123_nllloss: type: NLLLoss @@ -274,7 +272,7 @@ pipeline: predictions: pipe5_c123_predictions targets: pipe5_c123_answers_ids globals: - word_mappings: word_mappings_all_c123 + word_mappings: word_mappings_c123_without_yn statistics: precision: pipe5_c123_precision recall: pipe5_c123_recall @@ -290,7 +288,7 @@ pipeline: inputs: pipe5_c123_predictions outputs: pipe5_c123_predicted_answers globals: - word_mappings: word_mappings_all_c123 + word_mappings: word_mappings_c123_without_yn ################# PIPE 9: MERGE ANSWERS ################# diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml similarity index 94% rename from configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml rename to configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml index dcc89a3..b0db77b 100644 --- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml @@ -32,7 +32,7 @@ pipeline: priority: 0 type: GlobalVariablePublisher # Add input_size to globals. - keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, element_wise_activation_size, category_c1_c2_c3_no_binary_word_to_ix] + keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, element_wise_activation_size, category_c123_without_yn_word_to_ix] values: [100, 2, 10, 100, 100, {"C1": 0, "C2": 1, "C3": 2}] # Statistics. @@ -74,7 +74,6 @@ pipeline: prediction_mode: Last initial_state: Trainable use_logsoftmax: False - dropout_rate: 0.5 streams: inputs: pipe0_embedded_questions predictions: pipe0_question_activations @@ -144,7 +143,6 @@ pipeline: prediction_mode: Last initial_state: Trainable use_logsoftmax: False - dropout_rate: 0.5 streams: inputs: embedded_questions predictions: question_activations @@ -164,8 +162,8 @@ pipeline: inputs: answers outputs: all_answers_ids globals: - vocabulary_size: vocabulary_size_all_c123 - word_mappings: word_mappings_all_c123 + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn ################# PIPE 2: SHARED IMAGE ENCODER ################# @@ -173,7 +171,7 @@ pipeline: image_encoder: priority: 2.1 type: TorchVisionWrapper - model: vgg16 + model: resnet50 streams: inputs: images outputs: image_activations @@ -252,15 +250,15 @@ pipeline: inputs: answers outputs: pipe6_c123_answers_ids globals: - vocabulary_size: vocabulary_size_c123 - word_mappings: word_mappings_all_c123 + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn # Sample masking based on categories. 
pipe6_c123_string_to_mask:
 priority: 6.2
 type: StringToMask
 globals:
- word_mappings: category_c1_c2_c3_no_binary_word_to_ix
+ word_mappings: category_c123_without_yn_word_to_ix
 streams:
 strings: pipe0_predicted_question_categories_names
 string_indices: predicted_c123_by_question_categories_indices # NOT USED
@@ -277,7 +275,7 @@ pipeline:
 predictions: pipe6_c123_predictions
 globals:
 input_size: concatenated_activations_size
- prediction_size: vocabulary_size_c123
+ prediction_size: vocabulary_size_c123_without_yn
 pipe6_c123_nllloss:
 priority: 6.4
@@ -302,7 +300,7 @@ pipeline:
 predictions: pipe6_c123_predictions
 targets: pipe6_c123_answers_ids
 globals:
- word_mappings: word_mappings_all_c123
+ word_mappings: word_mappings_c123_without_yn
 statistics:
 precision: pipe6_c123_precision
 recall: pipe6_c123_recall
@@ -318,7 +316,13 @@ pipeline:
 inputs: pipe6_c123_predictions
 outputs: pipe6_c123_predicted_answers
 globals:
- word_mappings: word_mappings_all_c123
+ word_mappings: word_mappings_c123_without_yn
+
+
+
+
+
+
 ################# PIPE 9: MERGE ANSWERS #################
diff --git a/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml
new file mode 100644
index 0000000..cba072b
--- /dev/null
+++ b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml
@@ -0,0 +1,334 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+ problem:
+ categories: C1,C2,C3
+ export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+ # Apply all preprocessing/data augmentations.
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+ sampler:
+ weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+
+# Validation parameters:
+validation:
+ problem:
+ categories: C1,C2,C3
+ # Apply all preprocessing/data augmentations.
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+
+
+pipeline:
+
+ ################# PIPE 0: SHARED #################
+
+ # Add global variables.
+ global_publisher:
+ priority: 0
+ type: GlobalVariablePublisher
+ # Add input_size to globals.
+ keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, element_wise_activation_size, category_c123_without_yn_word_to_ix]
+ values: [100, 2, 10, 100, 100, {"C1": 0, "C2": 1, "C3": 2}]
+
+ # Statistics.
+ batch_size: + priority: 0.1 + type: BatchSizeStatistics + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + priority: 0.3 + type: SentenceEmbeddings + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_question_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_encoder_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + priority: 0.7 + type: AccuracyStatistics + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + priority: 1.1 + type: SentenceEmbeddings + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + # Answer encoding + pipe1_all_answer_indexer: + priority: 1.3 + type: LabelIndexer + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. 
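+ # Illustrative note: the vocabulary size exported here (and again by the pipe6
+ # indexer below) is what later sizes the answer classifier's output layer, via
+ # "prediction_size: vocabulary_size_c123_without_yn" in pipe6_c123_answer_classifier.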
+ export_word_mappings_to_globals: True
+ streams:
+ inputs: answers
+ outputs: all_answers_ids
+ globals:
+ vocabulary_size: vocabulary_size_c123_without_yn
+ word_mappings: word_mappings_c123_without_yn
+
+ ################# PIPE 2: SHARED IMAGE ENCODER #################
+
+ # Image encoder.
+ image_encoder:
+ priority: 2.1
+ type: TorchVisionWrapper
+ model: resnet50
+ streams:
+ inputs: images
+ outputs: image_activations
+ globals:
+ output_size: image_encoder_output_size
+
+ ################# PIPE 3: SHARED IMAGE SIZE ENCODER #################
+
+ # Model - image size classifier.
+ image_size_encoder:
+ priority: 3.1
+ type: FeedForwardNetwork
+ use_logsoftmax: False
+ streams:
+ inputs: image_sizes
+ predictions: image_size_activations
+ globals:
+ input_size: image_size_encoder_input_size
+ prediction_size: image_size_encoder_output_size
+
+ ################# PIPE 4: image-question fusion #################
+ # Element wise multiplication + FF.
+ question_image_fusion:
+ priority: 4.1
+ type: ElementWiseMultiplication
+ dropout_rate: 0.5
+ streams:
+ image_encodings: image_activations
+ question_encodings: question_activations
+ outputs: element_wise_activations
+ globals:
+ image_encoding_size: image_encoder_output_size
+ question_encoding_size: question_encoder_output_size
+ output_size: element_wise_activation_size
+
+ question_image_ffn:
+ priority: 4.2
+ type: FeedForwardNetwork
+ hidden_sizes: [100]
+ dropout_rate: 0.5
+ use_logsoftmax: False
+ streams:
+ inputs: element_wise_activations
+ predictions: question_image_activations
+ globals:
+ input_size: element_wise_activation_size
+ prediction_size: element_wise_activation_size
+
+ ################# PIPE 5: image-question-image size fusion #################
+
+ # 5th subpipeline: concatenation
+ concat:
+ priority: 5.1
+ type: Concatenation
+ input_streams: [question_image_activations,image_size_activations]
+ # Concatenation
+ dim: 1 # default
+ input_dims: [[-1,100],[-1,10]]
+ output_dims: [-1,110]
+ streams:
+ outputs: concatenated_activations
+ globals:
+ output_size: concatenated_activations_size
+
+ ################# PIPE 6: C1 + C2 + C3 questions #################
+
+ # Answer encoding for PIPE 6.
+ pipe6_c123_answer_indexer:
+ priority: 6.1
+ type: LabelIndexer
+ data_folder: ~/data/vqa-med
+ word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv
+ # Export mappings and size to globals.
+ export_word_mappings_to_globals: True
+ streams:
+ inputs: answers
+ outputs: pipe6_c123_answers_ids
+ globals:
+ vocabulary_size: vocabulary_size_c123_without_yn
+ word_mappings: word_mappings_c123_without_yn
+
+ # Sample masking based on categories.
+ pipe6_c123_string_to_mask:
+ priority: 6.2
+ type: StringToMask
+ globals:
+ word_mappings: category_c123_without_yn_word_to_ix
+ streams:
+ strings: pipe0_predicted_question_categories_names
+ string_indices: predicted_c123_by_question_categories_indices # NOT USED
+ masks: pipe6_c123_masks
+
+ # Model 4: FFN C123 answering
+ pipe6_c123_answer_classifier:
+ priority: 6.3
+ type: FeedForwardNetwork
+ hidden: [100]
+ dropout_rate: 0.5
+ streams:
+ inputs: concatenated_activations
+ predictions: pipe6_c123_predictions
+ globals:
+ input_size: concatenated_activations_size
+ prediction_size: vocabulary_size_c123_without_yn
+
+ pipe6_c123_nllloss:
+ priority: 6.4
+ type: NLLLoss
+ targets_dim: 1
+ use_masking: True
+ streams:
+ predictions: pipe6_c123_predictions
+ masks: pipe6_c123_masks
+ targets: pipe6_c123_answers_ids
+ loss: pipe6_c123_loss
+
+ pipe6_c123_precision_recall:
+ priority: 6.5
+ type: PrecisionRecallStatistics
+ use_word_mappings: True
+ use_masking: True
+ show_class_scores: True
+ #show_confusion_matrix: True
+ streams:
+ masks: pipe6_c123_masks
+ predictions: pipe6_c123_predictions
+ targets: pipe6_c123_answers_ids
+ globals:
+ word_mappings: word_mappings_c123_without_yn
+ statistics:
+ precision: pipe6_c123_precision
+ recall: pipe6_c123_recall
+ f1score: pipe6_c123_f1score
+
+ # C123 Predictions decoder.
+ pipe6_c123_prediction_decoder:
+ priority: 6.6
+ type: WordDecoder
+ # Use the same word mappings as label indexer.
+ import_word_mappings_from_globals: True
+ streams:
+ inputs: pipe6_c123_predictions
+ outputs: pipe6_c123_predicted_answers
+ globals:
+ word_mappings: word_mappings_c123_without_yn
+
+ ################# PIPE 9: MERGE ANSWERS #################
+
+ # Viewers.
+ viewer:
+ priority: 9.3
+ type: StreamViewer
+ input_streams:
+ tokenized_questions, category_names,
+ pipe0_predicted_question_categories_names,
+ pipe6_c123_masks,
+ answers, pipe6_c123_predicted_answers
+
+
+#: pipeline
diff --git a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml
similarity index 94%
rename from configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml
rename to configs/vqa_med_2019/vf/lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml
index 3c92309..88a3bd7 100644
--- a/configs/vqa_med_2019/vf/c123_no_binary_lstm_resnet50_is_cat_ffn_loss.yml
+++ b/configs/vqa_med_2019/vf/lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml
@@ -32,7 +32,7 @@ pipeline:
 type: GlobalVariablePublisher
 priority: 0
 # Add input_size to globals.
- keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_no_binary_word_to_ix]
+ keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c123_without_yn_word_to_ix]
 values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2}]
 # Statistics.
@@ -74,7 +74,6 @@ pipeline: prediction_mode: Last initial_state: Trainable use_logsoftmax: False - dropout_rate: 0.5 streams: inputs: pipe0_embedded_questions predictions: pipe0_questions_activations @@ -144,7 +143,6 @@ pipeline: prediction_mode: Last initial_state: Trainable use_logsoftmax: False - dropout_rate: 0.5 streams: inputs: embedded_questions predictions: questions_activations @@ -164,8 +162,8 @@ pipeline: inputs: answers outputs: all_answers_ids globals: - vocabulary_size: vocabulary_size_all_c123 - word_mappings: word_mappings_all_c123 + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn ################# PIPE 2: SHARED IMAGE ENCODER ################# @@ -224,15 +222,15 @@ pipeline: inputs: answers outputs: pipe5_c123_answers_ids globals: - vocabulary_size: vocabulary_size_c123 - word_mappings: word_mappings_all_c123 + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn # Sample masking based on categories. pipe5_c123_string_to_mask: priority: 5.2 type: StringToMask globals: - word_mappings: category_c1_c2_c3_no_binary_word_to_ix + word_mappings: category_c123_without_yn_word_to_ix streams: strings: pipe0_predicted_question_categories_names string_indices: predicted_c123_by_question_categories_indices # NOT USED @@ -249,7 +247,7 @@ pipeline: predictions: pipe5_c123_predictions globals: input_size: concatenated_activations_size - prediction_size: vocabulary_size_c123 + prediction_size: vocabulary_size_c123_without_yn pipe5_c123_nllloss: type: NLLLoss @@ -274,7 +272,7 @@ pipeline: predictions: pipe5_c123_predictions targets: pipe5_c123_answers_ids globals: - word_mappings: word_mappings_all_c123 + word_mappings: word_mappings_c123_without_yn statistics: precision: pipe5_c123_precision recall: pipe5_c123_recall @@ -290,7 +288,7 @@ pipeline: inputs: pipe5_c123_predictions outputs: pipe5_c123_predicted_answers globals: - word_mappings: word_mappings_all_c123 + word_mappings: word_mappings_c123_without_yn ################# PIPE 9: MERGE ANSWERS ################# diff --git a/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml similarity index 94% rename from configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml rename to configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml index 3d5b25a..a4d45b8 100644 --- a/configs/vqa_med_2019/vf/c123_binary_lstm_vgg16_cat_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml @@ -32,8 +32,8 @@ pipeline: type: GlobalVariablePublisher priority: 0 # Add input_size to globals. - keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_no_binary_word_to_ix] - values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2}] + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_binary_yn_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2, "BINARY": 3}] # Statistics. 
batch_size:
@@ -74,7 +74,6 @@ pipeline:
 prediction_mode: Last
 initial_state: Trainable
 use_logsoftmax: False
- dropout_rate: 0.5
 streams:
 inputs: pipe0_embedded_questions
 predictions: pipe0_questions_activations
@@ -93,7 +92,6 @@ pipeline:
 freeze: True
 ###################
 hidden: [50]
- dropout_rate: 0.5
 streams:
 inputs: pipe0_questions_activations
 predictions: pipe0_predicted_question_categories_preds
@@ -144,7 +142,6 @@ pipeline:
 prediction_mode: Last
 initial_state: Trainable
 use_logsoftmax: False
- dropout_rate: 0.5
 streams:
 inputs: embedded_questions
 predictions: questions_activations
@@ -165,7 +162,7 @@ pipeline:
 outputs: all_answers_ids
 globals:
 vocabulary_size: vocabulary_size_all_c123
- word_mappings: word_mappings_all_c123
+ word_mappings: word_mappings_all_c123_binary_yn
 ################# PIPE 2: SHARED IMAGE ENCODER #################
@@ -223,15 +220,15 @@ pipeline:
 inputs: answers
 outputs: pipe5_c123_answers_ids
 globals:
- vocabulary_size: vocabulary_size_c123
- word_mappings: word_mappings_all_c123
+ vocabulary_size: vocabulary_size_c123_binary_yn
+ word_mappings: word_mappings_all_c123_binary_yn
 # Sample masking based on categories.
 pipe5_c123_string_to_mask:
 priority: 5.2
 type: StringToMask
 globals:
- word_mappings: category_c1_c2_c3_no_binary_word_to_ix
+ word_mappings: category_c1_c2_c3_binary_yn_word_to_ix
 streams:
 strings: pipe0_predicted_question_categories_names
 string_indices: predicted_c123_by_question_categories_indices # NOT USED
@@ -248,7 +245,7 @@ pipeline:
 predictions: pipe5_c123_predictions
 globals:
 input_size: concatenated_activations_size
- prediction_size: vocabulary_size_c123
+ prediction_size: vocabulary_size_c123_binary_yn
 pipe5_c123_nllloss:
 type: NLLLoss
@@ -273,7 +270,7 @@ pipeline:
 predictions: pipe5_c123_predictions
 targets: pipe5_c123_answers_ids
 globals:
- word_mappings: word_mappings_all_c123
+ word_mappings: word_mappings_all_c123_binary_yn
 statistics:
 precision: pipe5_c123_precision
 recall: pipe5_c123_recall
@@ -289,7 +286,7 @@ pipeline:
 inputs: pipe5_c123_predictions
 outputs: pipe5_c123_predicted_answers
 globals:
- word_mappings: word_mappings_all_c123
+ word_mappings: word_mappings_all_c123_binary_yn
 ################# PIPE 9: MERGE ANSWERS #################
From d8c588c189f4b154f00e92075986e3d308e4c2aa Mon Sep 17 00:00:00 2001
From: tkornut
Date: Mon, 29 Apr 2019 23:39:49 -0700
Subject: [PATCH 13/28] lstm_vgg16_is_cat_ffn_only_yn_loss.yml
---
 .../lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml | 305 ++++++++++++++++++
 .../vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml | 305 ++++++++++++++++++
 2 files changed, 610 insertions(+)
 create mode 100644 configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml
 create mode 100644 configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml
diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml
new file mode 100644
index 0000000..a4d45b8
--- /dev/null
+++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml
@@ -0,0 +1,305 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+ problem:
+ categories: C1,C2,C3
+ export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+ # Apply all preprocessing/data augmentations.
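+ # Illustrative walk-through of the chain below (assumed step semantics):
+ #   "What modality is shown?"
+ #   -> lowercase          -> "what modality is shown?"
+ #   -> remove_punctuation -> "what modality is shown"
+ #   -> tokenize           -> ["what", "modality", "is", "shown"]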
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+ sampler:
+ weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+
+# Validation parameters:
+validation:
+ problem:
+ categories: C1,C2,C3
+ # Apply all preprocessing/data augmentations.
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+
+
+pipeline:
+
+ ################# PIPE 0: SHARED #################
+
+ # Add global variables.
+ global_publisher:
+ type: GlobalVariablePublisher
+ priority: 0
+ # Add input_size to globals.
+ keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_binary_yn_word_to_ix]
+ values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2, "BINARY": 3}]
+
+ # Statistics.
+ batch_size:
+ type: BatchSizeStatistics
+ priority: 0.1
+
+ ################# PIPE 0: CATEGORY #################
+
+ # Model 1: question embeddings
+ pipe0_question_embeddings:
+ type: SentenceEmbeddings
+ priority: 0.3
+ # LOAD AND FREEZE #
+ load:
+ file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+ model: question_embeddings
+ freeze: True
+ ###################
+ embeddings_size: 50
+ pretrained_embeddings_file: glove.6B.50d.txt
+ data_folder: ~/data/vqa-med
+ word_mappings_file: questions.all.word.mappings.csv
+ streams:
+ inputs: tokenized_questions
+ outputs: pipe0_embedded_questions
+
+ # Model 2: question RNN
+ pipe0_lstm:
+ priority: 0.4
+ type: RecurrentNeuralNetwork
+ cell_type: LSTM
+ # LOAD AND FREEZE #
+ load:
+ file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+ model: lstm
+ freeze: True
+ ###################
+ prediction_mode: Last
+ initial_state: Trainable
+ use_logsoftmax: False
+ streams:
+ inputs: pipe0_embedded_questions
+ predictions: pipe0_questions_activations
+ globals:
+ input_size: embeddings_size
+ prediction_size: question_lstm_output_size
+
+ # Model 3: FFN question category
+ pipe0_classifier:
+ priority: 0.5
+ type: FeedForwardNetwork
+ # LOAD AND FREEZE #
+ load:
+ file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+ model: classifier
+ freeze: True
+ ###################
+ hidden: [50]
+ streams:
+ inputs: pipe0_questions_activations
+ predictions: pipe0_predicted_question_categories_preds
+ globals:
+ input_size: question_lstm_output_size # Set by global publisher
+ prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK
+
+ pipe0_category_decoder:
+ priority: 0.6
+ type: WordDecoder
+ # Use the same word mappings as label indexer.
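+ # Illustrative note (assumed WordDecoder semantics): the decoder takes the argmax
+ # of each prediction vector and maps the winning index back through the inverted
+ # word mappings, e.g. index 2 under {"C1": 0, "C2": 1, "C3": 2, ...} decodes to "C3".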
+ import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + vocabulary_size: vocabulary_size_all_c123 + word_mappings: word_mappings_all_c123_binary_yn + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. + image_encoder: + type: TorchVisionWrapper + model: vgg16 + priority: 2.1 + streams: + inputs: images + outputs: image_activations + globals: + output_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 + C2 + C3 questions ################# + + # Answer encoding for PIPE 5. + pipe5_c123_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_c123_answers_ids + globals: + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_all_c123_binary_yn + + # Sample masking based on categories. 
+ pipe5_c123_string_to_mask:
+ priority: 5.2
+ type: StringToMask
+ globals:
+ word_mappings: category_c1_c2_c3_binary_yn_word_to_ix
+ streams:
+ strings: pipe0_predicted_question_categories_names
+ string_indices: predicted_c123_by_question_categories_indices # NOT USED
+ masks: pipe5_c123_masks
+
+ # Model 4: FFN C123 answering
+ pipe5_c123_ffn:
+ priority: 5.3
+ type: FeedForwardNetwork
+ hidden: [50]
+ dropout_rate: 0.5
+ streams:
+ inputs: concatenated_activations
+ predictions: pipe5_c123_predictions
+ globals:
+ input_size: concatenated_activations_size
+ prediction_size: vocabulary_size_c123_binary_yn
+
+ pipe5_c123_nllloss:
+ type: NLLLoss
+ priority: 5.4
+ targets_dim: 1
+ use_masking: True
+ streams:
+ predictions: pipe5_c123_predictions
+ masks: pipe5_c123_masks
+ targets: pipe5_c123_answers_ids
+ loss: pipe5_c123_loss
+
+ pipe5_c123_precision_recall:
+ type: PrecisionRecallStatistics
+ priority: 5.5
+ use_word_mappings: True
+ use_masking: True
+ show_class_scores: True
+ #show_confusion_matrix: True
+ streams:
+ masks: pipe5_c123_masks
+ predictions: pipe5_c123_predictions
+ targets: pipe5_c123_answers_ids
+ globals:
+ word_mappings: word_mappings_all_c123_binary_yn
+ statistics:
+ precision: pipe5_c123_precision
+ recall: pipe5_c123_recall
+ f1score: pipe5_c123_f1score
+
+ # C123 Predictions decoder.
+ pipe5_prediction_decoder:
+ type: WordDecoder
+ priority: 5.6
+ # Use the same word mappings as label indexer.
+ import_word_mappings_from_globals: True
+ streams:
+ inputs: pipe5_c123_predictions
+ outputs: pipe5_c123_predicted_answers
+ globals:
+ word_mappings: word_mappings_all_c123_binary_yn
+
+ ################# PIPE 9: MERGE ANSWERS #################
+
+
+ # Viewers.
+ viewer:
+ type: StreamViewer
+ priority: 9.3
+ input_streams:
+ tokenized_questions, category_names,
+ pipe0_predicted_question_categories_names,
+ pipe5_c123_masks,
+ answers, pipe5_c123_predicted_answers
+
+
+#: pipeline
diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml
new file mode 100644
index 0000000..be6e659
--- /dev/null
+++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml
@@ -0,0 +1,305 @@
+# Load config defining problems for training, validation and testing.
+default_configs: vqa_med_2019/default_vqa_med_2019.yml
+
+# Training parameters:
+training:
+ problem:
+ categories: C1,C2,C3
+ export_sample_weights: ~/data/vqa-med/answers.binary_yn.weights.csv
+ # Apply all preprocessing/data augmentations.
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+ sampler:
+ weights: ~/data/vqa-med/answers.binary_yn.weights.csv
+
+# Validation parameters:
+validation:
+ problem:
+ categories: C1,C2,C3
+ # Apply all preprocessing/data augmentations.
+ question_preprocessing: lowercase,remove_punctuation,tokenize
+ streams:
+ questions: tokenized_questions
+
+
+pipeline:
+
+ ################# PIPE 0: SHARED #################
+
+ # Add global variables.
+ global_publisher:
+ type: GlobalVariablePublisher
+ priority: 0
+ # Add input_size to globals.
+ keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_binary_yn_word_to_ix]
+ values: [100, 2, 10, 100, {"BINARY": 3}]
+
+ # Statistics.
+ batch_size: + type: BatchSizeStatistics + priority: 0.1 + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals. 
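+ # Note: this indexer and pipe5_binary_yn_answer_indexer below read the same
+ # answers.binary_yn.word.mappings.csv and export the same globals, so the
+ # duplication appears redundant but harmless - the second export simply
+ # republishes identical values.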
+ export_word_mappings_to_globals: True
+ streams:
+ inputs: answers
+ outputs: all_answers_ids
+ globals:
+ vocabulary_size: vocabulary_size_binary_yn
+ word_mappings: word_mappings_binary_yn
+
+ ################# PIPE 2: SHARED IMAGE ENCODER #################
+
+ # Image encoder.
+ image_encoder:
+ type: TorchVisionWrapper
+ model: vgg16
+ priority: 2.1
+ streams:
+ inputs: images
+ outputs: image_activations
+ globals:
+ output_size: image_encoder_output_size
+
+ ################# PIPE 3: SHARED IMAGE SIZE ENCODER #################
+
+ # Model - image size classifier.
+ image_size_encoder:
+ type: FeedForwardNetwork
+ priority: 3.1
+ streams:
+ inputs: image_sizes
+ predictions: image_size_activations
+ globals:
+ input_size: image_size_encoder_input_size
+ prediction_size: image_size_encoder_output_size
+
+ ################# PIPE 4: SHARED CONCAT #################
+
+ concat:
+ type: Concatenation
+ priority: 4.1
+ input_streams: [questions_activations,image_activations,image_size_activations]
+ # Concatenation
+ dim: 1 # default
+ input_dims: [[-1,100],[-1,100],[-1,10]]
+ output_dims: [-1,210]
+ streams:
+ outputs: concatenated_activations
+ globals:
+ output_size: concatenated_activations_size
+
+
+ ################# PIPE 5: BINARY Y/N questions #################
+
+ # Answer encoding for PIPE 5.
+ pipe5_binary_yn_answer_indexer:
+ type: LabelIndexer
+ priority: 5.1
+ data_folder: ~/data/vqa-med
+ word_mappings_file: answers.binary_yn.word.mappings.csv
+ # Export mappings and size to globals.
+ export_word_mappings_to_globals: True
+ streams:
+ inputs: answers
+ outputs: pipe5_binary_yn_answers_ids
+ globals:
+ vocabulary_size: vocabulary_size_binary_yn
+ word_mappings: word_mappings_binary_yn
+
+ # Sample masking based on categories.
+ pipe5_binary_yn_string_to_mask:
+ priority: 5.2
+ type: StringToMask
+ globals:
+ word_mappings: category_binary_yn_word_to_ix
+ streams:
+ strings: pipe0_predicted_question_categories_names
+ string_indices: predicted_c123_by_question_categories_indices # NOT USED
+ masks: pipe5_binary_yn_masks
+
+ # Model 4: FFN binary Y/N answering
+ pipe5_binary_yn_ffn:
+ priority: 5.3
+ type: FeedForwardNetwork
+ hidden: [50]
+ dropout_rate: 0.5
+ streams:
+ inputs: concatenated_activations
+ predictions: pipe5_binary_yn_predictions
+ globals:
+ input_size: concatenated_activations_size
+ prediction_size: vocabulary_size_binary_yn
+
+ pipe5_binary_yn_nllloss:
+ type: NLLLoss
+ priority: 5.4
+ targets_dim: 1
+ use_masking: True
+ streams:
+ predictions: pipe5_binary_yn_predictions
+ masks: pipe5_binary_yn_masks
+ targets: pipe5_binary_yn_answers_ids
+ loss: pipe5_binary_yn_loss
+
+ pipe5_binary_yn_precision_recall:
+ type: PrecisionRecallStatistics
+ priority: 5.5
+ use_word_mappings: True
+ use_masking: True
+ show_class_scores: True
+ #show_confusion_matrix: True
+ streams:
+ masks: pipe5_binary_yn_masks
+ predictions: pipe5_binary_yn_predictions
+ targets: pipe5_binary_yn_answers_ids
+ globals:
+ word_mappings: word_mappings_binary_yn
+ statistics:
+ precision: pipe5_binary_yn_precision
+ recall: pipe5_binary_yn_recall
+ f1score: pipe5_binary_yn_f1score
+
+ # Binary Y/N predictions decoder.
+ pipe5_prediction_decoder:
+ type: WordDecoder
+ priority: 5.6
+ # Use the same word mappings as label indexer.
+ import_word_mappings_from_globals: True
+ streams:
+ inputs: pipe5_binary_yn_predictions
+ outputs: pipe5_binary_yn_predicted_answers
+ globals:
+ word_mappings: word_mappings_binary_yn
+
+ ################# PIPE 9: MERGE ANSWERS #################
+
+
+ # Viewers.
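+ # Illustrative note (assumed StreamViewer behaviour): for every sample the viewer
+ # prints the listed streams side by side, e.g. the tokenized question, the predicted
+ # category name, the 0/1 mask value, and the ground-truth vs. predicted answer,
+ # which makes it easy to eyeball which masked samples the binary Y/N head gets wrong.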
+ viewer: + type: StreamViewer + priority: 9.3 + input_streams: + tokenized_questions, category_names, + pipe0_predicted_question_categories_names, + pipe5_binary_yn_masks, + answers, pipe5_binary_yn_predicted_answers + + +#: pipeline From 529fb993f7fd773c76fa01a04f549ee8163a3ef6 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 23:48:11 -0700 Subject: [PATCH 14/28] configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml --- .../lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml | 68 +++++++++---------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml index a4d45b8..b9b4dc2 100644 --- a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml @@ -32,8 +32,8 @@ pipeline: type: GlobalVariablePublisher priority: 0 # Add input_size to globals. - keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_binary_yn_word_to_ix] - values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2, "BINARY": 3}] + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_without_yn_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2}] # Statistics. batch_size: @@ -154,15 +154,15 @@ pipeline: type: LabelIndexer priority: 1.3 data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. export_word_mappings_to_globals: True streams: inputs: answers outputs: all_answers_ids globals: - vocabulary_size: vocabulary_size_all_c123 - word_mappings: word_mappings_all_c123_binary_yn + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn ################# PIPE 2: SHARED IMAGE ENCODER ################# @@ -209,56 +209,56 @@ pipeline: ################# PIPE 5: C1 + C2 + C3 questions ################# # Answer encoding for PIPE 5. - pipe5_c123_answer_indexer: + pipe5_c123_without_yn_answer_indexer: type: LabelIndexer priority: 5.1 data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. export_word_mappings_to_globals: True streams: inputs: answers - outputs: pipe5_c123_answers_ids + outputs: pipe5_c123_without_yn_answers_ids globals: - vocabulary_size: vocabulary_size_c123_binary_yn - word_mappings: word_mappings_all_c123_binary_yn + vocabulary_size: vocabulary_size_c123_without_yn + word_mappings: word_mappings_c123_without_yn # Sample masking based on categories. 
- pipe5_c123_string_to_mask: + pipe5_c123_without_yn_string_to_mask: priority: 5.2 type: StringToMask globals: - word_mappings: category_c1_c2_c3_binary_yn_word_to_ix + word_mappings: category_c1_c2_c3_without_yn_word_to_ix streams: strings: pipe0_predicted_question_categories_names string_indices: predicted_c123_by_question_categories_indices # NOT USED - masks: pipe5_c123_masks + masks: pipe5_c123_without_yn_masks # Model 4: FFN C1 answering - pipe5_c123_ffn: + pipe5_c123_without_yn_ffn: priority: 5.3 type: FeedForwardNetwork hidden: [50] dropout_rate: 0.5 streams: inputs: concatenated_activations - predictions: pipe5_c123_predictions + predictions: pipe5_c123_without_yn_predictions globals: input_size: concatenated_activations_size - prediction_size: vocabulary_size_c123_binary_yn + prediction_size: vocabulary_size_c123_without_yn - pipe5_c123_nllloss: + pipe5_c123_without_yn_nllloss: type: NLLLoss priority: 5.4 targets_dim: 1 use_masking: True streams: - predictions: pipe5_c123_predictions - masks: pipe5_c123_masks - targets: pipe5_c123_answers_ids - loss: pipe5_c123_loss + predictions: pipe5_c123_without_yn_predictions + masks: pipe5_c123_without_yn_masks + targets: pipe5_c123_without_yn_answers_ids + loss: pipe5_c123_without_yn_loss - pipe5_c123_precision_recall: + pipe5_c123_without_yn_precision_recall: type: PrecisionRecallStatistics priority: 5.5 use_word_mappings: True @@ -266,15 +266,15 @@ pipeline: show_class_scores: True #show_confusion_matrix: True streams: - masks: pipe5_c123_masks - predictions: pipe5_c123_predictions - targets: pipe5_c123_answers_ids + masks: pipe5_c123_without_yn_masks + predictions: pipe5_c123_without_yn_predictions + targets: pipe5_c123_without_yn_answers_ids globals: - word_mappings: word_mappings_all_c123_binary_yn + word_mappings: word_mappings_c123_without_yn statistics: - precision: pipe5_c123_precision - recall: pipe5_c123_recall - f1score: pipe5_c123_f1score + precision: pipe5_c123_without_yn_precision + recall: pipe5_c123_without_yn_recall + f1score: pipe5_c123_without_yn_f1score # C123 Predictions decoder. pipe5_prediction_decoder: @@ -283,10 +283,10 @@ pipeline: # Use the same word mappings as label indexer. 
import_word_mappings_from_globals: True streams: - inputs: pipe5_c123_predictions - outputs: pipe5_c123_predicted_answers + inputs: pipe5_c123_without_yn_predictions + outputs: pipe5_c123_without_yn_predicted_answers globals: - word_mappings: word_mappings_all_c123_binary_yn + word_mappings: word_mappings_c123_without_yn ################# PIPE 9: MERGE ANSWERS ################# @@ -298,8 +298,8 @@ pipeline: input_streams: tokenized_questions, category_names, pipe0_predicted_question_categories_names, - pipe5_c123_masks, - answers, pipe5_c123_predicted_answers + pipe5_c123_without_yn_masks, + answers, pipe5_c123_without_yn_predicted_answers #: pipeline From 1c4722ae53cb82c4b3339e4a7009c0acaa0ad04c Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 29 Apr 2019 23:57:37 -0700 Subject: [PATCH 15/28] extend_answers working on tokenized_answers returned from problem --- configs/vqa_med_2019/default_extend_answers.yml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/configs/vqa_med_2019/default_extend_answers.yml b/configs/vqa_med_2019/default_extend_answers.yml index 270d5d1..929c555 100644 --- a/configs/vqa_med_2019/default_extend_answers.yml +++ b/configs/vqa_med_2019/default_extend_answers.yml @@ -9,6 +9,10 @@ training_answers: categories: all resize_image: &resize_image [224, 224] batch_size: 64 + # Appy all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions dataloader: # No sampler, process samples in the same order. shuffle: false @@ -22,6 +26,10 @@ validation_answers: split: validation resize_image: *resize_image batch_size: 64 + # Appy all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions dataloader: # No sampler, process samples in the same order. shuffle: false @@ -37,6 +45,10 @@ test_answers: split: test resize_image: *resize_image batch_size: 64 + # Appy all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions dataloader: # No sampler, process samples in the same order. shuffle: false @@ -45,7 +57,7 @@ test_answers: # Add component for exporting answers to files. pipeline: - disable: viewer + disable: viewer,question_tokenizer # # Viewers. 
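The question_preprocessing: lowercase,remove_punctuation,tokenize settings added above make the problem itself emit tokenized_questions, which is why the pipeline's question_tokenizer is disabled alongside the viewer. As a rough sketch of what those three steps do (whitespace splitting is an assumption here; ptp's tokenizer may differ):

import string

def preprocess_question(question):
    question = question.lower()                                               # lowercase
    question = question.translate(str.maketrans("", "", string.punctuation))  # remove_punctuation
    return question.split()                                                   # tokenize

print(preprocess_question("Is this a CT scan?"))
# ['is', 'this', 'a', 'ct', 'scan']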
viewer_extended: priority: 100.4 From da3c2f818c11d5029dc64c0ca6a6928db6f0986a Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 00:04:50 -0700 Subject: [PATCH 16/28] default_extended_answers: predicted_answers --- configs/vqa_med_2019/default_extend_answers.yml | 10 ++++++++-- .../vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml | 4 ++-- .../vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml | 6 +++--- .../vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml | 8 ++++---- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/configs/vqa_med_2019/default_extend_answers.yml b/configs/vqa_med_2019/default_extend_answers.yml index 929c555..97e9ddf 100644 --- a/configs/vqa_med_2019/default_extend_answers.yml +++ b/configs/vqa_med_2019/default_extend_answers.yml @@ -63,12 +63,18 @@ pipeline: priority: 100.4 type: StreamViewer sample_number: 0 - input_streams: indices,image_ids,questions,category_names,predicted_categories,answers,tokenized_answers,predicted_answers + input_streams: + indices,image_ids,tokenized_questions, + category_names,predicted_categories, + answers,tokenized_answers,predicted_answers exporter: priority: 100.5 type: StreamFileExporter separator: '|' - input_streams: indices,image_ids,questions,category_names,predicted_categories,answers,tokenized_answers,predicted_answers + input_streams: + indices,image_ids,tokenized_questions, + category_names,predicted_categories, + answers,tokenized_answers,predicted_answers #: pipeline diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml index a4d45b8..a420f14 100644 --- a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml @@ -284,7 +284,7 @@ pipeline: import_word_mappings_from_globals: True streams: inputs: pipe5_c123_predictions - outputs: pipe5_c123_predicted_answers + outputs: predicted_answers globals: word_mappings: word_mappings_all_c123_binary_yn @@ -299,7 +299,7 @@ pipeline: tokenized_questions, category_names, pipe0_predicted_question_categories_names, pipe5_c123_masks, - answers, pipe5_c123_predicted_answers + answers, predicted_answers #: pipeline diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml index b9b4dc2..1bf7bdc 100644 --- a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml @@ -238,7 +238,7 @@ pipeline: pipe5_c123_without_yn_ffn: priority: 5.3 type: FeedForwardNetwork - hidden: [50] + hidden: [100] dropout_rate: 0.5 streams: inputs: concatenated_activations @@ -284,7 +284,7 @@ pipeline: import_word_mappings_from_globals: True streams: inputs: pipe5_c123_without_yn_predictions - outputs: pipe5_c123_without_yn_predicted_answers + outputs: predicted_answers globals: word_mappings: word_mappings_c123_without_yn @@ -299,7 +299,7 @@ pipeline: tokenized_questions, category_names, pipe0_predicted_question_categories_names, pipe5_c123_without_yn_masks, - answers, pipe5_c123_without_yn_predicted_answers + answers, predicted_answers #: pipeline diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml index be6e659..03c2cea 100644 --- a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml 
@@ -235,10 +235,10 @@ pipeline: masks: pipe5_binary_yn_masks # Model 4: FFN C1 answering - pipe5_binary_yn_ffn: + pipe5_binary_yn_classifier: priority: 5.3 type: FeedForwardNetwork - hidden: [50] + hidden: [100] dropout_rate: 0.5 streams: inputs: concatenated_activations @@ -284,7 +284,7 @@ pipeline: import_word_mappings_from_globals: True streams: inputs: pipe5_binary_yn_predictions - outputs: pipe5_binary_yn_predicted_answers + outputs: predicted_answers globals: word_mappings: word_mappings_binary_yn @@ -299,7 +299,7 @@ pipeline: tokenized_questions, category_names, pipe0_predicted_question_categories_names, pipe5_binary_yn_masks, - answers, pipe5_binary_yn_predicted_answers + answers, predicted_answers #: pipeline From 32706a916be5bad6e11c7223aab6983236d09f70 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 00:23:15 -0700 Subject: [PATCH 17/28] configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml --- ...m_vgg16_is_cat_ffn_c123_binary_yn_loss.yml | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml index a420f14..71c3946 100644 --- a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml @@ -161,8 +161,8 @@ pipeline: inputs: answers outputs: all_answers_ids globals: - vocabulary_size: vocabulary_size_all_c123 - word_mappings: word_mappings_all_c123_binary_yn + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn ################# PIPE 2: SHARED IMAGE ENCODER ################# @@ -206,10 +206,10 @@ pipeline: output_size: concatenated_activations_size - ################# PIPE 5: C1 + C2 + C3 questions ################# + ################# PIPE 5: C1 + C2 + C3 + BINARY questions ################# # Answer encoding for PIPE 5. - pipe5_c123_answer_indexer: + pipe5_c123_binary_yn_answer_indexer: type: LabelIndexer priority: 5.1 data_folder: ~/data/vqa-med @@ -218,13 +218,13 @@ pipeline: export_word_mappings_to_globals: True streams: inputs: answers - outputs: pipe5_c123_answers_ids + outputs: pipe5_c123_binary_yn_answers_ids globals: vocabulary_size: vocabulary_size_c123_binary_yn - word_mappings: word_mappings_all_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn # Sample masking based on categories. 
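The widened hidden: [100] classifier above is, in plain PyTorch terms, a small MLP over the 210-dimensional concatenated activations (100 question + 100 image + 10 image-size features), ending in log-softmax since its predictions feed an NLLLoss. A sketch under those assumptions (the exact layer ordering inside ptp's FeedForwardNetwork is not shown in the patch):

import torch

ffn = torch.nn.Sequential(
    torch.nn.Dropout(0.5),            # dropout_rate: 0.5
    torch.nn.Linear(210, 100),        # hidden: [100]
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(100, 2),          # vocabulary_size_binary_yn (assumed yes/no)
    torch.nn.LogSoftmax(dim=1),       # NLLLoss expects log-probabilities
)

log_probs = ffn(torch.randn(48, 210))  # [batch_size, 2]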
- pipe5_c123_string_to_mask: + pipe5_c123_binary_yn_string_to_mask: priority: 5.2 type: StringToMask globals: @@ -232,33 +232,33 @@ pipeline: streams: strings: pipe0_predicted_question_categories_names string_indices: predicted_c123_by_question_categories_indices # NOT USED - masks: pipe5_c123_masks + masks: pipe5_c123_binary_yn_masks # Model 4: FFN C1 answering - pipe5_c123_ffn: + pipe5_c123_binary_yn_ffn: priority: 5.3 type: FeedForwardNetwork - hidden: [50] + hidden: [100] dropout_rate: 0.5 streams: inputs: concatenated_activations - predictions: pipe5_c123_predictions + predictions: pipe5_c123_binary_yn_predictions globals: input_size: concatenated_activations_size prediction_size: vocabulary_size_c123_binary_yn - pipe5_c123_nllloss: + pipe5_c123_binary_yn_nllloss: type: NLLLoss priority: 5.4 targets_dim: 1 use_masking: True streams: - predictions: pipe5_c123_predictions - masks: pipe5_c123_masks - targets: pipe5_c123_answers_ids - loss: pipe5_c123_loss + predictions: pipe5_c123_binary_yn_predictions + masks: pipe5_c123_binary_yn_masks + targets: pipe5_c123_binary_yn_answers_ids + loss: pipe5_c123_binary_yn_loss - pipe5_c123_precision_recall: + pipe5_c123_binary_yn_precision_recall: type: PrecisionRecallStatistics priority: 5.5 use_word_mappings: True @@ -266,15 +266,15 @@ pipeline: show_class_scores: True #show_confusion_matrix: True streams: - masks: pipe5_c123_masks - predictions: pipe5_c123_predictions - targets: pipe5_c123_answers_ids + masks: pipe5_c123_binary_yn_masks + predictions: pipe5_c123_binary_yn_predictions + targets: pipe5_c123_binary_yn_answers_ids globals: - word_mappings: word_mappings_all_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn statistics: - precision: pipe5_c123_precision - recall: pipe5_c123_recall - f1score: pipe5_c123_f1score + precision: pipe5_c123_binary_yn_precision + recall: pipe5_c123_binary_yn_recall + f1score: pipe5_c123_binary_yn_f1score # C123 Predictions decoder. pipe5_prediction_decoder: @@ -283,10 +283,10 @@ pipeline: # Use the same word mappings as label indexer. import_word_mappings_from_globals: True streams: - inputs: pipe5_c123_predictions + inputs: pipe5_c123_binary_yn_predictions outputs: predicted_answers globals: - word_mappings: word_mappings_all_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn ################# PIPE 9: MERGE ANSWERS ################# @@ -298,7 +298,7 @@ pipeline: input_streams: tokenized_questions, category_names, pipe0_predicted_question_categories_names, - pipe5_c123_masks, + pipe5_c123_binary_yn_masks, answers, predicted_answers From 5556abd5a201b9ee7fadaeb6b5e9c4f13bf1e9d1 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 00:45:23 -0700 Subject: [PATCH 18/28] configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml --- ...snet152_is_cat_ffn_c123_no_binary_loss.yml | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml index b52cf92..5a541f2 100644 --- a/configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml @@ -155,7 +155,7 @@ pipeline: type: LabelIndexer priority: 1.3 data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. 
export_word_mappings_to_globals: True streams: @@ -211,22 +211,22 @@ pipeline: ################# PIPE 5: C1 + C2 + C3 questions ################# # Answer encoding for PIPE 5. - pipe5_c123_answer_indexer: + pipe5_c123_without_yn_answer_indexer: type: LabelIndexer priority: 5.1 data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. export_word_mappings_to_globals: True streams: inputs: answers - outputs: pipe5_c123_answers_ids + outputs: pipe5_c123_without_yn_answers_ids globals: vocabulary_size: vocabulary_size_c123_without_yn word_mappings: word_mappings_c123_without_yn # Sample masking based on categories. - pipe5_c123_string_to_mask: + pipe5_c123_without_yn_string_to_mask: priority: 5.2 type: StringToMask globals: @@ -234,33 +234,33 @@ pipeline: streams: strings: pipe0_predicted_question_categories_names string_indices: predicted_c123_by_question_categories_indices # NOT USED - masks: pipe5_c123_masks + masks: pipe5_c123_without_yn_masks # Model 4: FFN C1 answering - pipe5_c123_ffn: + pipe5_c123_without_yn_ffn: priority: 5.3 type: FeedForwardNetwork - hidden: [50] + hidden: [100] dropout_rate: 0.5 streams: inputs: concatenated_activations - predictions: pipe5_c123_predictions + predictions: pipe5_c123_without_yn_predictions globals: input_size: concatenated_activations_size prediction_size: vocabulary_size_c123_without_yn - pipe5_c123_nllloss: + pipe5_c123_without_yn_nllloss: type: NLLLoss priority: 5.4 targets_dim: 1 use_masking: True streams: - predictions: pipe5_c123_predictions - masks: pipe5_c123_masks - targets: pipe5_c123_answers_ids - loss: pipe5_c123_loss + predictions: pipe5_c123_without_yn_predictions + masks: pipe5_c123_without_yn_masks + targets: pipe5_c123_without_yn_answers_ids + loss: pipe5_c123_without_yn_loss - pipe5_c123_precision_recall: + pipe5_c123_without_yn_precision_recall: type: PrecisionRecallStatistics priority: 5.5 use_word_mappings: True @@ -268,15 +268,15 @@ pipeline: show_class_scores: True #show_confusion_matrix: True streams: - masks: pipe5_c123_masks - predictions: pipe5_c123_predictions - targets: pipe5_c123_answers_ids + masks: pipe5_c123_without_yn_masks + predictions: pipe5_c123_without_yn_predictions + targets: pipe5_c123_without_yn_answers_ids globals: word_mappings: word_mappings_c123_without_yn statistics: - precision: pipe5_c123_precision - recall: pipe5_c123_recall - f1score: pipe5_c123_f1score + precision: pipe5_c123_without_yn_precision + recall: pipe5_c123_without_yn_recall + f1score: pipe5_c123_without_yn_f1score # C123 Predictions decoder. pipe5_prediction_decoder: @@ -285,8 +285,8 @@ pipeline: # Use the same word mappings as label indexer. 
import_word_mappings_from_globals: True streams: - inputs: pipe5_c123_predictions - outputs: pipe5_c123_predicted_answers + inputs: pipe5_c123_without_yn_predictions + outputs: predicted_answers globals: word_mappings: word_mappings_c123_without_yn @@ -300,8 +300,8 @@ pipeline: input_streams: tokenized_questions, category_names, pipe0_predicted_question_categories_names, - pipe5_c123_masks, - answers, pipe5_c123_predicted_answers + pipe5_c123_without_yn_masks, + answers, predicted_answers #: pipeline From a3ff779311ca6da32d8f83fef14170a2cd961a01 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 00:52:34 -0700 Subject: [PATCH 19/28] lstm_resnet50_is_cat_ffn_c123_no_binary_loss --- .../lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/configs/vqa_med_2019/vf/lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml index 88a3bd7..2364e06 100644 --- a/configs/vqa_med_2019/vf/lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_resnet50_is_cat_ffn_c123_no_binary_loss.yml @@ -155,7 +155,7 @@ pipeline: type: LabelIndexer priority: 1.3 data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. export_word_mappings_to_globals: True streams: @@ -215,7 +215,7 @@ pipeline: type: LabelIndexer priority: 5.1 data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. export_word_mappings_to_globals: True streams: @@ -240,7 +240,7 @@ pipeline: pipe5_c123_ffn: priority: 5.3 type: FeedForwardNetwork - hidden: [50] + hidden: [100] dropout_rate: 0.5 streams: inputs: concatenated_activations @@ -286,7 +286,7 @@ pipeline: import_word_mappings_from_globals: True streams: inputs: pipe5_c123_predictions - outputs: pipe5_c123_predicted_answers + outputs: predicted_answers globals: word_mappings: word_mappings_c123_without_yn @@ -301,7 +301,7 @@ pipeline: tokenized_questions, category_names, pipe0_predicted_question_categories_names, pipe5_c123_masks, - answers, pipe5_c123_predicted_answers + answers, predicted_answers #: pipeline From 48c7287684254ef220c0fd9a0ed31d35d51805ac Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 01:06:28 -0700 Subject: [PATCH 20/28] lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml --- .../vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml index cba072b..2a46463 100644 --- a/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml @@ -155,7 +155,7 @@ pipeline: priority: 1.3 type: LabelIndexer data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. 
export_word_mappings_to_globals: True streams: @@ -314,7 +314,7 @@ pipeline: import_word_mappings_from_globals: True streams: inputs: pipe6_c123_predictions - outputs: pipe6_c123_predicted_answers + outputs: predicted_answers globals: word_mappings: word_mappings_c123_without_yn @@ -328,7 +328,7 @@ pipeline: tokenized_questions, category_names, pipe0_predicted_question_categories_names, pipe6_c123_masks, - answers, pipe6_c123_predicted_answers + answers, predicted_answers #: pipeline From 9f935ab87c0d38e77e192a48ca73bfe6320c00f5 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 01:27:59 -0700 Subject: [PATCH 21/28] lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml --- ...0_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml | 144 ++++++++++++++++-- .../vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml | 4 +- 2 files changed, 131 insertions(+), 17 deletions(-) diff --git a/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml index b0db77b..1b3f29d 100644 --- a/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml +++ b/configs/vqa_med_2019/vf/lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml @@ -32,8 +32,8 @@ pipeline: priority: 0 type: GlobalVariablePublisher # Add input_size to globals. - keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, element_wise_activation_size, category_c123_without_yn_word_to_ix] - values: [100, 2, 10, 100, 100, {"C1": 0, "C2": 1, "C3": 2}] + keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, element_wise_activation_size, category_c123_without_yn_word_to_ix,category_binary_yn_word_to_ix] + values: [100, 2, 10, 100, 100, {"C1": 0, "C2": 1, "C3": 2}, {"BINARY": 3}] # Statistics. batch_size: @@ -162,8 +162,8 @@ pipeline: inputs: answers outputs: all_answers_ids globals: - vocabulary_size: vocabulary_size_c123_without_yn - word_mappings: word_mappings_c123_without_yn + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn ################# PIPE 2: SHARED IMAGE ENCODER ################# @@ -243,7 +243,7 @@ pipeline: priority: 6.1 type: LabelIndexer data_folder: ~/data/vqa-med - word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + word_mappings_file: answers.c1_c2_c3_without_yn.word.mappings.csv # Export mappings and size to globals. export_word_mappings_to_globals: True streams: @@ -307,25 +307,139 @@ pipeline: f1score: pipe6_c123_f1score # C123 Predictions decoder. - pipe5_c123_prediction_decoder: - priority: 6.6 - type: WordDecoder - # Use the same word mappings as label indexer. - import_word_mappings_from_globals: True + #pipe5_c123_prediction_decoder: + # priority: 6.6 + # type: WordDecoder + # # Use the same word mappings as label indexer. + # import_word_mappings_from_globals: True + # streams: + # inputs: pipe6_c123_predictions + # outputs: pipe6_c123_predicted_answers + # globals: + # word_mappings: word_mappings_c123_without_yn + + + ################# PIPE 7: Y/N questions ################# + + # Answer encoding for PIPE 5. + pipe7_binary_yn_answer_indexer: + type: LabelIndexer + priority: 7.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True streams: - inputs: pipe6_c123_predictions - outputs: pipe6_c123_predicted_answers + inputs: answers + outputs: pipe7_binary_yn_answers_ids globals: - word_mappings: word_mappings_c123_without_yn - + vocabulary_size: vocabulary_size_binary_yn + word_mappings: word_mappings_binary_yn + # Sample masking based on categories. + pipe7_binary_yn_string_to_mask: + priority: 7.2 + type: StringToMask + globals: + word_mappings: category_binary_yn_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_binary_question_categories_indices # NOT USED + masks: pipe7_binary_yn_masks + # Model 4: FFN C1 answering + pipe7_binary_yn_classifier: + priority: 7.3 + type: FeedForwardNetwork + hidden: [100] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe7_binary_yn_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_binary_yn + pipe7_binary_yn_nllloss: + type: NLLLoss + priority: 7.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe7_binary_yn_predictions + masks: pipe7_binary_yn_masks + targets: pipe7_binary_yn_answers_ids + loss: pipe7_binary_yn_loss + pipe7_binary_yn_precision_recall: + type: PrecisionRecallStatistics + priority: 7.5 + use_word_mappings: True + use_masking: True + show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe7_binary_yn_masks + predictions: pipe7_binary_yn_predictions + targets: pipe7_binary_yn_answers_ids + globals: + word_mappings: word_mappings_binary_yn + statistics: + precision: pipe7_binary_yn_precision + recall: pipe7_binary_yn_recall + f1score: pipe7_binary_yn_f1score + + # Y/N Predictions decoder. + #pipe7_binary_yn_prediction_decoder: + # type: WordDecoder + # priority: 7.6 + # # Use the same word mappings as label indexer. + # import_word_mappings_from_globals: True + # streams: + # inputs: pipe7_binary_yn_predictions + # outputs: pipe7_binary_yn_predicted_answers + # globals: + # word_mappings: word_mappings_binary_yn ################# PIPE 9: MERGE ANSWERS ################# + # Merge predictions + pipe8_merged_predictions: + type: JoinMaskedPredictions + priority: 8.1 + # Names of used input streams. + input_prediction_streams: [pipe6_c123_predictions, pipe7_binary_yn_predictions] + input_mask_streams: [pipe6_c123_masks, pipe7_binary_yn_masks] + input_word_mappings: [word_mappings_c123_without_yn, word_mappings_binary_yn] + globals: + output_word_mappings: word_mappings_c123_binary_yn + streams: + output_strings: predicted_answers + output_indices: pipe8_merged_pred_indices + + # Statistics. + pipe8_merged_precision_recall: + type: PrecisionRecallStatistics + priority: 8.2 + # Use prediction indices instead of distributions. + use_prediction_distributions: False + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + globals: + word_mappings: word_mappings_c123_binary_yn + streams: + targets: all_answers_ids + predictions: pipe8_merged_pred_indices + statistics: + precision: pipe8_merged_precision + recall: pipe8_merged_recall + f1score: pipe8_merged_f1score + + + + # Viewers. 
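pipe8 above is where the two specialized heads are stitched back into a single answer stream: per sample, JoinMaskedPredictions picks the prediction of whichever head's mask is active and decodes it with that head's own word mappings. A simplified sketch of the merge (vocabularies and masks are illustrative, not the real answer sets):

import torch

c123_preds = torch.log_softmax(torch.randn(4, 5), dim=1)   # C1+C2+C3 head
yn_preds = torch.log_softmax(torch.randn(4, 2), dim=1)     # BINARY head
yn_mask = torch.tensor([0, 1, 0, 1])                       # from StringToMask

c123_ix_to_word = {0: "axial", 1: "mri", 2: "ct", 3: "t1", 4: "t2"}  # made-up words
yn_ix_to_word = {0: "yes", 1: "no"}

merged = []
for i in range(4):
    if yn_mask[i]:  # routed to the Y/N head
        merged.append(yn_ix_to_word[yn_preds[i].argmax().item()])
    else:           # otherwise the C1/C2/C3 head answers
        merged.append(c123_ix_to_word[c123_preds[i].argmax().item()])
print(merged)       # e.g. ['mri', 'no', 'ct', 'yes']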
viewer:
priority: 9.3
@@ -334,7 +448,7 @@ pipeline:
tokenized_questions, category_names,
pipe0_predicted_question_categories_names,
pipe6_c123_masks,
- answers, pipe6_c123_predicted_answers
+ answers, predicted_answers
#: pipeline
diff --git a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml
index 03c2cea..7cbe09e 100644
--- a/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml
+++ b/configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_only_yn_loss.yml
@@ -206,7 +206,7 @@ pipeline:
output_size: concatenated_activations_size
- ################# PIPE 5: C1 + C2 + C3 questions #################
+ ################# PIPE 5: Y/N questions #################
# Answer encoding for PIPE 5.
pipe5_binary_yn_answer_indexer:
@@ -276,7 +276,7 @@ pipeline:
recall: pipe5_binary_yn_recall
f1score: pipe5_binary_yn_f1score
- # C123 Predictions decoder.
+ # Y/N Predictions decoder.
pipe5_prediction_decoder:
type: WordDecoder
priority: 5.6
From 7db986fddb5901def63059cda67861474833dba5 Mon Sep 17 00:00:00 2001
From: tkornut
Date: Tue, 30 Apr 2019 01:39:20 -0700
Subject: [PATCH 22/28] lowered loss threshold: 1e-3
---
configs/vqa_med_2019/default_vqa_med_2019.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/configs/vqa_med_2019/default_vqa_med_2019.yml b/configs/vqa_med_2019/default_vqa_med_2019.yml
index 11d7222..dfe01a6 100644
--- a/configs/vqa_med_2019/default_vqa_med_2019.yml
+++ b/configs/vqa_med_2019/default_vqa_med_2019.yml
@@ -22,7 +22,7 @@ training:
# Terminal conditions:
terminal_conditions:
- loss_stop: 1.0e-2
+ loss_stop: 1.0e-3
episode_limit: 10000
epoch_limit: -1
From 39242f6acf1cad0c576f8e94bba8391d0048fc81 Mon Sep 17 00:00:00 2001
From: Alexis Asseman <33075224+aasseman@users.noreply.github.com>
Date: Tue, 30 Apr 2019 09:27:31 -0700
Subject: [PATCH 23/28] Add option to ignore words in BLEU
---
.../default/components/publishers/bleu_statistics.yml | 3 +++
ptp/components/publishers/bleu_statistics.py | 11 +++++++++--
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/configs/default/components/publishers/bleu_statistics.yml b/configs/default/components/publishers/bleu_statistics.yml
index a79a245..c51f387 100644
--- a/configs/default/components/publishers/bleu_statistics.yml
+++ b/configs/default/components/publishers/bleu_statistics.yml
@@ -13,6 +13,9 @@ use_prediction_distributions: True
# TODO!
#use_masking: False
+# Ignored words - useful for ignoring special tokens
+ignored_words: ["<PAD>", "<EOS>"]
+
# Weights of n-grams used when calculating the score.
weights: [0.25, 0.25, 0.25, 0.25]
diff --git a/ptp/components/publishers/bleu_statistics.py b/ptp/components/publishers/bleu_statistics.py
index b303ea9..6432c06 100644
--- a/ptp/components/publishers/bleu_statistics.py
+++ b/ptp/components/publishers/bleu_statistics.py
@@ -58,6 +58,9 @@ def __init__(self, name, config):
# Get masking flag.
#self.use_masking = self.config["use_masking"]
+ # Get ignored words
+ self.ignored_words = self.config["ignored_words"]
+
# Retrieve word mappings from globals.
word_to_ix = self.globals["word_mappings"]
# Construct reverse mapping for faster processing.
@@ -144,12 +147,16 @@ def calculate_BLEU(self, data_dict):
target_words = []
for t_ind in target_indices:
if t_ind in self.ix_to_word.keys():
- target_words.append(self.ix_to_word[t_ind])
+ w = self.ix_to_word[t_ind]
+ if w not in self.ignored_words:
+ target_words.append(w)
# Change prediction indices to words.
pred_words = []
for p_ind in pred_indices:
if p_ind in self.ix_to_word.keys():
- pred_words.append(self.ix_to_word[p_ind])
+ w = self.ix_to_word[p_ind]
+ if w not in self.ignored_words:
+ pred_words.append(w)
# Calculate BLEU.
scores.append(sentence_bleu([target_words], pred_words, self.weights))
#print("TARGET: {}\n".format(target_words))
From ecc69df2ae743aa59e332497c9fa02d40c47892e Mon Sep 17 00:00:00 2001
From: tkornut
Date: Tue, 30 Apr 2019 10:29:59 -0700
Subject: [PATCH 24/28] extend answers - added second exporter that creates the submission file
---
...ult_extend_answers.yml => extend_answers.yml} | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
rename configs/vqa_med_2019/{default_extend_answers.yml => extend_answers.yml} (90%)
diff --git a/configs/vqa_med_2019/default_extend_answers.yml b/configs/vqa_med_2019/extend_answers.yml
similarity index 90%
rename from configs/vqa_med_2019/default_extend_answers.yml
rename to configs/vqa_med_2019/extend_answers.yml
index 97e9ddf..965263e 100644
--- a/configs/vqa_med_2019/default_extend_answers.yml
+++ b/configs/vqa_med_2019/extend_answers.yml
@@ -58,7 +58,7 @@ test_answers:
# Add component for exporting answers to files.
pipeline:
disable: viewer,question_tokenizer
-# # Viewers.
+ # Viewers.
viewer_extended:
priority: 100.4
type: StreamViewer
sample_number: 0
@@ -68,13 +68,23 @@ pipeline:
category_names,predicted_categories,
answers,tokenized_answers,predicted_answers
- exporter:
+ answer_exporter:
priority: 100.5
type: StreamFileExporter
- separator: '|'
+ separator: ','
+ filename: 'answers.csv'
input_streams:
indices,image_ids,tokenized_questions,
category_names,predicted_categories,
answers,tokenized_answers,predicted_answers
+ submission_exporter:
+ priority: 100.6
+ type: StreamFileExporter
+ separator: '|'
+ filename: 'submission.txt'
+ input_streams:
+ image_ids,
+ predicted_answers
+
#: pipeline
From f0c037cdbb992942d18e5af5c0b6283f48de8929 Mon Sep 17 00:00:00 2001
From: Deepta Rajan
Date: Tue, 30 Apr 2019 11:17:16 -0700
Subject: [PATCH 25/28] vqa attention
---
.../components/models/vqa/attention.yml | 51 ++++++
.../c2_class_lstm_resnet50_attn_cat_is.yml | 120 +++++++++++++
ptp/components/models/__init__.py | 4 +-
ptp/components/models/vqa/attention.py | 163 ++++++++++++++++++
4 files changed, 337 insertions(+), 1 deletion(-)
create mode 100644 configs/default/components/models/vqa/attention.yml
create mode 100644 configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml
create mode 100644 ptp/components/models/vqa/attention.py
diff --git a/configs/default/components/models/vqa/attention.yml b/configs/default/components/models/vqa/attention.yml
new file mode 100644
index 0000000..97557bd
--- /dev/null
+++ b/configs/default/components/models/vqa/attention.yml
@@ -0,0 +1,51 @@
+# This file defines the default values for the VQA_Attention model.
+
+####################################################################
+# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
+####################################################################
+
+# Dropout rate (LOADED)
+# Default: 0 (means that it is turned off)
+dropout_rate: 0
+
+streams:
+ ####################################################################
+ # 2. Keymappings associated with INPUT and OUTPUT streams.
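The net effect of the BLEU change above: both target and prediction are stripped of the configured special tokens before scoring, so padding no longer drags the score down. A small self-contained example using the same nltk scorer (answer words are made up; <PAD>/<EOS> follow the default config above):

from nltk.translate.bleu_score import sentence_bleu

ignored_words = ["<PAD>", "<EOS>"]
weights = [0.25, 0.25, 0.25, 0.25]

target = ["yes", "the", "lesion", "is", "benign", "<EOS>", "<PAD>", "<PAD>"]
prediction = ["yes", "the", "lesion", "is", "benign", "<EOS>"]

target_words = [w for w in target if w not in ignored_words]
pred_words = [w for w in prediction if w not in ignored_words]
print(sentence_bleu([target_words], pred_words, weights))  # 1.0 once specials are ignored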
+ #################################################################### + + # Stream containing batch of encoded images (INPUT) + feature_maps: feature_maps + + # Stream containing batch of encoded questions (INPUT) + question_encodings: question_encodings + + # Stream containing outputs (OUTPUT) + outputs: outputs + +globals: + #################################################################### + # 3. Keymappings of variables that will be RETRIEVED from GLOBALS. + #################################################################### + + # Height of the features tensor (RETRIEVED) + feature_maps_height: feature_maps_height + + # Width of the features tensor (RETRIEVED) + feature_maps_width: feature_maps_width + + # Depth of the features tensor (RETRIEVED) + feature_maps_depth: feature_maps_depth + + # Size of the question encodings input (RETRIEVED) + question_encoding_size: question_encoding_size + + # Size of the output (RETRIEVED) + output_size: output_size + + #################################################################### + # 4. Keymappings associated with GLOBAL variables that will be SET. + #################################################################### + + #################################################################### + # 5. Keymappings associated with statistics that will be ADDED. + #################################################################### diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml new file mode 100644 index 0000000..9b2f1fe --- /dev/null +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml @@ -0,0 +1,120 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml + +training: + problem: + batch_size: 48 + # Appy all preprocessing/data augmentations. + image_preprocessing: normalize + # none | random_affine | random_horizontal_flip | normalize | all + question_preprocessing: all + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + streams: + # Problem is returning tokenized questions. + questions: tokenized_questions + +validation: + problem: + batch_size: 48 + question_preprocessing: lowercase,remove_punctuation,tokenize + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + streams: + # Problem is returning tokenized questions. + questions: tokenized_questions + + +pipeline: + name: c2_class_lstm_resnet50_attn_cat_is + + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. 
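Before the new attention pipeline takes shape, one mechanism worth spelling out: GlobalVariablePublisher simply seeds a shared globals dictionary at priority 0, from which later components resolve their sizes. In sketch form, using the keys and values published on the next lines:

globals_dict = {}
keys = ["question_encoder_output_size", "latent_size", "num_attention_heads",
        "attention_activation_size", "question_image_activation_size"]
values = [100, 100, 2, 4196, 300]
globals_dict.update(zip(keys, values))

# A downstream component such as the classifier then resolves, e.g.:
input_size = globals_dict["attention_activation_size"]  # 4196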
+ keys: [question_encoder_output_size, latent_size, num_attention_heads, attention_activation_size, question_image_activation_size] + values: [100, 100, 2, 4196, 300] + + ################# PIPE 0: question ################# + + # Model 1: Embeddings + question_embeddings: + priority: 1.2 + type: SentenceEmbeddings + embeddings_size: 100 + pretrained_embeddings_file: glove.6B.100d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + priority: 1.3 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + use_logsoftmax: False + initial_state: Trainable + dropout_rate: 0.1 + hidden_size: 50 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + ################# PIPE 2: image ################# + # Image encoder. + image_encoder: + priority: 3.1 + type: TorchVisionWrapper + model_type: resnet50 + return_feature_maps: True + streams: + inputs: images + outputs: feature_maps #image_activations + # globals: + # output_size: image_encoder_output_size + + ################# PIPE 3: image-question fusion ################# + # Attention + FF. + question_image_fusion: + priority: 4.1 + type: VQA_Attention + dropout_rate: 0.5 + streams: + image_encodings: feature_maps #image_activations + question_encodings: question_activations + outputs: attention_activations + globals: + question_encoding_size: question_encoder_output_size + latent_size: latent_size + multi_head_attention: num_attention_heads + output_size: attention_activation_size + + # question_image_ffn: + # priority: 4.2 + # type: FeedForwardNetwork + # hidden_sizes: [100] + # dropout_rate: 0.5 + # streams: + # inputs: attention_activations + # predictions: question_image_activations + # globals: + # input_size: attention_activation_size + # prediction_size: question_image_activation_size + + classifier: + priority: 5.1 + type: FeedForwardNetwork + hidden_sizes: [100] + dropout_rate: 0.5 + streams: + inputs: attention_activations #question_image_activations #concatenated_activations + globals: + input_size: attention_activation_size #question_image_activation_size #concatentated_activations_size + prediction_size: vocabulary_size_c2 + + + #: pipeline diff --git a/ptp/components/models/__init__.py b/ptp/components/models/__init__.py index 20c2841..81a3868 100644 --- a/ptp/components/models/__init__.py +++ b/ptp/components/models/__init__.py @@ -12,6 +12,7 @@ from .vqa.element_wise_multiplication import ElementWiseMultiplication from .vqa.multimodal_compact_bilinear_pooling import MultimodalCompactBilinearPooling from .vqa.relational_network import RelationalNetwork +from .vqa.attention import VQA_Attention __all__ = [ 'ConvNetEncoder', @@ -26,5 +27,6 @@ 'ElementWiseMultiplication', 'MultimodalCompactBilinearPooling', 'RelationalNetwork', - 'Attn_Decoder_RNN' + 'Attn_Decoder_RNN', + 'VQA_Attention' ] diff --git a/ptp/components/models/vqa/attention.py b/ptp/components/models/vqa/attention.py new file mode 100644 index 0000000..f3c778d --- /dev/null +++ b/ptp/components/models/vqa/attention.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright (C) IBM Corporation 2018 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__author__ = "Deepta Rajan" + + +import torch + +from ptp.components.models.model import Model +from ptp.data_types.data_definition import DataDefinition + + +class VQA_Attention(Model): + """ + Element of one of the classical baselines for Visual Question Answering. + Attention-weighted image maps are computed based on the question. + The multi-modal data (question and attention-weighted image maps) are fused via concatenation and returned (for subsequent classification, done in a separate component e.g. ffn). + + On the basis of: Vahid Kazemi Ali Elqursh. "Show, Ask, Attend, and Answer: A Strong Baseline For Visual Question Answering" (2017). + Code: https://github.com/Cyanogenoid/pytorch-vqa/blob/master/model.py + """ + def __init__(self, name, config): + """ + Initializes the model, creates the required layers. + + :param name: Name of the model (taken from the configuration file). + + :param config: Parameters read from configuration file. + :type config: ``ptp.configuration.ConfigInterface`` + + """ + super(VQA_Attention, self).__init__(name, VQA_Attention, config) + + # Get key mappings. + self.key_feature_maps = self.stream_keys["feature_maps"] + self.key_question_encodings = self.stream_keys["question_encodings"] + self.key_outputs = self.stream_keys["outputs"] + + # Retrieve input/output sizes from globals. + self.feature_maps_height = self.globals["feature_maps_height"] + self.feature_maps_width = self.globals["feature_maps_width"] + self.feature_maps_depth = self.globals["feature_maps_depth"] + self.question_encoding_size = self.globals["question_encoding_size"] + self.latent_size = self.globals["latent_size"] #TO-DO add to yml file + self.num_attention_heads = self.globals["multi_head_attention"] + # Output feature size + self.output_size = self.feature_maps_depth*self.num_attention_heads + self.question_encoding_size + + # Map image and question encodings to a common latent space of dimension 'latent_size'. + self.image_encodings_conv = torch.nn.Conv2d(self.feature_maps_depth, self.latent_size, 1, bias=False) + self.question_encodings_ff = torch.nn.Linear(self.question_encoding_size, self.latent_size) + + # Scalar-dot product attention function is implemented as a Conv operation + self.attention_conv = torch.nn.Conv2d(self.latent_size, self.num_attention_heads, 1) + + # Create activation layer. + self.activation = torch.nn.ReLU() + + # Retrieve dropout rate value - if set, will put dropout between every layer. + dropout_rate = self.config["dropout_rate"] + + # Create dropout layer. + self.dropout = torch.nn.Dropout(dropout_rate) + + + def input_data_definitions(self): + """ + Function returns a dictionary with definitions of input data that are required by the component. + + :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`). 
+ """ + return { + self.key_feature_maps: DataDefinition([-1, self.feature_maps_depth, self.feature_maps_height, self.feature_maps_width], [torch.Tensor], "Batch of feature maps [BATCH_SIZE x FEAT_DEPTH x FEAT_HEIGHT x FEAT_WIDTH]"), + self.key_question_encodings: DataDefinition([-1, self.question_encoding_size], [torch.Tensor], "Batch of encoded questions [BATCH_SIZE x QUESTION_ENCODING_SIZE]"), + } + + + def output_data_definitions(self): + """ + Function returns a dictionary with definitions of output data produced the component. + + :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + return { + self.key_outputs: DataDefinition([-1, self.output_size], [torch.Tensor], "Batch of outputs [BATCH_SIZE x OUTPUT_SIZE]") + } + + def forward(self, data_dict): + """ + Main forward pass of the model. + + :param data_dict: DataDict({'images',**}) + :type data_dict: ``ptp.dadatypes.DataDict`` + """ + + # Unpack DataDict. + enc_img = data_dict[self.key_feature_maps] #[48, 2048, 7, 7] + enc_q = data_dict[self.key_question_encodings] #[48, 100] + # print("im_enc", enc_img.shape) + # print("enc_q", enc_q.shape) + + # L2 norm of image encoding + enc_img = enc_img / (enc_img.norm(p=2, dim=1, keepdim=True).expand_as(enc_img) + 1e-8) + + # Compute attention maps for image using questions + latent_img = self.image_encodings_conv(self.dropout(enc_img)) # [48, 100, 7, 7] + # print("latent_im", latent_img.shape) + latent_q = self.question_encodings_ff(self.dropout(enc_q)) # [48, 100] + # print("latent_q", latent_q.shape) + latent_q_tile = tile_2d_over_nd(latent_q, latent_img) # [48, 100, 7, 7] + # print("latent_q_tile", latent_q_tile.shape) + attention = self.activation(latent_img + latent_q_tile) # + # print("attention", attention.shape) + attention = self.attention_conv(self.dropout(attention)) # [48, 2, 7, 7] + # print("attention", attention.shape) + + # Apply attention to image encoding + attention_enc_img = apply_attention(enc_img, attention) # [48, 2048, 7, 7], [48, 2, 7, 7] + # print("attention im", attention_enc_img.shape) + + # Fusion -- Concatenate attention-weighted image encodings and question encodings. + outputs = torch.cat([attention_enc_img, latent_q], dim=1) + # print("outputs", outputs.shape) + # Add predictions to datadict. + data_dict.extend({self.key_outputs: outputs}) + + +def tile_2d_over_nd(feature_vector, feature_map): + """ Repeat the same feature vector over all spatial positions of a given feature map. + The feature vector should have the same batch size and number of features as the feature map. + """ + n, c = feature_vector.size() + spatial_size = feature_map.dim() - 2 + tiled = feature_vector.view(n, c, *([1] * spatial_size)).expand_as(feature_map) + return tiled + + +def apply_attention(input, attention): + """ Apply any number of attention maps over the input. 
""" + n, c = input.size()[:2] + glimpses = attention.size(1) # glimpses is equivalent to multiple heads in attention + + # flatten the spatial dims into the third dim, since we don't need to care about how they are arranged + input = input.view(n, 1, c, -1) # [n, 1, c, s] [batch, 1, channels, height*width] [48, 1, 2048, 7*7] + attention = attention.view(n, glimpses, -1) # [48, 2, 7*7] + attention = torch.nn.functional.softmax(attention, dim=-1).unsqueeze(2) # [n, g, 1, s] [batch, multi_head, 1, height*width] [48, 2, 1, 7*7] + weighted = attention * input # [n, g, c, s] [48, 2, 2048, 7*7] + weighted_mean = weighted.sum(dim=-1) # [n, g, c] [48, 2, 2048] + return weighted_mean.view(n, -1) # [48, 4196] From e486dc7266cd544dee26b696b576bf8494c519c7 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 11:26:02 -0700 Subject: [PATCH 26/28] separator export added to stream_file_exporter --- .../default/components/publishers/stream_file_exporter.yml | 3 +++ configs/vqa_med_2019/extend_answers.yml | 3 ++- ptp/components/publishers/stream_file_exporter.py | 5 +++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/configs/default/components/publishers/stream_file_exporter.yml b/configs/default/components/publishers/stream_file_exporter.yml index 1a5546f..3d83b20 100644 --- a/configs/default/components/publishers/stream_file_exporter.yml +++ b/configs/default/components/publishers/stream_file_exporter.yml @@ -11,6 +11,9 @@ input_streams: '' # Separator that will be placed between values (LOADED) separator: ',' +# Adds additional line to output file enabling Excel to use different separator while loading (LOADED) +export_separator_line_to_csv: False + # Name of the file containing output values (LOADED) filename: 'outputs.txt' diff --git a/configs/vqa_med_2019/extend_answers.yml b/configs/vqa_med_2019/extend_answers.yml index 965263e..9e2f9a4 100644 --- a/configs/vqa_med_2019/extend_answers.yml +++ b/configs/vqa_med_2019/extend_answers.yml @@ -71,8 +71,9 @@ pipeline: answer_exporter: priority: 100.5 type: StreamFileExporter - separator: ',' + separator: '|' filename: 'answers.csv' + export_separator_line_to_csv: True input_streams: indices,image_ids,tokenized_questions, category_names,predicted_categories, diff --git a/ptp/components/publishers/stream_file_exporter.py b/ptp/components/publishers/stream_file_exporter.py index ec34ea5..3189e4c 100644 --- a/ptp/components/publishers/stream_file_exporter.py +++ b/ptp/components/publishers/stream_file_exporter.py @@ -55,6 +55,11 @@ def __init__(self, name, config): filename = self.config["filename"] abs_filename = path.join(self.app_state.log_dir, filename) self.file = open(abs_filename, 'w') + + # Export additional line. + if self.config["export_separator_line_to_csv"]: + self.file.write("sep={}\n",self.separator) + self.logger.info("Writing values from {} streams to {}".format(self.input_stream_keys, abs_filename)) From 61be38d516fa06982135d2285da5a3fcee2a2555 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 11:27:01 -0700 Subject: [PATCH 27/28] separator export fix --- ptp/components/publishers/stream_file_exporter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ptp/components/publishers/stream_file_exporter.py b/ptp/components/publishers/stream_file_exporter.py index 3189e4c..64cf46b 100644 --- a/ptp/components/publishers/stream_file_exporter.py +++ b/ptp/components/publishers/stream_file_exporter.py @@ -58,7 +58,7 @@ def __init__(self, name, config): # Export additional line. 
if self.config["export_separator_line_to_csv"]: - self.file.write("sep={}\n",self.separator) + self.file.write("sep={}\n".format(self.separator)) self.logger.info("Writing values from {} streams to {}".format(self.input_stream_keys, abs_filename)) From 3bae683ca035a41c944e04682516fffc96a80bb6 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 30 Apr 2019 15:29:54 -0700 Subject: [PATCH 28/28] attention cleanups --- .../components/models/vqa/attention.yml | 7 ++++ .../c2_class_lstm_resnet50_attn_cat_is.yml | 38 +++++-------------- .../default_c2_classification.yml | 2 +- ptp/components/models/vqa/attention.py | 7 +++- 4 files changed, 23 insertions(+), 31 deletions(-) diff --git a/configs/default/components/models/vqa/attention.yml b/configs/default/components/models/vqa/attention.yml index 97557bd..830f4b8 100644 --- a/configs/default/components/models/vqa/attention.yml +++ b/configs/default/components/models/vqa/attention.yml @@ -8,6 +8,13 @@ # Default: 0 (means that it is turned off) dropout_rate: 0 +# Size of the latent space (LOADED) +latent_size: 100 + +# Number of attention heads (LOADED) +num_attention_heads: 2 + + streams: #################################################################### # 2. Keymappings associated with INPUT and OUTPUT streams. diff --git a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml index 9b2f1fe..08b043e 100644 --- a/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml +++ b/configs/vqa_med_2019/c2_classification/c2_class_lstm_resnet50_attn_cat_is.yml @@ -5,10 +5,7 @@ training: problem: batch_size: 48 # Appy all preprocessing/data augmentations. - image_preprocessing: normalize - # none | random_affine | random_horizontal_flip | normalize | all - question_preprocessing: all - # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + question_preprocessing: lowercase,remove_punctuation,tokenize streams: # Problem is returning tokenized questions. questions: tokenized_questions @@ -16,22 +13,21 @@ training: validation: problem: batch_size: 48 + # Appy all preprocessing/data augmentations. question_preprocessing: lowercase,remove_punctuation,tokenize - # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all streams: # Problem is returning tokenized questions. questions: tokenized_questions pipeline: - name: c2_class_lstm_resnet50_attn_cat_is global_publisher: priority: 0 type: GlobalVariablePublisher # Add input_size to globals. - keys: [question_encoder_output_size, latent_size, num_attention_heads, attention_activation_size, question_image_activation_size] - values: [100, 100, 2, 4196, 300] + keys: [question_encoder_output_size, attention_activation_size, question_image_activation_size] + values: [100, 4196, 300] ################# PIPE 0: question ################# @@ -73,9 +69,7 @@ pipeline: return_feature_maps: True streams: inputs: images - outputs: feature_maps #image_activations - # globals: - # output_size: image_encoder_output_size + outputs: feature_maps ################# PIPE 3: image-question fusion ################# # Attention + FF. 
@@ -83,37 +77,25 @@ pipeline: priority: 4.1 type: VQA_Attention dropout_rate: 0.5 + latent_size: 100 + num_attention_heads: 2 streams: - image_encodings: feature_maps #image_activations + image_encodings: feature_maps question_encodings: question_activations outputs: attention_activations globals: question_encoding_size: question_encoder_output_size - latent_size: latent_size - multi_head_attention: num_attention_heads output_size: attention_activation_size - # question_image_ffn: - # priority: 4.2 - # type: FeedForwardNetwork - # hidden_sizes: [100] - # dropout_rate: 0.5 - # streams: - # inputs: attention_activations - # predictions: question_image_activations - # globals: - # input_size: attention_activation_size - # prediction_size: question_image_activation_size - classifier: priority: 5.1 type: FeedForwardNetwork hidden_sizes: [100] dropout_rate: 0.5 streams: - inputs: attention_activations #question_image_activations #concatenated_activations + inputs: attention_activations globals: - input_size: attention_activation_size #question_image_activation_size #concatentated_activations_size + input_size: attention_activation_size prediction_size: vocabulary_size_c2 diff --git a/configs/vqa_med_2019/c2_classification/default_c2_classification.yml b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml index b4b08d0..9511a28 100644 --- a/configs/vqa_med_2019/c2_classification/default_c2_classification.yml +++ b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml @@ -82,6 +82,6 @@ pipeline: viewer: type: StreamViewer priority: 100.4 - input_streams: questions,tokenized_questions,category_names,answers,predicted_answers + input_streams: tokenized_questions,category_names,answers,predicted_answers #: pipeline diff --git a/ptp/components/models/vqa/attention.py b/ptp/components/models/vqa/attention.py index f3c778d..15c7914 100644 --- a/ptp/components/models/vqa/attention.py +++ b/ptp/components/models/vqa/attention.py @@ -55,8 +55,11 @@ def __init__(self, name, config): self.feature_maps_width = self.globals["feature_maps_width"] self.feature_maps_depth = self.globals["feature_maps_depth"] self.question_encoding_size = self.globals["question_encoding_size"] - self.latent_size = self.globals["latent_size"] #TO-DO add to yml file - self.num_attention_heads = self.globals["multi_head_attention"] + + # Get size of latent space and number of heads from config. + self.latent_size = self.config["latent_size"] + self.num_attention_heads = self.config["num_attention_heads"] + # Output feature size self.output_size = self.feature_maps_depth*self.num_attention_heads + self.question_encoding_size
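To close the loop on shapes, here is a quick self-contained walk-through of the fusion defined in attention.py, with random tensors standing in for real resnet50 feature maps and LSTM question encodings, and fresh untrained layers in place of the model's own:

import torch

n, c, h, w = 48, 2048, 7, 7            # feature maps: [BATCH x DEPTH x H x W]
latent, heads, q = 100, 2, 100

enc_img = torch.randn(n, c, h, w)
enc_q = torch.randn(n, q)

latent_img = torch.nn.Conv2d(c, latent, 1, bias=False)(enc_img)  # [48, 100, 7, 7]
latent_q = torch.nn.Linear(q, latent)(enc_q)                     # [48, 100]
tiled = latent_q.view(n, latent, 1, 1).expand_as(latent_img)     # tile_2d_over_nd
attention = torch.nn.Conv2d(latent, heads, 1)(torch.relu(latent_img + tiled))

# apply_attention: softmax over the 49 spatial positions per head, then
# attention-weighted sums of the image features.
flat_img = enc_img.view(n, 1, c, -1)                             # [48, 1, 2048, 49]
flat_att = torch.softmax(attention.view(n, heads, -1), dim=-1).unsqueeze(2)
weighted = (flat_att * flat_img).sum(dim=-1).view(n, -1)         # [48, 4096]

outputs = torch.cat([weighted, latent_q], dim=1)
print(outputs.shape)  # torch.Size([48, 4196]) == attention_activation_size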