diff --git a/configs/default/components/models/attn_decoder_rnn..yml b/configs/default/components/models/attn_decoder_rnn..yml
new file mode 100644
index 0000000..f676809
--- /dev/null
+++ b/configs/default/components/models/attn_decoder_rnn..yml
@@ -0,0 +1,78 @@
+# This file defines the default values for the GRU decoder with attention.
+
+####################################################################
+# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
+####################################################################
+
+# Size of the hidden state (LOADED)
+hidden_size: 100
+
+# Whether to include the last hidden state in the outputs
+output_last_state: False
+
+# Type of recurrent cell (LOADED)
+# -> Only GRU is supported
+
+# Number of "stacked" layers (LOADED)
+# -> Only a single layer is supported
+
+# Dropout rate (LOADED)
+# Default: 0 (means that it is turned off)
+dropout_rate: 0
+
+# Prediction mode (LOADED)
+# Options:
+# * Dense (passes every activation through the output layer) |
+# * Last (passes only the last activation through the output layer) |
+# * None (all outputs are discarded)
+prediction_mode: Dense
+
+# Enable FFN layer at the output of the RNN (before the output is eventually fed back in the case of autoregression).
+# Useful if the raw outputs of the RNN are needed, e.g. for an attention encoder-decoder.
+ffn_output: True
+
+# Length of generated output sequence (LOADED)
+# User must set it per task, as it is task specific.
+autoregression_length: 10
+
+# If true, output of the last layer will be additionally processed with Log Softmax (LOADED)
+use_logsoftmax: True
+
+streams:
+  ####################################################################
+  # 2. Keymappings associated with INPUT and OUTPUT streams.
+  ####################################################################
+
+  # Stream containing batch of encoder outputs (INPUT)
+  inputs: inputs
+
+  # Stream containing the initial state of the RNN (INPUT)
+  # The stream will actually be created only if `initial_state: Input`
+  input_state: input_state
+
+  # Stream containing predictions (OUTPUT)
+  predictions: predictions
+
+  # Stream containing the final output state of the RNN (OUTPUT)
+  # The stream will actually be created only if `output_last_state: True`
+  output_state: output_state
+
+globals:
+  ####################################################################
+  # 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
+  ####################################################################
+
+  # Size of the input (RETRIEVED)
+  input_size: input_size
+
+  # Size of the prediction (RETRIEVED)
+  prediction_size: prediction_size
+
+  ####################################################################
+  # 4. Keymappings associated with GLOBAL variables that will be SET.
+  ####################################################################
+
+  ####################################################################
+  # 5. Keymappings associated with statistics that will be ADDED.
+  ####################################################################
+
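For orientation, the component configured above corresponds roughly to the following PyTorch sketch. This is illustrative only, not the actual ptp implementation: the class name, the dot-product attention scoring, and feeding log-probabilities back as the next step's input are all assumptions; it also assumes input_size == prediction_size, as in the pipelines below where both map to the answer vocabulary size.

    # Illustrative sketch only -- NOT the ptp implementation.
    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class AttnDecoderSketch(nn.Module):
        def __init__(self, vocab_size, hidden_size=100, dropout_rate=0.0):
            super().__init__()
            # Step input = previous prediction concatenated with the attention context.
            self.gru = nn.GRU(vocab_size + hidden_size, hidden_size, batch_first=True)
            self.dropout = nn.Dropout(dropout_rate)
            self.ffn = nn.Linear(hidden_size, vocab_size)  # ffn_output: True

        def forward(self, encoder_outputs, input_state, autoregression_length=10):
            # encoder_outputs: [batch, src_len, hidden]; input_state: [1, batch, hidden]
            batch = encoder_outputs.size(0)
            hidden = input_state
            step_input = torch.zeros(batch, 1, self.ffn.out_features,
                                     device=encoder_outputs.device)
            outputs = []
            for _ in range(autoregression_length):
                # Dot-product attention over the encoder outputs (scoring assumed).
                scores = torch.bmm(encoder_outputs, hidden[-1].unsqueeze(2))   # [batch, src_len, 1]
                weights = F.softmax(scores, dim=1)
                context = torch.bmm(weights.transpose(1, 2), encoder_outputs)  # [batch, 1, hidden]
                out, hidden = self.gru(torch.cat([step_input, context], dim=2), hidden)
                # prediction_mode: Dense -- every activation goes through the output layer.
                step_output = F.log_softmax(self.ffn(self.dropout(out)), dim=-1)  # use_logsoftmax: True
                outputs.append(step_output)
                step_input = step_output  # autoregressive feedback
            # Returning `hidden` mirrors output_last_state: True.
            return torch.cat(outputs, dim=1), hidden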
diff --git a/configs/vqa_med_2019/c4_classification/c4_frozen_if_gru_dec.yml b/configs/vqa_med_2019/c4_classification/c4_frozen_if_gru_dec.yml
new file mode 100644
index 0000000..732366a
--- /dev/null
+++ b/configs/vqa_med_2019/c4_classification/c4_frozen_if_gru_dec.yml
@@ -0,0 +1,151 @@
+# Load config defining problems for training, validation and testing.
+default_configs:
+  vqa_med_2019/default_vqa_med_2019.yml,
+  vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml,
+  vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml
+
+c4_hyperparameters:
+  # Here I am putting some of the hyperparameters from the spreadsheet.
+
+  question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize
+  # Accepted formats: a,b,c or [a,b,c]
+  # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all
+
+  image_preprocessing: &image_preprocessing normalize
+  # Accepted formats: a,b,c or [a,b,c]
+  # none | random_affine | random_horizontal_flip | normalize | all
+
+  # Final classifier: FFN.
+  # (Hidden sizes for the pipe6 classifier below; [500] mirrors frozen_if_ffn_c123_loss.yml.)
+  answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500]
+
+  batch_size: &batch_size 256
+  preload_images: &preload_images False
+  num_workers: &num_workers 4
+
+# Training parameters:
+training:
+  problem:
+    batch_size: *batch_size
+    categories: C4
+    export_sample_weights: ~/data/vqa-med/answers.c4.weights.csv
+    # Apply all preprocessing/data augmentations.
+    question_preprocessing: *question_preprocessing
+    image_preprocessing: *image_preprocessing
+    # Preload images.
+    preload_images: *preload_images
+    streams:
+      questions: tokenized_questions
+  sampler:
+    weights: ~/data/vqa-med/answers.c4.weights.csv
+  # Use four workers for loading images.
+  dataloader:
+    num_workers: *num_workers
+
+  # Optimizer parameters:
+  optimizer:
+    name: Adam
+    lr: 0.0001
+
+  # Terminal conditions:
+  terminal_conditions:
+    loss_stop: 1.0e-3
+    episode_limit: 10000
+    epoch_limit: -1
+
+# Validation parameters:
+validation:
+  partial_validation_interval: 100
+  problem:
+    batch_size: *batch_size
+    categories: C4
+    # Apply all preprocessing/data augmentations.
+    question_preprocessing: *question_preprocessing
+    image_preprocessing: *image_preprocessing
+    # Preload images: false, as we will need them only once, at the end.
+    preload_images: false
+    streams:
+      questions: tokenized_questions
+  dataloader:
+    num_workers: 1
+
+
+pipeline:
+
+  ################# PIPE 6: C1 + C2 + C3 questions #################
+
+  # Answer encoding.
+  pipe6_c123_binary_yn_answer_indexer:
+    priority: 6.2
+    type: LabelIndexer
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_c123_binary_yn
+      word_mappings: word_mappings_c123_binary_yn
+
+
+  # Model 4: FFN C123 answering
+  pipe6_c123_binary_yn_answer_classifier:
+    priority: 6.3
+    type: FeedForwardNetwork
+    hidden: *answer_classifier_hidden_sizes_val
+    dropout_rate: 0.5
+    streams:
+      inputs: concatenated_activations
+      predictions: pipe6_c123_predictions
+    globals:
+      input_size: concatenated_activations_size
+      prediction_size: vocabulary_size_c123_binary_yn
+
+  pipe6_c123_binary_yn_nllloss:
+    priority: 6.4
+    type: NLLLoss
+    targets_dim: 1
+    streams:
+      predictions: pipe6_c123_predictions
+      targets: answers_ids
+      loss: pipe6_c123_loss
+
+  pipe6_c123_binary_yn_precision_recall:
+    priority: 6.5
+    type: PrecisionRecallStatistics
+    use_word_mappings: True
+    show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      predictions: pipe6_c123_predictions
+      targets: answers_ids
+    globals:
+      word_mappings: word_mappings_c123_binary_yn
+    statistics:
+      precision: pipe6_c123_precision
+      recall: pipe6_c123_recall
+      f1score: pipe6_c123_f1score
+
+  # C123 Predictions decoder.
+  pipe6_c123_binary_yn_prediction_decoder:
+    priority: 6.6
+    type: WordDecoder
+    # Use the same word mappings as the label indexer.
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: pipe6_c123_predictions
+      outputs: predicted_answers
+    globals:
+      word_mappings: word_mappings_c123_binary_yn
+
+  ################# PIPE 9: MERGE ANSWERS #################
+
+  # Viewers.
+  viewer:
+    priority: 9.3
+    type: StreamViewer
+    input_streams:
+      tokenized_questions,
+      category_names, predicted_category_names,
+      answers, predicted_answers
+
+
+#: pipeline
diff --git a/configs/vqa_med_2019/c4_classification/c4_lstm_vgg16_ewm_cat_is_attdec.yml b/configs/vqa_med_2019/c4_classification/c4_lstm_vgg16_ewm_cat_is_attdec.yml
deleted file mode 100644
index ee05864..0000000
--- a/configs/vqa_med_2019/c4_classification/c4_lstm_vgg16_ewm_cat_is_attdec.yml
+++ /dev/null
@@ -1,236 +0,0 @@
-# Load config defining problems for training, validation and testing.
-default_configs: vqa_med_2019/default_vqa_med_2019.yml
-
-# Training parameters:
-training:
-  problem:
-    batch_size: 32 # 200 requires to use 4 GPUs!
-    categories: C4
-    question_preprocessing: lowercase, remove_punctuation, tokenize #, random_remove_stop_words #,random_shuffle_words
-    answer_preprocessing: lowercase, remove_punctuation, tokenize
-    export_sample_weights: ~/data/vqa-med/answers.c4.weights.csv
-  sampler:
-    weights: ~/data/vqa-med/answers.c4.weights.csv
-  dataloader:
-    num_workers: 4
-  # Termination.
-  terminal_conditions:
-    loss_stop: 1.0e-2
-    episode_limit: 1000000
-    epoch_limit: -1
-
-# Validation parameters:
-validation:
-  problem:
-    batch_size: 32
-    categories: C4
-    question_preprocessing: lowercase, remove_punctuation, tokenize
-    answer_preprocessing: lowercase, remove_punctuation, tokenize
-  dataloader:
-    num_workers: 4
-
-pipeline:
-
-  ################# PIPE 1: SHARED #################
-
-  global_publisher:
-    priority: 1.0
-    type: GlobalVariablePublisher
-    # Add input_size to globals.
-    keys: [question_encoder_output_size, image_encoder_output_size, element_wise_activation_size,image_size_encoder_input_size, image_size_encoder_output_size]
-    values: [100, 500, 100, 2, 10]
-
-  # Answer encoding.
-  answer_indexer:
-    priority: 1.1
-    type: SentenceIndexer
-    data_folder: ~/data/vqa-med
-    word_mappings_file: answer_words.c4.preprocessed.word.mappings.csv
-    # Export answer word mappings to globals.
-    export_word_mappings_to_globals: True
-    export_pad_mapping_to_globals: True
-    additional_tokens: <PAD>,<EOS>
-    # Add <EOS> token at the end of sentence.
-    eos_token: True
-    fixed_padding: 10 # The longest question! max is 19!
-    streams:
-      inputs: answers
-      outputs: indexed_answers
-    globals:
-      vocabulary_size: ans_vocabulary_size
-      word_mappings: ans_word_mappings
-      pad_index: ans_pad_index
-
-
-  ################# PIPE 2: SHARED QUESTION ENCODER #################
-
-  # Question embeddings
-  question_embeddings:
-    priority: 2.0
-    type: SentenceEmbeddings
-    embeddings_size: 50
-    pretrained_embeddings_file: glove.6B.50d.txt
-    data_folder: ~/data/vqa-med
-    word_mappings_file: questions.all.word.mappings.csv
-    fixed_padding: 10 # The longest question! max is 19!
-    additional_tokens: <PAD>,<EOS>
-    streams:
-      inputs: questions
-      outputs: embedded_questions
-
-  # Single layer GRU Encoder
-  question_encoder:
-    priority: 2.1
-    type: RecurrentNeuralNetwork
-    # Do not wrap that model with DataDictParallel!
-    #parallelize: False
-    cell_type: GRU
-    initial_state: Trainable
-    hidden_size: 50
-    num_layers: 1
-    # We will project outputs that should reassemble outputs of answer word embeddings.
-    use_logsoftmax: False
-    output_last_state: True
-    prediction_mode: Dense
-    #ffn_output: False
-    #dropout_rate: 0.1
-    streams:
-      inputs: embedded_questions
-      predictions: s2s_encoder_output
-      output_state: s2s_state_output
-    globals:
-      input_size: embeddings_size
-      prediction_size: question_encoder_output_size
-
-  ################# PIPE 2: SHARED IMAGE ENCODER #################
-
-  # Image encoder.
-  image_encoder:
-    priority: 2.0
-    type: TorchVisionWrapper
-    model_type: vgg16
-    streams:
-      inputs: images
-      outputs: image_activations
-    globals:
-      output_size: image_encoder_output_size
-
-
-
-  question_hidden_state_reshaper:
-    priority: 3.01
-    type: ReshapeTensor
-    input_dims: [-1, 1, 100]
-    output_dims: [-1, 100]
-    streams:
-      inputs: s2s_state_output
-      outputs: s2s_state_output_reshaped
-    globals:
-      output_size: s2s_state_output_reshaped_size
-
-  # Element wise multiplication + FF.
-  question_image_fusion:
-    priority: 3.1
-    type: ElementWiseMultiplication
-    dropout_rate: 0.5
-    streams:
-      image_encodings: image_activations
-      question_encodings: s2s_state_output_reshaped
-      outputs: element_wise_activations
-    globals:
-      image_encoding_size: image_encoder_output_size
-      question_encoding_size: question_encoder_output_size
-      output_size: element_wise_activation_size
-
-  question_image_to_answer_space_projection_ffn:
-    # Role of this component is to "project" output of fusion component to "answer space".
-    priority: 3.2
-    type: FeedForwardNetwork
-    hidden_sizes: [100]
-    dropout_rate: 0.5
-    # Output should not go throught softmax!
-    use_logsoftmax: False
-    streams:
-      inputs: element_wise_activations
-      predictions: question_image_activations
-    globals:
-      input_size: element_wise_activation_size
-      prediction_size: ans_vocabulary_size
-
-  projected_question_image_reshaper:
-    priority: 3.3
-    type: ReshapeTensor
-    input_dims: [-1, 2088]
-    output_dims: [-1, 1, 2088]
-    streams:
-      inputs: question_image_activations
-      outputs: question_image_activations_reshaped
-    globals:
-      output_size: question_image_activations_reshaped_size
-
-  # Single layer GRU Decoder with attention
-  decoder:
-    type: Attn_Decoder_RNN
-    priority: 4
-    hidden_size: 100
-    # Output layer is softmax layer, projecting "1-hot like word encodings".
-    use_logsoftmax: True
-    autoregression_length: 10 # Current implementation requires this value to be equal to fixed_padding in SentenceEmbeddings/Indexer...
-    prediction_mode: Dense
-    dropout_rate: 0.1
-    streams:
-      inputs: s2s_encoder_output
-      predictions: predictions
-      input_state: question_image_activations_reshaped
-    globals:
-      input_size: ans_vocabulary_size
-      prediction_size: ans_vocabulary_size
-
-  # Loss
-  nllloss:
-    type: NLLLoss
-    priority: 6
-    num_targets_dims: 2
-    streams:
-      targets: indexed_answers
-      loss: loss
-    globals:
-      ignore_index: ans_pad_index
-
-  # Prediction decoding.
-  prediction_decoder:
-    priority: 10
-    type: SentenceIndexer
-    # Reverse mode.
-    reverse: True
-    # Use distributions as inputs.
-    use_input_distributions: True
-    data_folder: ~/data/vqa-med
-    import_word_mappings_from_globals: True
-    globals:
-      word_mappings: ans_word_mappings
-    streams:
-      inputs: predictions
-      outputs: prediction_sentences
-
-  # Statistics.
-  batch_size:
-    type: BatchSizeStatistics
-    priority: 100.0
-
-  bleu:
-    type: BLEUStatistics
-    priority: 100.2
-    globals:
-      word_mappings: ans_word_mappings
-    streams:
-      targets: indexed_answers
-
-
-  # Viewers.
-  viewer:
-    type: StreamViewer
-    priority: 100.3
-    input_streams: questions,answers,indexed_answers,prediction_sentences
-
-#: pipeline
diff --git a/configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml
index b93eb56..dc34516 100644
--- a/configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml
+++ b/configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml
@@ -1,5 +1,6 @@
 # Load config defining problems for training, validation and testing.
-default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml
+default_configs:
+  vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml
 
 hyperparameters:
   # In here I am putting some of the hyperparameters from spreadsheet.
diff --git a/configs/vqa_med_2019/evaluation/frozen_if_ffn_c1234_loss.yml b/configs/vqa_med_2019/evaluation/frozen_if_ffn_c1234_loss.yml
new file mode 100644
index 0000000..9407c13
--- /dev/null
+++ b/configs/vqa_med_2019/evaluation/frozen_if_ffn_c1234_loss.yml
@@ -0,0 +1,154 @@
+# Load config defining problems for training, validation and testing.
+default_configs:
+  vqa_med_2019/default_vqa_med_2019.yml,
+  vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml
+  #vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml
+
+c123_hyperparameters:
+  # Here I am putting some of the hyperparameters from the spreadsheet.
+
+  question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize
+  # Accepted formats: a,b,c or [a,b,c]
+  # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all
+
+  image_preprocessing: &image_preprocessing normalize
+  # Accepted formats: a,b,c or [a,b,c]
+  # none | random_affine | random_horizontal_flip | normalize | all
+
+  # Final classifier: FFN.
+  answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500,500]
+
+  batch_size: &batch_size 256
+  preload_images: &preload_images False
+  num_workers: &num_workers 4
+
+# Training parameters:
+training:
+  problem:
+    batch_size: *batch_size
+    categories: all
+    export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_c4_binary_yn.weights.csv
+    # Apply all preprocessing/data augmentations.
+    question_preprocessing: *question_preprocessing
+    image_preprocessing: *image_preprocessing
+    # Preload images.
+    preload_images: *preload_images
+    streams:
+      questions: tokenized_questions
+  sampler:
+    weights: ~/data/vqa-med/answers.c1_c2_c3_c4_binary_yn.weights.csv
+  # Use four workers for loading images.
+  dataloader:
+    num_workers: *num_workers
+
+  # Optimizer parameters:
+  optimizer:
+    name: Adam
+    lr: 0.0001
+
+  # Terminal conditions:
+  terminal_conditions:
+    loss_stop: 1.0e-3
+    episode_limit: 10000
+    epoch_limit: -1
+
+# Validation parameters:
+validation:
+  partial_validation_interval: 100
+  problem:
+    batch_size: *batch_size
+    categories: all
+    # Apply all preprocessing/data augmentations.
+    question_preprocessing: *question_preprocessing
+    image_preprocessing: *image_preprocessing
+    # Preload images: false, as we will need them only once, at the end.
+    preload_images: false
+    streams:
+      questions: tokenized_questions
+  dataloader:
+    num_workers: 1
+
+
+pipeline:
+
+  ################# PIPE 6: C1 + C2 + C3 + C4 questions #################
+
+  # Answer encoding.
+  pipe6_c1234_answer_indexer:
+    priority: 6.2
+    type: LabelIndexer
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.all.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_c1234_binary_yn
+      word_mappings: word_mappings_c1234_binary_yn
+
+
+  # Model 4: FFN C1234 answering
+  pipe6_c1234_answer_classifier:
+    priority: 6.3
+    type: FeedForwardNetwork
+    hidden: *answer_classifier_hidden_sizes_val
+    dropout_rate: 0.5
+    streams:
+      inputs: concatenated_activations
+      predictions: pipe6_c1234_predictions
+    globals:
+      input_size: concatenated_activations_size
+      prediction_size: vocabulary_size_c1234_binary_yn
+
+  pipe6_c1234_nllloss:
+    priority: 6.4
+    type: NLLLoss
+    targets_dim: 1
+    streams:
+      predictions: pipe6_c1234_predictions
+      targets: answers_ids
+      loss: pipe6_c1234_loss
+
+  pipe6_c1234_precision_recall:
+    priority: 6.5
+    type: PrecisionRecallStatistics
+    use_word_mappings: True
+    #show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      predictions: pipe6_c1234_predictions
+      targets: answers_ids
+    globals:
+      word_mappings: word_mappings_c1234_binary_yn
+    statistics:
+      precision: pipe6_c1234_precision
+      recall: pipe6_c1234_recall
+      f1score: pipe6_c1234_f1score
+
+  # C1234 Predictions decoder.
+  pipe6_c1234_binary_yn_prediction_decoder:
+    priority: 6.6
+    type: WordDecoder
+    # Use the same word mappings as the label indexer.
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: pipe6_c1234_predictions
+      outputs: predicted_answers
+    globals:
+      word_mappings: word_mappings_c1234_binary_yn
+
+  ################# PIPE 9: MERGE ANSWERS #################
+
+  # Viewers.
+  viewer:
+    priority: 9.3
+    type: StreamViewer
+    input_streams:
+      tokenized_questions,
+      category_names, predicted_category_names,
+      answers, predicted_answers
+
+
+#: pipeline
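A note on the classifier/loss pairing used throughout these pipelines: the FeedForwardNetwork emits log-probabilities (its `use_logsoftmax` defaults to True), which NLLLoss consumes directly. A minimal PyTorch sketch of the same wiring follows; the sizes are placeholders (the input width of 1134 is taken from the input-fusion config further down, the vocabulary size of 2000 is purely illustrative):

    import torch
    import torch.nn as nn

    classifier = nn.Sequential(
        nn.Linear(1134, 500),   # input_size: concatenated_activations_size (assumed 1134)
        nn.ReLU(),
        nn.Dropout(0.5),        # dropout_rate: 0.5
        nn.Linear(500, 2000),   # prediction_size: vocabulary size (illustrative)
        nn.LogSoftmax(dim=1),   # use_logsoftmax -> log-probabilities
    )
    loss_fn = nn.NLLLoss()      # expects log-probabilities, hence the LogSoftmax above

    concatenated_activations = torch.randn(64, 1134)
    answers_ids = torch.randint(0, 2000, (64,))
    loss = loss_fn(classifier(concatenated_activations), answers_ids)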
diff --git a/configs/vqa_med_2019/evaluation/frozen_if_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/frozen_if_ffn_c123_loss.yml
new file mode 100644
index 0000000..e82be5a
--- /dev/null
+++ b/configs/vqa_med_2019/evaluation/frozen_if_ffn_c123_loss.yml
@@ -0,0 +1,154 @@
+# Load config defining problems for training, validation and testing.
+default_configs:
+  vqa_med_2019/default_vqa_med_2019.yml,
+  vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml,
+  vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml
+
+c123_hyperparameters:
+  # Here I am putting some of the hyperparameters from the spreadsheet.
+
+  question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize
+  # Accepted formats: a,b,c or [a,b,c]
+  # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all
+
+  image_preprocessing: &image_preprocessing normalize
+  # Accepted formats: a,b,c or [a,b,c]
+  # none | random_affine | random_horizontal_flip | normalize | all
+
+  # Final classifier: FFN.
+  answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500]
+
+  batch_size: &batch_size 256
+  preload_images: &preload_images False
+  num_workers: &num_workers 4
+
+# Training parameters:
+training:
+  problem:
+    batch_size: *batch_size
+    categories: C1,C2,C3
+    export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+    # Apply all preprocessing/data augmentations.
+    question_preprocessing: *question_preprocessing
+    image_preprocessing: *image_preprocessing
+    # Preload images.
+    preload_images: *preload_images
+    streams:
+      questions: tokenized_questions
+  sampler:
+    weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+  # Use four workers for loading images.
+  dataloader:
+    num_workers: *num_workers
+
+  # Optimizer parameters:
+  optimizer:
+    name: Adam
+    lr: 0.0001
+
+  # Terminal conditions:
+  terminal_conditions:
+    loss_stop: 1.0e-3
+    episode_limit: 10000
+    epoch_limit: -1
+
+# Validation parameters:
+validation:
+  partial_validation_interval: 100
+  problem:
+    batch_size: *batch_size
+    categories: C1,C2,C3
+    # Apply all preprocessing/data augmentations.
+    question_preprocessing: *question_preprocessing
+    image_preprocessing: *image_preprocessing
+    # Preload images: false, as we will need them only once, at the end.
+    preload_images: false
+    streams:
+      questions: tokenized_questions
+  dataloader:
+    num_workers: 1
+
+
+pipeline:
+
+  ################# PIPE 6: C1 + C2 + C3 questions #################
+
+  # Answer encoding.
+  pipe6_c123_binary_yn_answer_indexer:
+    priority: 6.2
+    type: LabelIndexer
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_c123_binary_yn
+      word_mappings: word_mappings_c123_binary_yn
+
+
+  # Model 4: FFN C123 answering
+  pipe6_c123_binary_yn_answer_classifier:
+    priority: 6.3
+    type: FeedForwardNetwork
+    hidden: *answer_classifier_hidden_sizes_val
+    dropout_rate: 0.5
+    streams:
+      inputs: concatenated_activations
+      predictions: pipe6_c123_predictions
+    globals:
+      input_size: concatenated_activations_size
+      prediction_size: vocabulary_size_c123_binary_yn
+
+  pipe6_c123_binary_yn_nllloss:
+    priority: 6.4
+    type: NLLLoss
+    targets_dim: 1
+    streams:
+      predictions: pipe6_c123_predictions
+      targets: answers_ids
+      loss: pipe6_c123_loss
+
+  pipe6_c123_binary_yn_precision_recall:
+    priority: 6.5
+    type: PrecisionRecallStatistics
+    use_word_mappings: True
+    show_class_scores: True
+    #show_confusion_matrix: True
+    streams:
+      predictions: pipe6_c123_predictions
+      targets: answers_ids
+    globals:
+      word_mappings: word_mappings_c123_binary_yn
+    statistics:
+      precision: pipe6_c123_precision
+      recall: pipe6_c123_recall
+      f1score: pipe6_c123_f1score
+
+  # C123 Predictions decoder.
+  pipe6_c123_binary_yn_prediction_decoder:
+    priority: 6.6
+    type: WordDecoder
+    # Use the same word mappings as the label indexer.
+    import_word_mappings_from_globals: True
+    streams:
+      inputs: pipe6_c123_predictions
+      outputs: predicted_answers
+    globals:
+      word_mappings: word_mappings_c123_binary_yn
+
+  ################# PIPE 9: MERGE ANSWERS #################
+
+  # Viewers.
+  viewer:
+    priority: 9.3
+    type: StreamViewer
+    input_streams:
+      tokenized_questions,
+      category_names, predicted_category_names,
+      answers, predicted_answers
+
+
+#: pipeline
diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml
index 232cbbe..2091e16 100644
--- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml
+++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml
@@ -1,5 +1,6 @@
 # Load config defining problems for training, validation and testing.
-default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml
+default_configs:
+  vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml
 
 hyperparameters:
   # In here I am putting some of the hyperparameters from spreadsheet.
diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml
index bca7a7f..cd28ae0 100644
--- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml
+++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml
@@ -1,5 +1,6 @@
 # Load config defining problems for training, validation and testing.
-default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml
+default_configs:
+  vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml
 
 hyperparameters:
   # In here I am putting some of the hyperparameters from spreadsheet.
diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml
index 3f9aa05..896b221 100644
--- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml
+++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml
@@ -1,5 +1,6 @@
 # Load config defining problems for training, validation and testing.
-default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml
+default_configs:
+  vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml
 
 hyperparameters:
   # In here I am putting some of the hyperparameters from spreadsheet.
@@ -40,9 +41,9 @@ hyperparameters:
   # Final classifier: FFN.
   answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500]
 
-  batch_size: &batch_size 100
-  preload_images: &preload_images True
-  num_workers: &num_workers 1
+  batch_size: &batch_size 300
+  preload_images: &preload_images False
+  num_workers: &num_workers 4
 
 # Training parameters:
 training:
diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml
index d718eeb..8f9d748 100644
--- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml
+++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml
@@ -1,5 +1,6 @@
 # Load config defining problems for training, validation and testing.
-default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml
+default_configs:
+  vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml
 
 hyperparameters:
   # In here I am putting some of the hyperparameters from spreadsheet.
diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml
index 1b4363b..3ec7a77 100644
--- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml
+++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml
@@ -1,5 +1,6 @@
 # Load config defining problems for training, validation and testing.
-default_configs: vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml
+default_configs:
+  vqa_med_2019/default_vqa_med_2019.yml #,vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml
 
 hyperparameters:
   # In here I am putting some of the hyperparameters from spreadsheet.
diff --git a/configs/vqa_med_2019/c4_classification/frozen_pipeline_input_fusion_glove_lstm_vgg_att_is_cat.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml
similarity index 69%
rename from configs/vqa_med_2019/c4_classification/frozen_pipeline_input_fusion_glove_lstm_vgg_att_is_cat.yml
rename to configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml
index 4d554d1..c3ae040 100644
--- a/configs/vqa_med_2019/c4_classification/frozen_pipeline_input_fusion_glove_lstm_vgg_att_is_cat.yml
+++ b/configs/vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml
@@ -1,12 +1,16 @@
 # Part of pipeline containing components constituting the "Inputs Fusion" pipeline.
 
 # Inputs:
-# *
-# *
+# streams:
+#  * tokenized_questions
+#  * images
+#  * image_sizes
 
 # Outputs:
-# *
-# *
+# streams:
+#  * concatenated_activations
+# globals:
+#  * concatenated_activations_size
 
 # "Inputs Fusion"
 # 0.:
@@ -24,44 +28,50 @@ checkpoint: &checkpoint ~/image-clef-2019/experiments/c4_encoders/20190504_20244
 # This one will be skipped, as this is C123 classifier!
 # + Model 'pipe6_c123_answer_classifier' [FeedForwardNetwork] params saved
 
+pipe_if0_hyperparameters:
+  # WARNING: as we are loading the pretrained pipeline, all those values must stay!
+
+  # Image encoder.
+  image_encoder_model: &image_encoder_model vgg16
+
+  # Question encoder.
+  question_encoder_embeddings: &question_encoder_embeddings glove.6B.50d.txt
+  # Options: '' | glove.6B.50d.txt | glove.6B.100d.txt | glove.6B.200d.txt | glove.6B.300d.txt | glove.42B.300d.txt | glove.840B.300d.txt | glove.twitter.27B.txt | mimic.fastText.no_clean.300d.pickled
+  question_encoder_embeddings_size_val: &question_encoder_embeddings_size_val 50
+  question_encoder_lstm_size_val: &question_encoder_lstm_size_val 50
+  question_encoder_output_size_val: &question_encoder_output_size_val 100
+
+  # Fusion I: image + question
+  question_image_fusion_type_val: &question_image_fusion_type VQA_Attention
+
+  # Image size encoder.
+  image_size_encoder_output_size_val: &image_size_encoder_output_size_val 10
+
+  # Fusion II: (image + question) + image size (must be = question_image_fusion_size_val + image_size_encoder_output_size_val)
+  question_image_size_fusion_size_val: &question_image_size_fusion_size_val 1134
+
 pipeline:
 
   ################# PIPE 0: SHARED #################
 
   # Add global variables.
-  global_publisher:
+  pipe_if0_global_publisher:
     priority: 0.11
     type: GlobalVariablePublisher
     # Add input_size to globals.
-    keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size] #, image_encoder_output_size] #, fused_activation_size]
-    values: [*question_encoder_output_size_val, 2, *image_size_encoder_output_size_val] #, *image_encoder_output_size_val] #, *question_image_fusion_size_val]
+    keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size]
+    values: [*question_encoder_output_size_val, 2, *image_size_encoder_output_size_val]
 
   # Statistics.
-  batch_size:
+  pipe_if0_batch_size:
     priority: 0.12
     type: BatchSizeStatistics
 
-  # Answer encoding.
-  #pipe1_all_answer_indexer:
-  #  priority: 0.13
-  #  type: LabelIndexer
-  #  data_folder: ~/data/vqa-med
-  #  word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
-  #  # Export mappings and size to globals.
-  #  export_word_mappings_to_globals: True
-  #  streams:
-  #    inputs: answers
-  #    outputs: answers_ids
-  #  globals:
-  #    vocabulary_size: vocabulary_size_c123_binary_yn
-  #    word_mappings: word_mappings_c123_binary_yn
-
-
   ################# PIPE 1: SHARED QUESTION ENCODER #################
 
   # Model 1: question embeddings
-  pipe1_question_embeddings:
+  pipe_if1_question_embeddings:
     priority: 1.1
     type: SentenceEmbeddings
     embeddings_size: *question_encoder_embeddings_size_val
@@ -78,10 +88,10 @@ pipeline:
       inputs: tokenized_questions
       outputs: embedded_questions
     globals:
-      embeddings_size: pipe1_embeddings_size
+      embeddings_size: pipe_if1_embeddings_size
 
   # Model 2: question RNN
-  pipe1_lstm:
+  pipe_if1_lstm:
     priority: 1.2
     type: RecurrentNeuralNetwork
     cell_type: LSTM
@@ -92,20 +102,20 @@ pipeline:
     # LOAD AND FREEZE #
     load:
       file: *checkpoint
-      model: pipe1_question_embeddings
+      model: pipe1_lstm
    freeze: True
     ###################
     streams:
       inputs: embedded_questions
       predictions: question_activations
     globals:
-      input_size: pipe1_embeddings_size
+      input_size: pipe_if1_embeddings_size
       prediction_size: question_encoder_output_size
 
   ################# PIPE 2: SHARED IMAGE ENCODER #################
 
   # Image encoder.
-  image_encoder:
+  pipe_if2_image_encoder:
     priority: 2.1
     type: TorchVisionWrapper
     model: *image_encoder_model
@@ -113,7 +123,7 @@ pipeline:
     # LOAD AND FREEZE #
     load:
       file: *checkpoint
-      model: pipe1_question_embeddings
+      model: image_encoder
     freeze: True
     ###################
     streams:
@@ -123,14 +133,14 @@ pipeline:
   ################# PIPE 3: SHARED IMAGE SIZE ENCODER #################
 
   # Model - image size classifier.
-  image_size_encoder:
+  pipe_if3_image_size_encoder:
     priority: 3.1
     type: FeedForwardNetwork
     use_logsoftmax: False
     # LOAD AND FREEZE #
     load:
       file: *checkpoint
-      model: pipe1_question_embeddings
+      model: image_size_encoder
     freeze: True
     ###################
     streams:
@@ -142,13 +152,19 @@ pipeline:
   ################# PIPE 4: image-question fusion #################
 
   # Attention + FF.
-  question_image_fusion:
+  pipe_if4_question_image_fusion:
     priority: 4.1
     type: *question_image_fusion_type
     dropout_rate: 0.5
     # Attention params.
     latent_size: 100
     num_attention_heads: 2
+    # LOAD AND FREEZE #
+    load:
+      file: *checkpoint
+      model: question_image_fusion
+    freeze: True
+    ###################
     streams:
       image_encodings: feature_maps
       question_encodings: question_activations
@@ -158,12 +174,17 @@ pipeline:
       output_size: fused_activation_size
 
-  question_image_ffn:
+  pipe_if4_question_image_ffn:
     priority: 4.2
     type: FeedForwardNetwork
-    #hidden_sizes: [*question_image_fusion_size_val]
     dropout_rate: 0.5
     use_logsoftmax: False
+    # LOAD AND FREEZE #
+    load:
+      file: *checkpoint
+      model: question_image_ffn
+    freeze: True
+    ###################
     streams:
       inputs: fused_activations
       predictions: question_image_activations
@@ -174,7 +195,7 @@ pipeline:
   ################# PIPE 5: image-question-image size fusion #################
 
   # 5th subpipeline: concatenation
-  concat:
+  pipe_if5_concat:
     priority: 5.1
     type: Concatenation
     input_streams: [question_image_activations,image_size_activations]
diff --git a/configs/vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml
similarity index 88%
rename from configs/vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml
rename to configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml
index 6ca6986..c144f57 100644
--- a/configs/vqa_med_2019/question_categorization/frozen_pipeline_qc_glove_rnn_ffn.yml
+++ b/configs/vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml
@@ -19,6 +19,7 @@
 # 0.56: pipe_qc_category_accuracy
 
 # Loaded checkpoint: 20190505_130406
+checkpoint: &checkpoint ~/image-clef-2019/experiments/q_categorization/20190505_130406/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
 
 pipeline:
   ################# PIPE: QUESTION CATEGORIZATION #################
@@ -37,7 +38,7 @@ pipeline:
     type: SentenceEmbeddings
     # LOAD AND FREEZE #
     load:
-      file: ~/image-clef-2019/experiments/q_categorization/20190505_130406/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      file: *checkpoint
       model: question_embeddings
     freeze: True
     ###################
@@ -58,7 +59,7 @@ pipeline:
     cell_type: LSTM
     # LOAD AND FREEZE #
     load:
-      file: ~/image-clef-2019/experiments/q_categorization/20190505_130406/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      file: *checkpoint
       model: lstm
     freeze: True
     ###################
@@ -78,7 +79,7 @@ pipeline:
     type: FeedForwardNetwork
     # LOAD AND FREEZE #
     load:
-      file: ~/image-clef-2019/experiments/q_categorization/20190505_130406/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
+      file: *checkpoint
       model: classifier
     freeze: True
     ###################
diff --git a/configs/vqa_med_2019/frozen_pipelines/frozen_word_answer_glove_sum.yml b/configs/vqa_med_2019/frozen_pipelines/frozen_word_answer_glove_sum.yml
new file mode 100644
index 0000000..e69de29
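Each `load:` / `freeze: True` block in the frozen pipelines above restores one component's weights from the referenced checkpoint and excludes them from further optimization. In plain PyTorch terms this amounts roughly to the following sketch; the checkpoint layout (a dict of per-component state dicts keyed by the saved model name) is an assumption based on the `model:` keys used above, and the LSTM sizes are taken from the question-encoder config:

    import torch
    import torch.nn as nn

    # Rough equivalent of one "LOAD AND FREEZE" block (layout assumed).
    lstm = nn.LSTM(input_size=50, hidden_size=50, batch_first=True)
    checkpoint = torch.load(
        "vqa_med_question_categorization_rnn_ffn_best.pt", map_location="cpu")
    lstm.load_state_dict(checkpoint["lstm"])     # load: model: lstm
    for param in lstm.parameters():              # freeze: True
        param.requires_grad = False              # excluded from optimizer updates
    lstm.eval()                                  # keep dropout/normalization fixed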
diff --git a/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml b/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml
new file mode 100644
index 0000000..0f8754d
--- /dev/null
+++ b/configs/vqa_med_2019/frozen_pipelines/input_fusion_processor_io.yml
@@ -0,0 +1,73 @@
+# Load config defining problems for training, validation and testing.
+default_configs:
+  vqa_med_2019/frozen_pipelines/frozen_input_fusion_glove_lstm_vgg_att_is_cat.yml,
+  vqa_med_2019/frozen_pipelines/frozen_question_categorization_glove_rnn_ffn.yml
+
+
+training_validation:
+  problem:
+    type: &p_type VQAMED2019
+    data_folder: &data_folder ~/data/vqa-med
+    split: training_validation
+    categories: all
+    resize_image: &resize_image [224, 224]
+    batch_size: 64
+    # Apply all preprocessing/data augmentations.
+    question_preprocessing: lowercase,remove_punctuation,tokenize
+    streams:
+      questions: tokenized_questions
+
+  dataloader:
+    # No sampler, process samples in the same order.
+    shuffle: false
+    # Use 1 worker, so batches will follow the sample order.
+    num_workers: 1
+
+
+hyperparams:
+  # Final classifier: FFN.
+  answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500]
+
+
+# Add component for exporting answers to files.
+pipeline:
+  name: input_fusion_processor_io
+
+  ################# PIPE 6: C1 + C2 + C3 questions #################
+
+  # Answer encoding.
+  pipe6_c123_binary_yn_answer_indexer:
+    priority: 6.2
+    type: LabelIndexer
+    data_folder: ~/data/vqa-med
+    word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
+    # Export mappings and size to globals.
+    export_word_mappings_to_globals: True
+    streams:
+      inputs: answers
+      outputs: answers_ids
+    globals:
+      vocabulary_size: vocabulary_size_c123_binary_yn
+      word_mappings: word_mappings_c123_binary_yn
+
+
+  # Viewers.
+  viewer_extended:
+    priority: 100.4
+    type: StreamViewer
+    sample_number: 0
+    input_streams:
+      indices,image_ids,tokenized_questions,
+      concatenated_activations_size,
+      category_names,
+      answers
+
+  fused_inputs_exporter:
+    priority: 100.5
+    type: StreamFileExporter
+    separator: '|'
+    filename: 'fused_inputs.csv'
+    export_separator_line_to_csv: True
+    input_streams:
+      indices #, concatenated_activations
+
diff --git a/ptp/workers/processor.py b/ptp/workers/processor.py
index b5afa68..0abadc7 100644
--- a/ptp/workers/processor.py
+++ b/ptp/workers/processor.py
@@ -86,20 +86,24 @@ def setup_global_experiment(self):
             self.logger.error("Cannot use GPU as there are no CUDA-compatible devices present in the system!")
             exit(-1)
 
+        # Config that will be used.
+        abs_root_configs = None
+
         # Check if checkpoint file was indicated.
-        if chkpt_file == "":
-            print('Please pass path to and name of the file containing pipeline to be loaded as --load parameter')
-            exit(-2)
+        if chkpt_file != "":
+            #print('Please pass path to and name of the file containing pipeline to be loaded as --load parameter')
+            #exit(-2)
 
-        # Check if file with model exists.
-        if not path.isfile(chkpt_file):
-            print('Checkpoint file {} does not exist'.format(chkpt_file))
-            exit(-3)
+            # Check if file with model exists.
+            if not path.isfile(chkpt_file):
+                print('Checkpoint file {} does not exist'.format(chkpt_file))
+                exit(-3)
+
+            # Extract path.
+            self.abs_path, _ = path.split(path.dirname(path.expanduser(chkpt_file)))
 
-        # Extract path.
-        self.abs_path, _ = path.split(path.dirname(path.expanduser(chkpt_file)))
-        print(self.abs_path)
+            # Use the "default" config.
+            abs_root_configs = [path.join(self.abs_path, 'training_configuration.yml')]
 
         # Check if config file was indicated by the user.
@@ -107,9 +111,20 @@
         if self.app_state.args.config != '':
             # Split and make them absolute.
             root_configs = self.app_state.args.config.replace(" ", "").split(',')
             # If there are - expand them to absolute paths.
             abs_root_configs = [path.expanduser(config) for config in root_configs]
-        else:
-            # Use the "default one".
-            abs_root_configs = [path.join(self.abs_path, 'training_configuration.yml')]
+
+            # Use the name of the first configuration file from the command line.
+            basename = path.basename(root_configs[0])
+            # Take config filename without extension.
+            pipeline_name = path.splitext(basename)[0]
+
+            # Use path to experiments + pipeline.
+            self.abs_path = path.join(path.expanduser(self.app_state.args.expdir), pipeline_name)
+
+
+        if abs_root_configs is None:
+            print('Please indicate configuration file to be used (--config) and/or pass path to and name of the file containing pipeline to be loaded (--load)')
+            exit(-2)
+
         # Get the list of configurations which need to be loaded.
         configs_to_load = config_parsing.recurrent_config_parse(abs_root_configs, [], self.app_state.absolute_config_path)
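The resulting control flow of setup_global_experiment() is a small precedence rule: --config wins and derives the experiment directory from the first config file's name; otherwise a --load checkpoint supplies its saved training_configuration.yml; with neither, the worker exits. A condensed, standalone restatement of the hunks above (the function signature and the use of SystemExit are simplifications):

    from os import path

    def resolve_configs(chkpt_file, config_arg, expdir):
        """Condensed restatement of the new setup_global_experiment() logic."""
        abs_root_configs = None
        abs_path = None
        if chkpt_file != "":
            if not path.isfile(chkpt_file):
                raise SystemExit('Checkpoint file {} does not exist'.format(chkpt_file))
            abs_path, _ = path.split(path.dirname(path.expanduser(chkpt_file)))
            # Fall back to the configuration saved alongside the checkpoint.
            abs_root_configs = [path.join(abs_path, 'training_configuration.yml')]
        if config_arg != '':
            root_configs = config_arg.replace(" ", "").split(',')
            abs_root_configs = [path.expanduser(c) for c in root_configs]
            # Experiment dir is derived from the first config file's basename.
            pipeline_name = path.splitext(path.basename(root_configs[0]))[0]
            abs_path = path.join(path.expanduser(expdir), pipeline_name)
        if abs_root_configs is None:
            raise SystemExit('Please indicate a configuration file (--config) '
                             'and/or a checkpoint to be loaded (--load)')
        return abs_root_configs, abs_path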