diff --git a/configs/default/components/masking/string_to_mask.yml b/configs/default/components/masking/string_to_mask.yml index bcc363d..2b74456 100644 --- a/configs/default/components/masking/string_to_mask.yml +++ b/configs/default/components/masking/string_to_mask.yml @@ -4,9 +4,12 @@ # 1. CONFIGURATION PARAMETERS that will be LOADED by the component. #################################################################### -# Value that will be used when word is out of vocavbulary (LOADED) +# Value that will be used when word is out of vocabulary (LOADED) # (Mask for that element will be 0 as well) -out_of_vocabulary_value: -1 +# -100 is the default value used by PyTorch loss functions to specify +# target values that will be ignored and do not contribute to the input gradient. +# (ignore_index=-100) +out_of_vocabulary_value: -100 streams: #################################################################### diff --git a/configs/default/components/text/label_indexer.yml b/configs/default/components/text/label_indexer.yml index 5b871e9..bfe9aa0 100644 --- a/configs/default/components/text/label_indexer.yml +++ b/configs/default/components/text/label_indexer.yml @@ -25,9 +25,12 @@ import_word_mappings_from_globals: False # Flag informing whether word mappings will be exported to globals (LOADED) export_word_mappings_to_globals: False -# Value that will be used when word is out of vocavbulary (LOADED) +# Value that will be used when word is out of vocabulary (LOADED) # (Mask for that element will be 0 as well) -out_of_vocabulary_value: -1 +# -100 is the default value used by PyTorch loss functions to specify +# target values that will be ignored and do not contribute to the input gradient. 
+# (ignore_index=-100) +out_of_vocabulary_value: -100 streams: #################################################################### diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml new file mode 100644 index 0000000..d4745b6 --- /dev/null +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml @@ -0,0 +1,101 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml + +pipeline: + name: vqa_med_c2_classification_all_rnn_vgg_concat + + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_embeddings_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size] + values: [100, 2, 10, 100] + + # First subpipeline: question. + # Questions encoding. + question_tokenizer: + type: SentenceTokenizer + priority: 1.1 + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: Embeddings + question_embeddings: + type: SentenceEmbeddings + priority: 1.2 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + priority: 1.3 + use_logsoftmax: False + initial_state_trainable: False + #num_layers: 5 + hidden_size: 50 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_embeddings_output_size + + # 2nd subpipeline: image size. + # Model - image size classifier. 
+ image_size_encoder: + type: FeedForwardNetwork + priority: 2.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + # 3rd subpipeline: image. + # Image encoder. + image_encoder: + type: TorchVisionWrapper + priority: 3.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + # 4th subpipeline: concatenation + FF. + concat: + type: Concatenation + priority: 4.1 + input_streams: [question_activations,image_size_activations,image_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,10],[-1,100]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: output_size + + + classifier: + type: FeedForwardNetwork + hidden_sizes: [100] + priority: 4.2 + streams: + inputs: concatenated_activations + globals: + input_size: output_size + prediction_size: vocabulary_size_c2 + + + #: pipeline diff --git a/configs/vqa_med_2019/c2_classification/default_c2_classification.yml b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml new file mode 100644 index 0000000..3df45b4 --- /dev/null +++ b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml @@ -0,0 +1,91 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C2 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c2.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C2 + dataloader: + num_workers: 4 + + +pipeline: + + # Answer encoding. + answer_indexer: + type: LabelIndexer + priority: 0.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c2.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c2 + word_mappings: word_mappings_c2 + + + # Predictions decoder. + prediction_decoder: + type: WordDecoder + priority: 10.1 + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: predictions + outputs: predicted_answers + globals: + vocabulary_size: vocabulary_size_c2 + word_mappings: word_mappings_c2 + + # Loss + nllloss: + type: NLLLoss + priority: 10.2 + targets_dim: 1 + streams: + targets: answers_ids + loss: loss + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 100.1 + + #accuracy: + # type: AccuracyStatistics + # priority: 100.2 + # streams: + # targets: answers_ids + + precision_recall: + type: PrecisionRecallStatistics + priority: 100.3 + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + streams: + targets: answers_ids + globals: + word_mappings: word_mappings_c2 + num_classes: vocabulary_size_c2 + + # Viewers. + viewer: + type: StreamViewer + priority: 100.4 + input_streams: questions,category_names,answers,predicted_answers + +#: pipeline diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml index 4ffc007..6996f91 100644 --- a/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml +++ b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml @@ -394,7 +394,7 @@ pipeline: # Viewers. 
viewer: type: StreamViewer - priority: 4.3 + priority: 7.3 input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_binary_masks,pipe6_binary_answers_ids,pipe6_binary_predictions, pipe7_merged_predictions diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml b/configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml new file mode 100644 index 0000000..ef8f535 --- /dev/null +++ b/configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml @@ -0,0 +1,298 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1,C2,C3 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1,C2,C3 + dataloader: + num_workers: 4 + + +pipeline: + name: c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_binary_yn_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2, "BINARY": 3}] + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 0.1 + + # Questions encoding. 
+ pipe1_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: 
WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + vocabulary_size: vocabulary_size_all_c1_c2_c3_binary + word_mappings: word_mappings_all_c1_c2_c3_binary + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. 
+ image_encoder: + type: TorchVisionWrapper + priority: 2.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 + C2 + C3 + Binary Y/N question ################# + + # Answer encoding for PIPE 5. + pipe5_all_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_all_answers_ids + globals: + vocabulary_size: vocabulary_size_c1_c2_c3_binary + word_mappings: word_mappings_all_c1_c2_c3_binary + + # Sample masking based on categories. 
+ pipe5_all_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_c2_c3_binary_yn_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c1_c2_c3_binary_by_question_categories_indices # NOT USED + masks: pipe5_all_masks + + # Model 4: FFN answering C1, C2, C3 and binary questions + pipe5_all_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_all_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c1_c2_c3_binary + + pipe5_all_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_all_predictions + masks: pipe5_all_masks + targets: pipe5_all_answers_ids + loss: pipe5_all_loss + + pipe5_all_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_all_masks + predictions: pipe5_all_predictions + targets: pipe5_all_answers_ids + globals: + word_mappings: word_mappings_all_c1_c2_c3_binary + statistics: + precision: pipe5_all_precision + recall: pipe5_all_recall + f1score: pipe5_all_f1score + + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. 
+ viewer: + type: StreamViewer + priority: 9.3 + input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_all_masks,pipe5_all_answers_without_yn_ids,pipe5_all_predictions + + +#: pipeline diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml new file mode 100644 index 0000000..56ab04b --- /dev/null +++ b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml @@ -0,0 +1,542 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1,C2,C3 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1,C2,C3 + dataloader: + num_workers: 4 + + +pipeline: + name: c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_c2_word_to_ix, category_c3_word_to_ix, category_binary_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0}, {"C2": 0}, {"C3": 0}, {"BINARY": 0}] + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 0.1 + + # Questions encoding. 
+ pipe1_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: 
WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + #vocabulary_size: vocabulary_size_all_c1_c2_c3_binary + word_mappings: word_mappings_all_c1_c2_c3_binary + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. 
+ image_encoder: + type: TorchVisionWrapper + priority: 2.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 question ################# + + # Answer encoding for PIPE 5. + pipe5_c1_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_without_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_c1_answers_without_yn_ids + globals: + vocabulary_size: vocabulary_size_c1_without_yn + word_mappings: word_mappings_c1_without_yn + + # Sample masking based on categories. 
+ pipe5_c1_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c1_question_categories_indices # NOT USED + masks: pipe5_c1_masks + + # Model 4: FFN C1 answering + pipe5_c1_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_c1_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c1_without_yn + + pipe5_c1_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_c1_predictions + masks: pipe5_c1_masks + targets: pipe5_c1_answers_without_yn_ids + loss: pipe5_c1_loss + + pipe5_c1_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_c1_masks + predictions: pipe5_c1_predictions + targets: pipe5_c1_answers_without_yn_ids + globals: + word_mappings: word_mappings_c1_without_yn + #num_classes: vocabulary_size_c1_without_yn + statistics: + precision: pipe5_c1_precision + recall: pipe5_c1_recall + f1score: pipe5_c1_f1score + + ################# PIPE 6: C2 question ################# + + # Answer encoding for PIPE 6. + pipe6_c2_answer_indexer: + type: LabelIndexer + priority: 6.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c2.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe6_c2_answers_ids + globals: + vocabulary_size: vocabulary_size_c2 + word_mappings: word_mappings_c2 + + # Sample masking based on categories. 
+ pipe6_c2_string_to_mask: + priority: 6.2 + type: StringToMask + globals: + word_mappings: category_c2_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c2_question_categories_indices # NOT USED + masks: pipe6_c2_masks + + # Model 4: FFN C2 answering + pipe6_c2_ffn: + priority: 6.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c2_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c2 + + pipe6_c2_nllloss: + type: NLLLoss + priority: 6.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe6_c2_predictions + masks: pipe6_c2_masks + targets: pipe6_c2_answers_ids + loss: pipe6_c2_loss + + pipe6_c2_precision_recall: + type: PrecisionRecallStatistics + priority: 6.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe6_c2_masks + predictions: pipe6_c2_predictions + targets: pipe6_c2_answers_ids + globals: + word_mappings: word_mappings_c2 + statistics: + precision: pipe6_c2_precision + recall: pipe6_c2_recall + f1score: pipe6_c2_f1score + + ################# PIPE 7: C3 question ################# + + # Answer encoding for PIPE 7. + pipe7_c3_answer_indexer: + type: LabelIndexer + priority: 7.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c3.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe7_c3_answers_ids + globals: + vocabulary_size: vocabulary_size_c3 + word_mappings: word_mappings_c3 + + # Sample masking based on categories. 
+ pipe7_c3_string_to_mask: + priority: 7.2 + type: StringToMask + globals: + word_mappings: category_c3_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c3_question_categories_indices # NOT USED + masks: pipe7_c3_masks + + # Model 4: FFN C3 answering + pipe7_c3_ffn: + priority: 7.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe7_c3_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c3 + + pipe7_c3_nllloss: + type: NLLLoss + priority: 7.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe7_c3_predictions + masks: pipe7_c3_masks + targets: pipe7_c3_answers_ids + loss: pipe7_c3_loss + + pipe7_c3_precision_recall: + type: PrecisionRecallStatistics + priority: 7.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe7_c3_masks + predictions: pipe7_c3_predictions + targets: pipe7_c3_answers_ids + globals: + word_mappings: word_mappings_c3 + #num_classes: vocabulary_size_c3 + statistics: + precision: pipe7_c3_precision + recall: pipe7_c3_recall + f1score: pipe7_c3_f1score + + ################# PIPE 8: BINARY question ################# + + # Answer encoding for pipe 8. + pipe8_binary_answer_indexer: + type: LabelIndexer + priority: 8.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe8_binary_answers_ids + globals: + vocabulary_size: vocabulary_size_binary_yn + word_mappings: word_mappings_binary_yn + + pipe8_binary_string_to_mask: + priority: 8.2 + type: StringToMask + globals: + word_mappings: category_binary_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_binary_question_categories_indices # NOT USED + masks: pipe8_binary_masks + + # Model 4: FFN binary (Y/N) answering + pipe8_binary_ffn: + priority: 8.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe8_binary_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_binary_yn + + pipe8_binary_nllloss: + type: NLLLoss + priority: 8.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe8_binary_predictions + masks: pipe8_binary_masks + targets: pipe8_binary_answers_ids + loss: pipe8_binary_loss + + pipe8_binary_precision_recall: + type: PrecisionRecallStatistics + priority: 8.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe8_binary_masks + predictions: pipe8_binary_predictions + targets: pipe8_binary_answers_ids + globals: + word_mappings: word_mappings_binary_yn + #num_classes: vocabulary_size_binary_yn + statistics: + precision: pipe8_binary_precision + recall: pipe8_binary_recall + f1score: pipe8_binary_f1score + + ################# PIPE 9: MERGE ANSWERS ################# + + # Merge predictions. + pipe9_merged_predictions: + type: JoinMaskedPredictions + priority: 9.1 + # Names of used input streams. 
+ input_prediction_streams: [pipe5_c1_predictions, pipe6_c2_predictions, pipe7_c3_predictions, pipe8_binary_predictions] + input_mask_streams: [pipe5_c1_masks, pipe6_c2_masks, pipe7_c3_masks, pipe8_binary_masks] + input_word_mappings: [word_mappings_c1_without_yn, word_mappings_c2, word_mappings_c3, word_mappings_binary_yn] + globals: + output_word_mappings: word_mappings_all_c1_c2_c3_binary + streams: + output_strings: pipe9_merged_predictions + output_indices: pipe9_merged_pred_indices + + # Statistics. + pipe9_merged_precision_recall: + type: PrecisionRecallStatistics + priority: 9.2 + # Use prediction indices instead of distributions. + use_prediction_distributions: False + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + globals: + word_mappings: word_mappings_all_c1_c2_c3_binary + streams: + targets: all_answers_ids + predictions: pipe9_merged_pred_indices + statistics: + precision: pipe9_merged_precision + recall: pipe9_merged_recall + f1score: pipe9_merged_f1score + + # Viewers. + viewer: + type: StreamViewer + priority: 9.3 + input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_c2_masks,pipe6_c2_answers_ids,pipe6_c2_predictions, pipe7_c3_masks,pipe7_c3_answers_ids,pipe7_c3_predictions, pipe8_binary_masks,pipe8_binary_answers_ids,pipe8_binary_predictions, pipe9_merged_predictions + + +#: pipeline diff --git a/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml new file mode 100644 index 0000000..3b1d952 --- /dev/null +++ b/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml @@ -0,0 +1,472 @@ +# Load config defining problems for training, validation and testing. 
+default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1,C3 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1_c3_binary_yn.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1,C3 + dataloader: + num_workers: 4 + + +pipeline: + name: c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_c3_word_to_ix, category_binary_word_to_ix] + values: [100, 2, 10, 100,{"C1": 0}, {"C3": 0}, {"BINARY": 0}] + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 0.1 + + # Questions encoding. + pipe1_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: 
~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. 
+ import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + #vocabulary_size: vocabulary_size_all_c1_c3_binary + word_mappings: word_mappings_all_c1_c3_binary + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. 
+ image_encoder: + type: TorchVisionWrapper + priority: 2.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 question ################# + + # Answer encoding for PIPE 5. + pipe5_c1_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_without_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_c1_answers_without_yn_ids + globals: + vocabulary_size: vocabulary_size_c1_without_yn + word_mappings: word_mappings_c1_without_yn + + # Sample masking based on categories. 
+ pipe5_c1_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c1_question_categories_indices # NOT USED + masks: pipe5_c1_masks + + # Model 4: FFN C1 answering + pipe5_c1_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_c1_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c1_without_yn + + pipe5_c1_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_c1_predictions + masks: pipe5_c1_masks + targets: pipe5_c1_answers_without_yn_ids + loss: pipe5_c1_loss + + pipe5_c1_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_c1_masks + predictions: pipe5_c1_predictions + targets: pipe5_c1_answers_without_yn_ids + globals: + word_mappings: word_mappings_c1_without_yn + #num_classes: vocabulary_size_c1_without_yn + statistics: + precision: pipe5_c1_precision + recall: pipe5_c1_recall + f1score: pipe5_c1_f1score + + ################# PIPE 6: C3 question ################# + + # Answer encoding for PIPE 6. + pipe6_c3_answer_indexer: + type: LabelIndexer + priority: 6.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c3.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe6_c3_answers_ids + globals: + vocabulary_size: vocabulary_size_c3 + word_mappings: word_mappings_c3 + + # Sample masking based on categories. 
+ pipe6_c3_string_to_mask: + priority: 6.2 + type: StringToMask + globals: + word_mappings: category_c3_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c3_question_categories_indices # NOT USED + masks: pipe6_c3_masks + + # Model 4: FFN C3 answering + pipe6_c3_ffn: + priority: 6.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c3_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c3 + + pipe6_c3_nllloss: + type: NLLLoss + priority: 6.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe6_c3_predictions + masks: pipe6_c3_masks + targets: pipe6_c3_answers_ids + loss: pipe6_c3_loss + + pipe6_c3_precision_recall: + type: PrecisionRecallStatistics + priority: 6.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe6_c3_masks + predictions: pipe6_c3_predictions + targets: pipe6_c3_answers_ids + globals: + word_mappings: word_mappings_c3 + #num_classes: vocabulary_size_c3 + statistics: + precision: pipe6_c3_precision + recall: pipe6_c3_recall + f1score: pipe6_c3_f1score + + ################# PIPE 7: BINARY question ################# + + # Answer encoding for PIPE 7. + pipe7_binary_answer_indexer: + type: LabelIndexer + priority: 7.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe7_binary_answers_ids + globals: + vocabulary_size: vocabulary_size_binary_yn + word_mappings: word_mappings_binary_yn + + pipe7_binary_string_to_mask: + priority: 7.2 + type: StringToMask + globals: + word_mappings: category_binary_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_binary_question_categories_indices # NOT USED + masks: pipe7_binary_masks + + # Model 4: FFN binary answering + pipe7_binary_ffn: + priority: 7.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe7_binary_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_binary_yn + + pipe7_binary_nllloss: + type: NLLLoss + priority: 7.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe7_binary_predictions + masks: pipe7_binary_masks + targets: pipe7_binary_answers_ids + loss: pipe7_binary_loss + + pipe7_binary_precision_recall: + type: PrecisionRecallStatistics + priority: 7.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe7_binary_masks + predictions: pipe7_binary_predictions + targets: pipe7_binary_answers_ids + globals: + word_mappings: word_mappings_binary_yn + #num_classes: vocabulary_size_binary_yn + statistics: + precision: pipe7_binary_precision + recall: pipe7_binary_recall + f1score: pipe7_binary_f1score + + ################# PIPE 8: MERGE ANSWERS ################# + + # Merge predictions + pipe8_merged_predictions: + type: JoinMaskedPredictions + priority: 8.1 + # Names of used input streams. 
+ input_prediction_streams: [pipe5_c1_predictions, pipe6_c3_predictions, pipe7_binary_predictions] + input_mask_streams: [pipe5_c1_masks, pipe6_c3_masks, pipe7_binary_masks] + input_word_mappings: [word_mappings_c1_without_yn, word_mappings_c3, word_mappings_binary_yn] + globals: + output_word_mappings: word_mappings_all_c1_c3_binary + streams: + output_strings: pipe8_merged_predictions + output_indices: pipe8_merged_pred_indices + + # Statistics. + pipe8_merged_precision_recall: + type: PrecisionRecallStatistics + priority: 8.2 + # Use prediction indices instead of distributions. + use_prediction_distributions: False + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + globals: + word_mappings: word_mappings_all_c1_c3_binary + streams: + targets: all_answers_ids + predictions: pipe8_merged_pred_indices + statistics: + precision: pipe8_merged_precision + recall: pipe8_merged_recall + f1score: pipe8_merged_f1score + + # Viewers. (Predicted category names are published by pipe0_category_decoder under the pipe0_ prefix.) + viewer: + type: StreamViewer + priority: 8.3 + input_streams: questions,answers, category_names,pipe0_predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_c3_masks,pipe6_c3_answers_ids,pipe6_c3_predictions, pipe7_binary_masks,pipe7_binary_answers_ids,pipe7_binary_predictions, pipe8_merged_predictions + + +#: pipeline diff --git a/configs/wikitext/wikitext_language_modeling_rnn.yml b/configs/wikitext/wikitext_language_modeling_rnn.yml index 0d5a5c3..88274b9 100644 --- a/configs/wikitext/wikitext_language_modeling_rnn.yml +++ b/configs/wikitext/wikitext_language_modeling_rnn.yml @@ -72,7 +72,7 @@ pipeline: lstm: type: RecurrentNeuralNetwork priority: 3 - #initial_state_trainable: False + initial_state_trainable: False streams: inputs: embedded_sources globals: @@ -88,4 +88,10 @@ pipeline: targets: indexed_targets loss: loss + # Viewers. + viewer: + type: StreamViewer + priority: 100.1 + input_streams: sources,indexed_targets,targets,predictions + #: pipeline