diff --git a/configs/default/components/text/label_indexer.yml b/configs/default/components/text/label_indexer.yml index 45f6e6f..5b871e9 100644 --- a/configs/default/components/text/label_indexer.yml +++ b/configs/default/components/text/label_indexer.yml @@ -25,6 +25,10 @@ import_word_mappings_from_globals: False # Flag informing whether word mappings will be exported to globals (LOADED) export_word_mappings_to_globals: False +# Value that will be used when word is out of vocabulary (LOADED) +# (Mask for that element will be 0 as well) +out_of_vocabulary_value: -1 + streams: #################################################################### # 2. Keymappings associated with INPUT and OUTPUT streams. diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_all_rnn_vgg16_concat.yml b/configs/vqa_med_2019/c1_classification/c1_classification_all_rnn_vgg16_concat.yml new file mode 100644 index 0000000..62b4389 --- /dev/null +++ b/configs/vqa_med_2019/c1_classification/c1_classification_all_rnn_vgg16_concat.yml @@ -0,0 +1,101 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml + +pipeline: + name: vqa_med_c1_classification_all_rnn_vgg_concat + + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_embeddings_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size] + values: [100, 2, 10, 100] + + # First subpipeline: question. + # Questions encoding. 
+ question_tokenizer: + type: SentenceTokenizer + priority: 1.1 + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: Embeddings + question_embeddings: + type: SentenceEmbeddings + priority: 1.2 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + priority: 1.3 + use_logsoftmax: False + initial_state_trainable: False + #num_layers: 5 + hidden_size: 50 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_embeddings_output_size + + # 2nd subpipeline: image size. + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 2.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + # 3rd subpipeline: image. + # Image encoder. + image_encoder: + type: TorchVisionWrapper + priority: 3.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + # 4th subpipeline: concatenation + FF. 
+ concat: + type: Concatenation + priority: 4.1 + input_streams: [question_activations,image_size_activations,image_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,10],[-1,100]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: output_size + + + classifier: + type: FeedForwardNetwork + hidden_sizes: [100] + priority: 4.2 + streams: + inputs: concatenated_activations + globals: + input_size: output_size + prediction_size: vocabulary_size_c1 + + + #: pipeline diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml b/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml deleted file mode 100644 index 1d93dd4..0000000 --- a/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml +++ /dev/null @@ -1,51 +0,0 @@ -# Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml - -pipeline: - name: vqa_med_c1_classification_vf_question_onehot_bow - - ################# SHARED ################# - - # Questions encoding. 
- question_tokenizer: - type: SentenceTokenizer - priority: 1.1 - streams: - inputs: questions - outputs: tokenized_questions - - question_onehot_encoder: - type: SentenceOneHotEncoder - priority: 1.2 - data_folder: ~/data/vqa-med - word_mappings_file: questions.all.word.mappings.csv - export_word_mappings_to_globals: True - streams: - inputs: tokenized_questions - outputs: encoded_questions - globals: - vocabulary_size: question_vocabulary_size - - question_bow_encoder: - type: BOWEncoder - priority: 1.3 - streams: - inputs: encoded_questions - outputs: bow_questions - globals: - bow_size: question_vocabulary_size - - - - # Model - classifier: - type: FeedForwardNetwork - hidden_sizes: [100, 100] - priority: 3 - streams: - inputs: bow_questions - globals: - input_size: question_vocabulary_size - prediction_size: vocabulary_size_c1 - -#: pipeline diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_rnn_separate_q_categorization.yml b/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_rnn_separate_q_categorization.yml new file mode 100644 index 0000000..785881f --- /dev/null +++ b/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_rnn_separate_q_categorization.yml @@ -0,0 +1,107 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml + +pipeline: + name: vqa_med_c1_classification_vf_question_rnn_separate_q_categorization + + + ################# SHARED ################# + + # Questions encoding. 
+ question_tokenizer: + type: SentenceTokenizer + priority: 0.2 + streams: + inputs: questions + outputs: tokenized_questions + + ################# FLOW 0: CATEGORY ################# + + # Model 1: Embeddings + question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE + load: ~/Documents/image-clef-2019/experiments/q_categorization/20190416_103111/checkpoints/vqa_med_question_categorization_rnn_best.pt + freeze: True + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + lstm: + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE + load: ~/Documents/image-clef-2019/experiments/q_categorization/20190416_103111/checkpoints/vqa_med_question_categorization_rnn_best.pt + freeze: True + prediction_mode: Last + priority: 0.4 + initial_state_trainable: False + streams: + inputs: embedded_questions + predictions: predicted_question_categories + globals: + input_size: embeddings_size + prediction_size: num_categories # C1,C2,C3,C4,BIN,UNK + + + category_decoder: + type: WordDecoder + priority: 0.5 + # Use the same word mappings as label indexer. 
+ import_word_mappings_from_globals: True + streams: + inputs: predicted_question_categories + outputs: predicted_question_categories_ids + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + ################# FLOW C1: question ################# + + flow_c1_string_to_mask: + type: StringToMask + priority: 1.1 + globals: + word_mappings: category_word_mappings + streams: + strings: predicted_question_categories_ids + string_indices: flow_c1_targets + masks: flow_c1_masks + + # Model 1: Embeddings + flow_c1_question_embeddings: + type: SentenceEmbeddings + priority: 1.2 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: flow_c1_embedded_questions + globals: + embeddings_size: flow_c1_embeddings_size + + # Model 2: RNN + flow_c1_lstm: + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + priority: 1.3 + initial_state_trainable: False + #num_layers: 5 + hidden_size: 50 + streams: + inputs: flow_c1_embedded_questions + globals: + input_size: flow_c1_embeddings_size + prediction_size: vocabulary_size_c1 + + +#: pipeline diff --git a/configs/vqa_med_2019/c1_classification/default_c1_classification.yml b/configs/vqa_med_2019/c1_classification/default_c1_classification.yml index abf631c..27bd757 100644 --- a/configs/vqa_med_2019/c1_classification/default_c1_classification.yml +++ b/configs/vqa_med_2019/c1_classification/default_c1_classification.yml @@ -60,15 +60,15 @@ pipeline: loss: loss # Statistics. 
- accuracy: - type: AccuracyStatistics - priority: 100.1 - streams: - targets: answers_ids - batch_size: type: BatchSizeStatistics - priority: 100.2 + priority: 100.1 + + #accuracy: + # type: AccuracyStatistics + # priority: 100.2 + # streams: + # targets: answers_ids precision_recall: type: PrecisionRecallStatistics diff --git a/configs/vqa_med_2019/c3_classification/default_c3_classification.yml b/configs/vqa_med_2019/c3_classification/default_c3_classification.yml index 863ed6a..e3d2a39 100644 --- a/configs/vqa_med_2019/c3_classification/default_c3_classification.yml +++ b/configs/vqa_med_2019/c3_classification/default_c3_classification.yml @@ -60,15 +60,15 @@ pipeline: loss: loss # Statistics. - accuracy: - type: AccuracyStatistics - priority: 100.1 - streams: - targets: answers_ids - batch_size: type: BatchSizeStatistics - priority: 100.2 + priority: 100.1 + + #accuracy: + # type: AccuracyStatistics + # priority: 100.2 + # streams: + # targets: answers_ids precision_recall: type: PrecisionRecallStatistics diff --git a/configs/vqa_med_2019/question_categorization/default_question_categorization.yml b/configs/vqa_med_2019/question_categorization/default_question_categorization.yml index e39d87b..b263e62 100644 --- a/configs/vqa_med_2019/question_categorization/default_question_categorization.yml +++ b/configs/vqa_med_2019/question_categorization/default_question_categorization.yml @@ -1,6 +1,11 @@ # Load config defining problems for training, validation and testing. default_configs: vqa_med_2019/default_vqa_med_2019.yml +training: + # settings parameters + terminal_conditions: + loss_stop: 1.0e-3 + pipeline: # Predictions decoder. @@ -26,15 +31,15 @@ pipeline: loss: loss # Statistics. 
- accuracy: - type: AccuracyStatistics - priority: 100.1 - streams: - targets: category_ids - batch_size: type: BatchSizeStatistics - priority: 100.2 + priority: 100.1 + + #accuracy: + # type: AccuracyStatistics + # priority: 100.2 + # streams: + # targets: category_ids precision_recall: type: PrecisionRecallStatistics diff --git a/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml b/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml index 16035d1..7097041 100644 --- a/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml +++ b/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml @@ -31,12 +31,10 @@ pipeline: prediction_mode: Last priority: 3 initial_state_trainable: False - #num_layers: 5 - #hidden_size: 1000 streams: inputs: embedded_questions globals: input_size: embeddings_size - prediction_size: num_categories # C1,C2,C3,C4 + prediction_size: num_categories # C1,C2,C3,C4,BIN,UNK #: pipeline diff --git a/configs/vqa_med_2019/question_categorization/question_categorization_rnn_ffn.yml b/configs/vqa_med_2019/question_categorization/question_categorization_rnn_ffn.yml new file mode 100644 index 0000000..7988a86 --- /dev/null +++ b/configs/vqa_med_2019/question_categorization/question_categorization_rnn_ffn.yml @@ -0,0 +1,65 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/question_categorization/default_question_categorization.yml + +pipeline: + name: vqa_med_question_categorization_rnn_ffn + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size] + values: [100, 2, 10, 100] + + + # Questions encoding. 
+ question_tokenizer: + type: SentenceTokenizer + priority: 1.1 + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: Embeddings + question_embeddings: + type: SentenceEmbeddings + priority: 1.2 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + lstm: + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + priority: 1.3 + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: lstm_activations_questions + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model + classifier: + type: FeedForwardNetwork + hidden: [50] + priority: 1.4 + dropout_rate: 0.5 + streams: + inputs: lstm_activations_questions + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + +#: pipeline diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_cat_hard_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_cat_hard_shared_question_rnn_two_ffns_losses.yml new file mode 100644 index 0000000..4c2fe60 --- /dev/null +++ b/configs/vqa_med_2019/vf/c1_binary_vf_cat_hard_shared_question_rnn_two_ffns_losses.yml @@ -0,0 +1,240 @@ +# Load config defining problems for training, validation and testing. 
+default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1 + dataloader: + num_workers: 4 + + +pipeline: + name: c1_binary_vf_cat_hard_shared_question_rnn_two_ffns_losses + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_binary_word_to_ix] + values: [100, 2, 10, 100,{"C1": 0}, {"BINARY": 0}] + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 0.1 + + ################# PIPE 0: SHARED QUESTION ################# + + # Questions encoding. + pipe0_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + #load: + # file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + # model: question_embeddings + #freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + #load: + # file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + # model: lstm + #freeze: True 
+ ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: lstm_activations_questions + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + ################# PIPE 1: CATEGORY ################# + + ################# PIPE 2: C1 question ################# + + # Answer encoding for pipe 2. + pipe2_c1_answer_indexer: + type: LabelIndexer + priority: 2.0 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_without_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe2_c1_answers_without_yn_ids + globals: + vocabulary_size: vocabulary_size_c1_without_yn + word_mappings: word_mappings_c1_without_yn + + # Sample masking based on categories. + pipe2_c1_string_to_mask: + priority: 2.1 + type: StringToMask + globals: + word_mappings: category_c1_word_to_ix + streams: + strings: category_names # predicted_question_categories_names ## FOR NOW! 
+ string_indices: predicted_c1_question_categories_indices # NOT USED + masks: pipe2_c1_masks + + # Model 4: FFN C1 answering + pipe2_c1_lstm: + priority: 2.2 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: lstm_activations_questions + predictions: pipe2_c1_predictions + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: vocabulary_size_c1_without_yn + + pipe2_c1_nllloss: + type: NLLLoss + priority: 2.3 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe2_c1_predictions + masks: pipe2_c1_masks + targets: pipe2_c1_answers_without_yn_ids + loss: pipe2_c1_loss + + pipe2_c1_precision_recall: + type: PrecisionRecallStatistics + priority: 2.4 + use_masking: True + use_word_mappings: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe2_c1_masks + predictions: pipe2_c1_predictions + targets: pipe2_c1_answers_without_yn_ids + globals: + word_mappings: word_mappings_c1_without_yn + num_classes: vocabulary_size_c1_without_yn + statistics: + precision: pipe2_c1_precision + recall: pipe2_c1_recall + f1score: pipe2_c1_f1score + + ################# PIPE 3: BINARY question ################# + + # Answer encoding for pipe 3. + pipe3_binary_answer_indexer: + type: LabelIndexer + priority: 3.0 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe3_binary_answers_ids + globals: + vocabulary_size: vocabulary_size_binary_yn + word_mappings: word_mappings_binary_yn + + pipe3_binary_string_to_mask: + priority: 3.1 + type: StringToMask + globals: + word_mappings: category_binary_word_to_ix + streams: + strings: category_names # predicted_question_categories_names ## FOR NOW! 
+ string_indices: predicted_binary_question_categories_indices # NOT USED + masks: pipe3_binary_masks + + # Model 4: FFN binary answering + pipe3_binary_lstm: + priority: 3.2 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: lstm_activations_questions + predictions: pipe3_binary_predictions + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: vocabulary_size_binary_yn + + pipe3_binary_nllloss: + type: NLLLoss + priority: 3.3 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe3_binary_predictions + masks: pipe3_binary_masks + targets: pipe3_binary_answers_ids + loss: pipe3_binary_loss + + pipe3_binary_precision_recall: + type: PrecisionRecallStatistics + priority: 3.4 + use_masking: True + use_word_mappings: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe3_binary_masks + predictions: pipe3_binary_predictions + targets: pipe3_binary_answers_ids + globals: + word_mappings: word_mappings_binary_yn + num_classes: vocabulary_size_binary_yn + statistics: + precision: pipe3_binary_precision + recall: pipe3_binary_recall + f1score: pipe3_binary_f1score + + # Viewers. + viewer: + type: StreamViewer + priority: 100.1 + input_streams: questions,answers, category_names,predicted_question_categories_names, pipe2_c1_masks,pipe2_c1_answers_without_yn_ids,pipe2_c1_predictions, pipe3_binary_masks,pipe3_binary_answers_ids,pipe3_binary_predictions + + +#: pipeline diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml new file mode 100644 index 0000000..4ffc007 --- /dev/null +++ b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml @@ -0,0 +1,401 @@ +# Load config defining problems for training, validation and testing. 
+default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1 + dataloader: + num_workers: 4 + + +pipeline: + name: c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_binary_word_to_ix] + values: [100, 2, 10, 100,{"C1": 0}, {"BINARY": 0}] + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 0.1 + + # Questions encoding. + pipe1_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + 
################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. 
+ import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + vocabulary_size: vocabulary_size_all_c1_binasry + word_mappings: word_mappings_all_c1_binary + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. 
+ image_encoder: + type: TorchVisionWrapper + priority: 2.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 question ################# + + # Answer encoding for PIPE 5. + pipe5_c1_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_without_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_c1_answers_without_yn_ids + globals: + vocabulary_size: vocabulary_size_c1_without_yn + word_mappings: word_mappings_c1_without_yn + + # Sample masking based on categories. 
+ pipe5_c1_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c1_question_categories_indices # NOT USED + masks: pipe5_c1_masks + + # Model 4: FFN C1 answering + pipe5_c1_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_c1_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c1_without_yn + + pipe5_c1_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_c1_predictions + masks: pipe5_c1_masks + targets: pipe5_c1_answers_without_yn_ids + loss: pipe5_c1_loss + + pipe5_c1_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_c1_masks + predictions: pipe5_c1_predictions + targets: pipe5_c1_answers_without_yn_ids + globals: + word_mappings: word_mappings_c1_without_yn + num_classes: vocabulary_size_c1_without_yn + statistics: + precision: pipe5_c1_precision + recall: pipe5_c1_recall + f1score: pipe5_c1_f1score + + ################# PIPE 6: BINARY question ################# + + # Answer encoding for pipe 6. + pipe6_binary_answer_indexer: + type: LabelIndexer + priority: 6.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe6_binary_answers_ids + globals: + vocabulary_size: vocabulary_size_binary_yn + word_mappings: word_mappings_binary_yn + + pipe6_binary_string_to_mask: + priority: 6.2 + type: StringToMask + globals: + word_mappings: category_binary_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_binary_question_categories_indices # NOT USED + masks: pipe6_binary_masks + + # Model 4: FFN binary answering + pipe6_binary_ffn: + priority: 6.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_binary_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_binary_yn + + pipe6_binary_nllloss: + type: NLLLoss + priority: 6.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe6_binary_predictions + masks: pipe6_binary_masks + targets: pipe6_binary_answers_ids + loss: pipe6_binary_loss + + pipe6_binary_precision_recall: + type: PrecisionRecallStatistics + priority: 6.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe6_binary_masks + predictions: pipe6_binary_predictions + targets: pipe6_binary_answers_ids + globals: + word_mappings: word_mappings_binary_yn + num_classes: vocabulary_size_binary_yn + statistics: + precision: pipe6_binary_precision + recall: pipe6_binary_recall + f1score: pipe6_binary_f1score + + ################# PIPE 7: MERGE ANSWERS ################# + + # Merge predictions + pipe7_merged_predictions: + type: JoinMaskedPredictions + priority: 7.1 + # Names of used input streams. 
+ input_prediction_streams: [pipe5_c1_predictions, pipe6_binary_predictions] + input_mask_streams: [pipe5_c1_masks, pipe6_binary_masks] + input_word_mappings: [word_mappings_c1_without_yn, word_mappings_binary_yn] + globals: + output_word_mappings: word_mappings_all_c1_binary + streams: + output_strings: pipe7_merged_predictions + output_indices: pipe7_merged_pred_indices + + # Statistics. + pipe7_merged_precision_recall: + type: PrecisionRecallStatistics + priority: 7.2 + # Use prediction indices instead of distributions. + use_prediction_distributions: False + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + globals: + word_mappings: word_mappings_all_c1_binary + streams: + targets: all_answers_ids + predictions: pipe7_merged_pred_indices + statistics: + precision: pipe7_merged_precision + recall: pipe7_merged_recall + f1score: pipe7_merged_f1score + + # Viewers. + viewer: + type: StreamViewer + priority: 4.3 + input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_binary_masks,pipe6_binary_answers_ids,pipe6_binary_predictions, pipe7_merged_predictions + + +#: pipeline diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses.yml new file mode 100644 index 0000000..b7281c9 --- /dev/null +++ b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses.yml @@ -0,0 +1,371 @@ +# Load config defining problems for training, validation and testing. 
+default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1 + dataloader: + num_workers: 4 + + +pipeline: + name: c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_binary_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0}, {"BINARY": 0}] + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 0.1 + + ################# PIPE 0: SHARED QUESTION ################# + + # Questions encoding. + pipe0_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + #load: + # file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + # model: question_embeddings + #freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + #load: + # file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + # model: lstm + #freeze: True 
+ ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: lstm_activations_questions + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding: all C1 answers (targets of the merged statistics) + pipe0_all_answer_indexer: + type: LabelIndexer + priority: 0.6 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + vocabulary_size: vocabulary_size_all_c1_binary + word_mappings: word_mappings_all_c1_binary + + + ################# PIPE 1: CATEGORY ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe1_embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe1_embedded_questions + predictions: pipe1_lstm_activations_questions + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe1_classifier: + priority: 1.3 + 
type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe1_lstm_activations_questions + predictions: pipe1_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe1_category_decoder: + priority: 1.4 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe1_predicted_question_categories_preds + outputs: pipe1_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe1_category_accuracy: + type: AccuracyStatistics + priority: 1.5 + streams: + targets: category_ids + predictions: pipe1_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 2: C1 question ################# + + # Answer encoding for PIPE 2. + pipe2_c1_answer_indexer: + type: LabelIndexer + priority: 2.0 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_without_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe2_c1_answers_without_yn_ids + globals: + vocabulary_size: vocabulary_size_c1_without_yn + word_mappings: word_mappings_c1_without_yn + + # Sample masking based on categories. 
+ pipe2_c1_string_to_mask: + priority: 2.1 + type: StringToMask + globals: + word_mappings: category_c1_word_to_ix + streams: + strings: pipe1_predicted_question_categories_names + string_indices: predicted_c1_question_categories_indices # NOT USED + masks: pipe2_c1_masks + + # Model 4: FFN C1 answering + pipe2_c1_lstm: + priority: 2.2 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: lstm_activations_questions + predictions: pipe2_c1_predictions + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: vocabulary_size_c1_without_yn + + pipe2_c1_nllloss: + type: NLLLoss + priority: 2.3 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe2_c1_predictions + masks: pipe2_c1_masks + targets: pipe2_c1_answers_without_yn_ids + loss: pipe2_c1_loss + + pipe2_c1_precision_recall: + type: PrecisionRecallStatistics + priority: 2.4 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe2_c1_masks + predictions: pipe2_c1_predictions + targets: pipe2_c1_answers_without_yn_ids + globals: + word_mappings: word_mappings_c1_without_yn + num_classes: vocabulary_size_c1_without_yn + statistics: + precision: pipe2_c1_precision + recall: pipe2_c1_recall + f1score: pipe2_c1_f1score + + ################# PIPE 3: BINARY question ################# + + # Answer encoding for PIPE 3. + pipe3_binary_answer_indexer: + type: LabelIndexer + priority: 3.0 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe3_binary_answers_ids + globals: + vocabulary_size: vocabulary_size_binary_yn + word_mappings: word_mappings_binary_yn + + pipe3_binary_string_to_mask: + priority: 3.1 + type: StringToMask + globals: + word_mappings: category_binary_word_to_ix + streams: + strings: pipe1_predicted_question_categories_names + string_indices: predicted_binary_question_categories_indices # NOT USED + masks: pipe3_binary_masks + + # Model 5: FFN BINARY answering + pipe3_binary_lstm: + priority: 3.2 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: lstm_activations_questions + predictions: pipe3_binary_predictions + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: vocabulary_size_binary_yn + + pipe3_binary_nllloss: + type: NLLLoss + priority: 3.3 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe3_binary_predictions + masks: pipe3_binary_masks + targets: pipe3_binary_answers_ids + loss: pipe3_binary_loss + + pipe3_binary_precision_recall: + type: PrecisionRecallStatistics + priority: 3.4 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe3_binary_masks + predictions: pipe3_binary_predictions + targets: pipe3_binary_answers_ids + globals: + word_mappings: word_mappings_binary_yn + num_classes: vocabulary_size_binary_yn + statistics: + precision: pipe3_binary_precision + recall: pipe3_binary_recall + f1score: pipe3_binary_f1score + + ################# PIPE 4: MERGE ANSWERS ################# + + # Merge predictions + merged_predictions: + type: JoinMaskedPredictions + priority: 4.1 + # Names of used input streams. 
+ input_prediction_streams: [pipe2_c1_predictions, pipe3_binary_predictions] + input_mask_streams: [pipe2_c1_masks, pipe3_binary_masks] + input_word_mappings: [word_mappings_c1_without_yn, word_mappings_binary_yn] + globals: + output_word_mappings: word_mappings_all_c1_binary + streams: + output_strings: merged_predictions + output_indices: merged_pred_indices + + # Statistics. + merged_precision_recall: + type: PrecisionRecallStatistics + priority: 4.2 + # Use prediction indices instead of distributions. + use_prediction_distributions: False + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + globals: + word_mappings: word_mappings_all_c1_binary + streams: + targets: all_answers_ids + predictions: merged_pred_indices + statistics: + precision: merged_precision + recall: merged_recall + f1score: merged_f1score + + # Viewers. + viewer: + type: StreamViewer + priority: 4.3 + input_streams: questions,answers, category_names,pipe1_predicted_question_categories_names, pipe2_c1_masks,pipe2_c1_answers_without_yn_ids,pipe2_c1_predictions, pipe3_binary_masks,pipe3_binary_answers_ids,pipe3_binary_predictions, merged_predictions + + +#: pipeline diff --git a/ptp/application/pipeline_manager.py b/ptp/application/pipeline_manager.py index f259523..a6b550a 100644 --- a/ptp/application/pipeline_manager.py +++ b/ptp/application/pipeline_manager.py @@ -295,18 +295,31 @@ def load_models(self): The 'load' variable should contain path with filename of the checkpoint from which we want to load particular model. """ error = False - log_str = '' + log_str = 'Trying to load the pre-trained models:\n' # Iterate over models. for model in self.models: if "load" in model.config.keys(): try: + # Determine whether checkpoint is a string (filename) or a dictionary ('file' and 'model'). + checkpoint = model.config["load"] + if isinstance(checkpoint, str): + checkpoint_filename = checkpoint + checkpoint_model = None + else: # Assume dictionary. 
+ if 'file' not in checkpoint.keys() or 'model' not in checkpoint.keys(): + log_str += " + The 'load' section of model '{}' is incorrect: it must contain a single string (with checkpoint filename) or a dictionary (with two sections: checkpoint 'file' and 'model' to load)\n".format( + model.name + ) + error = True + continue + # Ok! + checkpoint_filename = checkpoint["file"] + checkpoint_model = checkpoint["model"] + # Check if file exists. - checkpoint_filename = model.config["load"] - # TODO: if checkpoint_file is a list!! checkpoint_filename = os.path.expanduser(checkpoint_filename.replace(" ","")) - # Check if file exists. if not os.path.isfile(checkpoint_filename): - log_str += "Could not import parameters of model '{}' from checkpoint {} as file does not exist\n".format( + log_str += " + Could not import parameters of model '{}' from checkpoint '{}' as file does not exist\n".format( model.name, checkpoint_filename ) @@ -317,7 +330,7 @@ def load_models(self): # This is to be able to load a CUDA-trained model on CPU chkpt = torch.load(checkpoint_filename, map_location=lambda storage, loc: storage) - log_str += "Importing model '{}' from pipeline '{}' parameters from checkpoint from {} (episode: {}, loss: {}, status: {}):\n".format( + log_str += " + Importing model '{}' from pipeline '{}' parameters from checkpoint from {} (episode: {}, loss: {}, status: {})\n".format( model.name, chkpt['name'], chkpt['timestamp'], @@ -326,7 +339,8 @@ def load_models(self): chkpt['status'] ) # Load model. 
- model.load_from_checkpoint(chkpt) + model.load_from_checkpoint(chkpt, checkpoint_model) + log_str += " + Model '{}' [{}] params loaded\n".format(model.name, type(model).__name__) except KeyError: log_str += " + Model '{}' [{}] params not found in checkpoint!\n".format(model.name, type(model).__name__) diff --git a/ptp/components/masking/join_masked_predictions.py b/ptp/components/masking/join_masked_predictions.py index fa0eb3d..0b418ea 100644 --- a/ptp/components/masking/join_masked_predictions.py +++ b/ptp/components/masking/join_masked_predictions.py @@ -123,10 +123,12 @@ def __call__(self, data_dict): - "outputs": added output field containing tensor [BATCH_SIZE x ...] """ - # Get inputs masks + # Get inputs masks. masks = [] for imsk in self.input_mask_stream_keys: - masks.append(data_dict[imsk].data.cpu().numpy()) + # Get mask from stream. + mask = data_dict[imsk] + masks.append(mask.data.cpu().numpy()) # Sum all masks and make sure that they are complementary. masks_sum = np.sum(masks, axis=0) diff --git a/ptp/components/models/model.py b/ptp/components/models/model.py index f907111..f89ad57 100644 --- a/ptp/components/models/model.py +++ b/ptp/components/models/model.py @@ -73,13 +73,17 @@ def save_to_checkpoint(self, chkpt): chkpt[self.name] = self.state_dict() - def load_from_checkpoint(self, chkpt): + def load_from_checkpoint(self, chkpt, section=None): """ Loads state dictionary from checkpoint. - :param: Checkpoint (dictionary) loaded from file. + :param chkpt: Checkpoint (dictionary) loaded from file. 
+ + :param section: Name of the section containing params (DEFAULT: None, means that model name from current configuration will be used). """ - self.load_state_dict(chkpt[self.name]) + if section is None: + section = self.name + self.load_state_dict(chkpt[section]) def freeze(self): """ diff --git a/ptp/components/text/label_indexer.py b/ptp/components/text/label_indexer.py index f9728ea..410aa46 100644 --- a/ptp/components/text/label_indexer.py +++ b/ptp/components/text/label_indexer.py @@ -42,6 +42,9 @@ def __init__(self, name, config): self.key_inputs = self.stream_keys["inputs"] self.key_outputs = self.stream_keys["outputs"] + # Get the out-of-vocabulary value from configuration. + self.out_of_vocabulary_value = self.config["out_of_vocabulary_value"] + def input_data_definitions(self): """ @@ -81,7 +84,11 @@ def __call__(self, data_dict): for sample in inputs: assert not isinstance(sample, (list,)), 'This encoder requires input sample to contain a single word' # Process single token. - output_sample = self.word_to_ix[sample] + if sample in self.word_to_ix.keys(): + output_sample = self.word_to_ix[sample] + else: + # Word out of vocabulary: use the configured out-of-vocabulary value. + output_sample = self.out_of_vocabulary_value outputs_list.append(output_sample) # Transform to tensor. output_tensor = torch.tensor(outputs_list)