From 805429be13ec034deec4e6de500e24b1d72296e2 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 23 Apr 2019 10:44:35 -0700 Subject: [PATCH 01/15] added logging log_dir at the end of training --- ptp/workers/online_trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ptp/workers/online_trainer.py b/ptp/workers/online_trainer.py index 641a3c6..ec33760 100644 --- a/ptp/workers/online_trainer.py +++ b/ptp/workers/online_trainer.py @@ -334,6 +334,7 @@ def run_experiment(self): # Finalize statistics collection. self.finalize_statistics_collection() self.finalize_tensorboard() + self.logger.info("Experiment logged to: {}".format(self.log_dir)) def main(): From 02df0226923c64457d6c20f690c3f7559af0aaf5 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 23 Apr 2019 12:48:11 -0700 Subject: [PATCH 02/15] Initial version of RN --- .../models/vqa/relational_network.yml | 55 +++++++ .../c2_classification_all_rnn_vgg16_mcb.yml | 1 - ...ification_all_rnn_vgg16_relational_net.yml | 94 +++++++++++ ptp/components/models/__init__.py | 2 + .../models/vqa/relational_network.py | 147 ++++++++++++++++++ 5 files changed, 298 insertions(+), 1 deletion(-) create mode 100644 configs/default/components/models/vqa/relational_network.yml create mode 100644 configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml create mode 100644 ptp/components/models/vqa/relational_network.py diff --git a/configs/default/components/models/vqa/relational_network.yml b/configs/default/components/models/vqa/relational_network.yml new file mode 100644 index 0000000..791d62e --- /dev/null +++ b/configs/default/components/models/vqa/relational_network.yml @@ -0,0 +1,55 @@ +# This file defines the default values for the ElementWiseMultiplication model. + +#################################################################### +# 1. CONFIGURATION PARAMETERS that will be LOADED by the component. +#################################################################### + +# Dropout rate (LOADED) +# Default: 0 (means that it is turned off) +dropout_rate: 0 + +# Size of the output of g_theta network/output after concatenation (LOADED) +output_size: 256 + +streams: + #################################################################### + # 2. Keymappings associated with INPUT and OUTPUT streams. + #################################################################### + + # Stream containing batch of encoded images (INPUT) + feature_maps: feature_maps + + # Stream containing batch of encoded questions (INPUT) + question_encodings: question_encodings + + # Stream containing outputs (OUTPUT) + outputs: outputs + +globals: + #################################################################### + # 3. Keymappings of variables that will be RETRIEVED from GLOBALS. + #################################################################### + + # Height of the features tensor (RETRIEVED) + feature_maps_height: feature_maps_height + + # Width of the features tensor (RETRIEVED) + feature_maps_width: feature_maps_width + + # Depth of the features tensor (RETRIEVED) + feature_maps_depth: feature_maps_depth + + # Size of the question encodings input (RETRIEVED) + question_encoding_size: question_encoding_size + + #################################################################### + # 4. Keymappings associated with GLOBAL variables that will be SET. 
+ #################################################################### + + # Size of the output (SET) + output_size: output_size + + #################################################################### + # 5. Keymappings associated with statistics that will be ADDED. + #################################################################### + diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml index 0ea068e..75e41ed 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml @@ -72,7 +72,6 @@ pipeline: question_image_fusion: priority: 4.1 type: MultimodalCompactBilinearPooling - dropout_rate: 0.5 streams: image_encodings: image_activations question_encodings: question_activations diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml new file mode 100644 index 0000000..232a023 --- /dev/null +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml @@ -0,0 +1,94 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml + +# Training parameters: +training: + problem: + batch_size: 5 +validation: + problem: + batch_size: 5 + +pipeline: + name: c2_classification_all_rnn_vgg16_relational_net + + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [question_encoder_output_size] + values: [100] + + ################# PIPE 0: question ################# + # Questions encoding. + question_tokenizer: + priority: 1.1 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: Embeddings + question_embeddings: + priority: 1.2 + type: SentenceEmbeddings + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + priority: 1.3 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + use_logsoftmax: False + initial_state_trainable: False + hidden_size: 128 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + ################# PIPE 2: image ################# + # Image encoder. + image_encoder: + priority: 3.1 + type: TorchVisionWrapper + return_feature_maps: True + streams: + inputs: images + outputs: feature_maps + + ################# PIPE 3: fusion + classification ################# + # Element wise multiplication + FF. 
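For orientation before the fusion block below: the RelationalNetwork consumes the feature maps returned by the VGG16 wrapper together with the 100-dimensional question encoding produced by the LSTM above. A minimal shape sketch; the 512x7x7 feature grid is an assumption about what the VGG16 backbone returns for its default input size, not something stated in this config:

import torch

batch_size = 2
feature_maps = torch.randn(batch_size, 512, 7, 7)    # [B x D x H x W], assumed VGG16 output
question_encodings = torch.randn(batch_size, 100)    # [B x 100], LSTM "Last" prediction

# One "object" is a single spatial location of the feature grid.
num_objects = feature_maps.shape[2] * feature_maps.shape[3]                       # 49
g_theta_input_size = 2 * feature_maps.shape[1] + question_encodings.shape[1]      # 2*512 + 100 = 1124
print(num_objects, g_theta_input_size)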
+ question_image_fusion: + priority: 4.1 + type: RelationalNetwork + dropout_rate: 0.5 + output_size: [256] + streams: + question_encodings: question_activations + outputs: fused_image_question_activations + globals: + question_encoding_size: question_encoder_output_size + output_size: fused_image_question_activation_size + + classifier: + priority: 4.2 + type: FeedForwardNetwork + hidden_sizes: [512,256] + dropout_rate: 0.5 + streams: + inputs: fused_image_question_activations + globals: + input_size: fused_image_question_activation_size + prediction_size: vocabulary_size_c2 + + #: pipeline diff --git a/ptp/components/models/__init__.py b/ptp/components/models/__init__.py index b8b6093..e362654 100644 --- a/ptp/components/models/__init__.py +++ b/ptp/components/models/__init__.py @@ -9,6 +9,7 @@ from .vqa.element_wise_multiplication import ElementWiseMultiplication from .vqa.multimodal_compact_bilinear_pooling import MultimodalCompactBilinearPooling +from .vqa.relational_network import RelationalNetwork __all__ = [ 'ConvNetEncoder', @@ -21,4 +22,5 @@ 'SentenceEmbeddings', 'ElementWiseMultiplication', 'MultimodalCompactBilinearPooling', + 'RelationalNetwork', ] diff --git a/ptp/components/models/vqa/relational_network.py b/ptp/components/models/vqa/relational_network.py new file mode 100644 index 0000000..88452b6 --- /dev/null +++ b/ptp/components/models/vqa/relational_network.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright (C) IBM Corporation 2018 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__author__ = "Tomasz Kornuta" + + +import torch + +from ptp.components.models.model import Model +from ptp.data_types.data_definition import DataDefinition + + +class RelationalNetwork(Model): + """ + Model implements relational network. + Model expects image (CNN) features and encoded question. + + + """ + def __init__(self, name, config): + """ + Initializes the model, creates the required layers. + + :param name: Name of the model (taken from the configuration file). + + :param config: Parameters read from configuration file. + :type config: ``ptp.configuration.ConfigInterface`` + + """ + super(RelationalNetwork, self).__init__(name, RelationalNetwork, config) + + # Get key mappings. + self.key_feature_maps = self.stream_keys["feature_maps"] + self.key_question_encodings = self.stream_keys["question_encodings"] + self.key_outputs = self.stream_keys["outputs"] + + # Retrieve input sizes from globals. + self.feature_maps_height = self.globals["feature_maps_height"] + self.feature_maps_width = self.globals["feature_maps_width"] + self.feature_maps_depth = self.globals["feature_maps_depth"] + self.question_encoding_size = self.globals["question_encoding_size"] + + + # Create "object" coordinates. + self.obj_coords = [] + for h in range(self.feature_maps_height): + for w in range(self.feature_maps_width): + self.obj_coords.append((h,w)) + + # Get output_size from config and send it to globals. 
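A note on cost: the obj_coords list built above is iterated twice in forward(), so the number of g_theta evaluations grows quadratically with the number of spatial locations. A quick back-of-the-envelope check, with 7x7 used purely as an illustrative grid size (the real values come from the feature_maps_height/width globals):

height, width = 7, 7                  # illustrative only
num_objects = height * width          # 49 "objects"
num_pairs = num_objects ** 2          # 2401 ordered pairs pushed through g_theta per image
print(num_objects, num_pairs)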
+ self.output_size = self.config["output_size"] + self.globals["output_size"] = self.output_size + + # Calculate input size to the g_theta: two "objects" + question (+ optionally: image size) + input_size = 2 * self.feature_maps_depth + self.question_encoding_size + + # Retrieve dropout rate value - if set, will put dropout between every layer. + dropout_rate = self.config["dropout_rate"] + + # Create the model, i.e. the "relational" g_theta MLP. + self.g_theta = torch.nn.Sequential( + torch.nn.Linear(input_size, self.output_size), + # Create activation layer. + torch.nn.ReLU(), + # Create dropout layer. + torch.nn.Dropout(dropout_rate), + torch.nn.Linear(self.output_size, self.output_size), + torch.nn.ReLU(), + torch.nn.Dropout(dropout_rate), + torch.nn.Linear(self.output_size, self.output_size), + torch.nn.ReLU(), + torch.nn.Dropout(dropout_rate), + torch.nn.Linear(self.output_size, self.output_size) + ) + + + + def input_data_definitions(self): + """ + Function returns a dictionary with definitions of input data that are required by the component. + + :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + return { + self.key_feature_maps: DataDefinition([-1, self.feature_maps_depth, self.feature_maps_height, self.feature_maps_width], [torch.Tensor], "Batch of feature maps [BATCH_SIZE x FEAT_DEPTH x FEAT_HEIGHT x FEAT_WIDTH]"), + self.key_question_encodings: DataDefinition([-1, self.question_encoding_size], [torch.Tensor], "Batch of encoded questions [BATCH_SIZE x QUESTION_ENCODING_SIZE]"), + } + + + def output_data_definitions(self): + """ + Function returns a dictionary with definitions of output data produced the component. + + :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + return { + self.key_outputs: DataDefinition([-1, self.output_size], [torch.Tensor], "Batch of outputs [BATCH_SIZE x OUTPUT_SIZE]") + } + + def forward(self, data_dict): + """ + Main forward pass of the model. + + :param data_dict: DataDict({'images',**}) + :type data_dict: ``ptp.dadatypes.DataDict`` + """ + + # Unpack DataDict. + feat_m = data_dict[self.key_feature_maps] + enc_q = data_dict[self.key_question_encodings] + + summed_relations = None + # Iterate through all pairs of "objects". + for (h1,w1) in self.obj_coords: + for (h2,w2) in self.obj_coords: + # Get feature maps. + fm1 = feat_m[:, :, h1,w1].view(-1, self.feature_maps_depth) + fm2 = feat_m[:, :, h2,w2].view(-1, self.feature_maps_depth) + # Concatenate with question. + concat = torch.cat([fm1, fm2, enc_q], dim=1) + + # Pass it through g_theta. + rel = self.g_theta(concat) + + # Add to relations. + if summed_relations is None: + summed_relations = rel + else: + # Element wise sum. + summed_relations += rel + + # Add outputs to datadict. 
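The nested loop above is easy to follow but issues (H*W)^2 small g_theta calls per batch. If that ever becomes a bottleneck, the same sum over ordered object pairs can be formed with broadcasting; the sketch below is an equivalent stand-alone formulation of the idea, not part of this model:

import torch

def relational_sum(feature_maps, question, g_theta):
    # feature_maps: [B, D, H, W], question: [B, Q]
    b, d, h, w = feature_maps.shape
    n = h * w
    objs = feature_maps.view(b, d, n).permute(0, 2, 1)               # [B, N, D]
    o_i = objs.unsqueeze(2).expand(b, n, n, d)                       # first object of each pair
    o_j = objs.unsqueeze(1).expand(b, n, n, d)                       # second object of each pair
    q = question.unsqueeze(1).unsqueeze(1).expand(b, n, n, question.shape[1])
    pairs = torch.cat([o_i, o_j, q], dim=3)                          # [B, N, N, 2*D + Q]
    out = g_theta(pairs.view(b * n * n, -1))
    return out.view(b, n * n, -1).sum(dim=1)                         # sum over all ordered pairs

# Quick shape check with dummy tensors and a single linear layer standing in for g_theta.
g = torch.nn.Linear(2 * 512 + 100, 256)
print(relational_sum(torch.randn(2, 512, 7, 7), torch.randn(2, 100), g).shape)   # torch.Size([2, 256])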
+ data_dict.extend({self.key_outputs: summed_relations}) From b07d633c02752b0dd03d5d02aa714ca3e5ab23b6 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 23 Apr 2019 12:50:55 -0700 Subject: [PATCH 03/15] typo fix in config --- .../c2_classification_all_rnn_vgg16_relational_net.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml index 232a023..50f1621 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml @@ -72,7 +72,7 @@ pipeline: priority: 4.1 type: RelationalNetwork dropout_rate: 0.5 - output_size: [256] + output_size: 256 streams: question_encodings: question_activations outputs: fused_image_question_activations @@ -83,7 +83,7 @@ pipeline: classifier: priority: 4.2 type: FeedForwardNetwork - hidden_sizes: [512,256] + hidden_sizes: [256,256] dropout_rate: 0.5 streams: inputs: fused_image_question_activations From c1d471fd2f54f3faeca9c630d800a0dc955e9443 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 23 Apr 2019 12:51:56 -0700 Subject: [PATCH 04/15] rn config batch size commented --- ...2_classification_all_rnn_vgg16_relational_net.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml index 50f1621..7382308 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml @@ -2,12 +2,12 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml # Training parameters: -training: - problem: - batch_size: 5 -validation: - problem: - batch_size: 5 +#training: +# problem: +# batch_size: 5 +#validation: +# problem: +# batch_size: 5 pipeline: name: c2_classification_all_rnn_vgg16_relational_net From 123187b306b0765ba22438795678d2c7a750e1d4 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 23 Apr 2019 12:53:10 -0700 Subject: [PATCH 05/15] rn config batch size --- ...2_classification_all_rnn_vgg16_relational_net.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml index 7382308..7e2f657 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml @@ -2,12 +2,12 @@ default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml # Training parameters: -#training: -# problem: -# batch_size: 5 -#validation: -# problem: -# batch_size: 5 +training: + problem: + batch_size: 64 +validation: + problem: + batch_size: 64 pipeline: name: c2_classification_all_rnn_vgg16_relational_net From 2eabb0d8789d09c5fee41d4cbc26f4f3a99f98a9 Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 24 Apr 2019 09:47:40 -0700 Subject: [PATCH 06/15] Modified settings for relational network pipe --- ...lassification_all_rnn_vgg16_relational_net.yml | 15 +++++++++------ 
ptp/components/models/vqa/relational_network.py | 2 ++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml index 7e2f657..8601775 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml @@ -32,8 +32,8 @@ pipeline: question_embeddings: priority: 1.2 type: SentenceEmbeddings - embeddings_size: 50 - pretrained_embeddings_file: glove.6B.50d.txt + embeddings_size: 200 + pretrained_embeddings_file: glove.6B.200d.txt data_folder: ~/data/vqa-med word_mappings_file: questions.all.word.mappings.csv streams: @@ -47,8 +47,9 @@ pipeline: cell_type: LSTM prediction_mode: Last use_logsoftmax: False - initial_state_trainable: False - hidden_size: 128 + initial_state_trainable: True + dropout_rate: 0.5 + hidden_size: 50 streams: inputs: embedded_questions predictions: question_activations @@ -62,6 +63,8 @@ pipeline: priority: 3.1 type: TorchVisionWrapper return_feature_maps: True + frozen: True + freeze: True streams: inputs: images outputs: feature_maps @@ -72,7 +75,7 @@ pipeline: priority: 4.1 type: RelationalNetwork dropout_rate: 0.5 - output_size: 256 + output_size: 100 streams: question_encodings: question_activations outputs: fused_image_question_activations @@ -83,7 +86,7 @@ pipeline: classifier: priority: 4.2 type: FeedForwardNetwork - hidden_sizes: [256,256] + hidden_sizes: [100,100] dropout_rate: 0.5 streams: inputs: fused_image_question_activations diff --git a/ptp/components/models/vqa/relational_network.py b/ptp/components/models/vqa/relational_network.py index 88452b6..5b5763a 100644 --- a/ptp/components/models/vqa/relational_network.py +++ b/ptp/components/models/vqa/relational_network.py @@ -30,6 +30,8 @@ class RelationalNetwork(Model): Model expects image (CNN) features and encoded question. + Santoro, A., Raposo, D., Barrett, D. G., Malinowski, M., Pascanu, R., Battaglia, P., & Lillicrap, T. (2017). A simple neural network module for relational reasoning. In Advances in neural information processing systems (pp. 4967-4976). + Reference paper: https://arxiv.org/abs/1706.01427. """ def __init__(self, name, config): """ From 8cbbf99b4a4da602e908ba430595e24d53bc0b8c Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 24 Apr 2019 09:51:40 -0700 Subject: [PATCH 07/15] changed settings (larger glove, dropout) for c2 ewm_size --- ...c2_classification_all_rnn_vgg16_ewm_size.yml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml index 91ff5d1..1cfb431 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml @@ -1,6 +1,14 @@ # Load config defining problems for training, validation and testing. 
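One of the changes above freezes the pretrained image encoder (both frozen: True and freeze: True appear; presumably only one of them is the key the wrapper actually reads). In plain PyTorch, freezing a torchvision backbone usually means disabling gradients on its parameters; the snippet below is a stand-alone illustration of that idea, not a description of how ptp's TorchVisionWrapper implements the flag:

import torch
import torchvision

backbone = torchvision.models.vgg16(pretrained=True).features
for param in backbone.parameters():
    param.requires_grad = False     # weights stay fixed during training
backbone.eval()                     # disables any dropout / batch-norm updates

with torch.no_grad():
    feature_maps = backbone(torch.randn(1, 3, 224, 224))
print(feature_maps.shape)           # torch.Size([1, 512, 7, 7])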
default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml +# Training parameters: +training: + problem: + batch_size: 64 +validation: + problem: + batch_size: 64 + pipeline: name: c2_classification_all_rnn_vgg16_ewm_size @@ -24,8 +32,8 @@ pipeline: question_embeddings: priority: 1.2 type: SentenceEmbeddings - embeddings_size: 50 - pretrained_embeddings_file: glove.6B.50d.txt + embeddings_size: 200 + pretrained_embeddings_file: glove.6B.200d.txt data_folder: ~/data/vqa-med word_mappings_file: questions.all.word.mappings.csv streams: @@ -39,8 +47,9 @@ pipeline: cell_type: LSTM prediction_mode: Last use_logsoftmax: False - initial_state_trainable: False + initial_state_trainable: True hidden_size: 50 + dropout_rate: 0.5 streams: inputs: embedded_questions predictions: question_activations @@ -117,7 +126,7 @@ pipeline: classifier: priority: 5.3 type: FeedForwardNetwork - hidden_sizes: [110] + hidden_sizes: [100] dropout_rate: 0.5 streams: inputs: concatenated_activations From 272441988e05689da2bd55c37ad47d0fa1ba9cca Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 24 Apr 2019 21:21:52 -0700 Subject: [PATCH 08/15] reverted some changesin c2: commented dropout in rnn --- .../c2_classification_all_rnn_vgg16_ewm_size.yml | 6 +++--- .../c2_classification_all_rnn_vgg16_relational_net.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml index 1cfb431..72c5e1a 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_ewm_size.yml @@ -32,8 +32,8 @@ pipeline: question_embeddings: priority: 1.2 type: SentenceEmbeddings - embeddings_size: 200 - pretrained_embeddings_file: glove.6B.200d.txt + embeddings_size: 100 + pretrained_embeddings_file: glove.6B.100d.txt data_folder: ~/data/vqa-med word_mappings_file: questions.all.word.mappings.csv streams: @@ -49,7 +49,7 @@ pipeline: use_logsoftmax: False initial_state_trainable: True hidden_size: 50 - dropout_rate: 0.5 + #dropout_rate: 0.5 streams: inputs: embedded_questions predictions: question_activations diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml index 8601775..5268ba7 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_relational_net.yml @@ -32,8 +32,8 @@ pipeline: question_embeddings: priority: 1.2 type: SentenceEmbeddings - embeddings_size: 200 - pretrained_embeddings_file: glove.6B.200d.txt + embeddings_size: 100 + pretrained_embeddings_file: glove.6B.100d.txt data_folder: ~/data/vqa-med word_mappings_file: questions.all.word.mappings.csv streams: @@ -48,7 +48,7 @@ pipeline: prediction_mode: Last use_logsoftmax: False initial_state_trainable: True - dropout_rate: 0.5 + #dropout_rate: 0.5 hidden_size: 50 streams: inputs: embedded_questions From 41cb4729e9ff6e1cee3ab981d90071eb4c617558 Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 24 Apr 2019 21:22:20 -0700 Subject: [PATCH 09/15] simple config for c4 ewm --- ..._classification_all_rnn_vgg16_ewm_size.yml | 130 ++++++++++++++++++ .../default_c4_classification.yml | 98 +++++++++++++ 2 files changed, 228 
insertions(+) create mode 100644 configs/vqa_med_2019/c4_classification/c4_classification_all_rnn_vgg16_ewm_size.yml create mode 100644 configs/vqa_med_2019/c4_classification/default_c4_classification.yml diff --git a/configs/vqa_med_2019/c4_classification/c4_classification_all_rnn_vgg16_ewm_size.yml b/configs/vqa_med_2019/c4_classification/c4_classification_all_rnn_vgg16_ewm_size.yml new file mode 100644 index 0000000..a9b8266 --- /dev/null +++ b/configs/vqa_med_2019/c4_classification/c4_classification_all_rnn_vgg16_ewm_size.yml @@ -0,0 +1,130 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c4_classification/default_c4_classification.yml + +pipeline: + name: c4_classification_all_rnn_vgg16_ewm_size + + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [question_encoder_output_size, image_encoder_output_size, element_wise_activation_size,image_size_encoder_input_size, image_size_encoder_output_size] + values: [100, 100, 100, 2, 10] + + ################# PIPE 0: question ################# + # Questions encoding. + question_tokenizer: + priority: 1.1 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: Embeddings + question_embeddings: + priority: 1.2 + type: SentenceEmbeddings + embeddings_size: 100 + pretrained_embeddings_file: glove.6B.100d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + priority: 1.3 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + use_logsoftmax: False + initial_state_trainable: True + hidden_size: 50 + #dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_encoder_output_size + + ################# PIPE 2: image ################# + # Image encoder. + image_encoder: + priority: 3.1 + type: TorchVisionWrapper + streams: + inputs: images + outputs: image_activations + globals: + output_size: image_encoder_output_size + + ################# PIPE 3: image-question fusion ################# + # Element wise multiplication + FF. + question_image_fusion: + priority: 4.1 + type: ElementWiseMultiplication + dropout_rate: 0.5 + streams: + image_encodings: image_activations + question_encodings: question_activations + outputs: element_wise_activations + globals: + image_encoding_size: image_encoder_output_size + question_encoding_size: question_encoder_output_size + output_size: element_wise_activation_size + + question_image_ffn: + priority: 4.2 + type: FeedForwardNetwork + hidden_sizes: [100] + dropout_rate: 0.5 + streams: + inputs: element_wise_activations + predictions: question_image_activations + globals: + input_size: element_wise_activation_size + prediction_size: element_wise_activation_size + + ################# PIPE 4: image-question-image size fusion + classification ################# + # 2nd subpipeline: image size. + # Model - image size classifier. + image_size_encoder: + priority: 5.1 + type: FeedForwardNetwork + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + # 4th subpipeline: concatenation + FF. 
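The ElementWiseMultiplication fusion used above presumably projects both modalities to a common width and multiplies them feature by feature (a Hadamard product); the sketch below shows that fusion pattern with the 100-unit sizes from this config, but the exact layer layout inside ptp's component is an assumption. The resulting 100-d vector is then concatenated with the 10-d image-size encoding, which is where the 110-wide output_dims in the block below comes from.

import torch

class HadamardFusion(torch.nn.Module):
    """Projects two encodings to a shared size and multiplies them element-wise."""
    def __init__(self, image_size=100, question_size=100, output_size=100):
        super().__init__()
        self.image_proj = torch.nn.Linear(image_size, output_size)
        self.question_proj = torch.nn.Linear(question_size, output_size)

    def forward(self, image_enc, question_enc):
        return self.image_proj(image_enc) * self.question_proj(question_enc)

fused = HadamardFusion()(torch.randn(4, 100), torch.randn(4, 100))
print(fused.shape)    # torch.Size([4, 100])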
+ concat: + priority: 5.2 + type: Concatenation + input_streams: [question_image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,10]] + output_dims: [-1,110] + streams: + outputs: concatenated_activations + globals: + output_size: concatentated_activations_size + + + classifier: + priority: 5.3 + type: FeedForwardNetwork + hidden_sizes: [500] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + globals: + input_size: concatentated_activations_size + prediction_size: vocabulary_size_c4 + + + #: pipeline diff --git a/configs/vqa_med_2019/c4_classification/default_c4_classification.yml b/configs/vqa_med_2019/c4_classification/default_c4_classification.yml new file mode 100644 index 0000000..e221187 --- /dev/null +++ b/configs/vqa_med_2019/c4_classification/default_c4_classification.yml @@ -0,0 +1,98 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + batch_size: 64 + categories: C4 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c4.weights.csv + dataloader: + num_workers: 4 + # Termination. + terminal_conditions: + loss_stop: 1.0e-2 + episode_limit: 10000 + epoch_limit: -1 + +# Validation parameters: +validation: + problem: + batch_size: 64 + categories: C4 + dataloader: + num_workers: 4 + + +pipeline: + + # Answer encoding. + answer_indexer: + type: LabelIndexer + priority: 0.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c4.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c4 + word_mappings: word_mappings_c4 + + + # Predictions decoder. + prediction_decoder: + type: WordDecoder + priority: 10.1 + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: predictions + outputs: predicted_answers + globals: + vocabulary_size: vocabulary_size_c4 + word_mappings: word_mappings_c4 + + # Loss + nllloss: + type: NLLLoss + priority: 10.2 + targets_dim: 1 + streams: + targets: answers_ids + loss: loss + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 100.1 + + #accuracy: + # type: AccuracyStatistics + # priority: 100.2 + # streams: + # targets: answers_ids + + precision_recall: + type: PrecisionRecallStatistics + priority: 100.3 + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + streams: + targets: answers_ids + globals: + word_mappings: word_mappings_c4 + num_classes: vocabulary_size_c4 + + # Viewers. 
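The training section above points the WeightedRandomSampler at a per-answer weights file (answers.c4.weights.csv). The usual recipe behind such a file is inverse-frequency weighting, so that rare answers are sampled about as often as frequent ones; the snippet below sketches that recipe with a toy label list, while the actual file format and how ptp loads it are assumptions here:

import collections
from torch.utils.data import WeightedRandomSampler

answers = ["normal", "normal", "normal", "cyst", "fracture"]      # toy per-sample labels

counts = collections.Counter(answers)
weights = [1.0 / counts[a] for a in answers]                      # rare answers get larger weights

sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)
print(list(sampler))    # sample indices, drawn roughly uniformly over answer classes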
+ viewer: + type: StreamViewer + priority: 100.4 + input_streams: questions,category_names,answers,predicted_answers + +#: pipeline From 5f57c7d22c7ecfb196526d4b6e7c9d5606496008 Mon Sep 17 00:00:00 2001 From: tkornut Date: Thu, 25 Apr 2019 21:00:14 -0700 Subject: [PATCH 10/15] Added preprocessing function to vqameed problem --- .../image_text_to_class/vqa_med_2019.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/ptp/components/problems/image_text_to_class/vqa_med_2019.py b/ptp/components/problems/image_text_to_class/vqa_med_2019.py index 9273fb5..ed9c433 100644 --- a/ptp/components/problems/image_text_to_class/vqa_med_2019.py +++ b/ptp/components/problems/image_text_to_class/vqa_med_2019.py @@ -179,6 +179,45 @@ def filter_sources(self, source_files, source_categories): return source_files, source_categories + def preprocess_text(self, text, remove_stop_words = False): + """ + Function that preprocesses questions/answers as suggested by ImageCLEF VQA challenge organizers: + * lowercases all words + * removes punctuation + * removes stop words (optional) + + :param text: text to be processed. + :param remove_stop_words: removes stop words (DEFAULT: False) + + :return: Preprocessed and tokenized text (list of strings) + """ + # Lowercase. + text = text.lower() + + # Remove punctuation. + translator = str.maketrans('', '', string.punctuation) + text = text.translate(translator) + + # Remove '“' and '”' !!! + text = text.replace('“','').replace('”','') + + # Tokenize. + text_words = nltk.tokenize.word_tokenize(text) + + # If we do not want to remove stop words - return text. + if not remove_stop_words: + return text_words + + # Perform "cleansing". + stops = set(stopwords.words("english")) + cleansed_words = [word for word in text_words if word not in stops] + # Return the original text if there are no words left :] + if len(cleansed_words) == 0: + return text_words + + # Return cleaned text. + return cleansed_words + def load_dataset(self, source_files, source_categories): """ Loads the dataset from one or more files. From 2ca681b8f7267fa4ef6913b7ce5ad085e1ac4e67 Mon Sep 17 00:00:00 2001 From: tkornut Date: Thu, 25 Apr 2019 23:38:59 -0700 Subject: [PATCH 11/15] Added various preprocessing to vqa_med problem --- .../image_text_to_class/vqa_med_2019.yml | 24 +- .../image_text_to_class/vqa_med_2019.py | 255 +++++++++++++----- 2 files changed, 200 insertions(+), 79 deletions(-) diff --git a/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml b/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml index 53c6407..bd724b4 100644 --- a/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml +++ b/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml @@ -15,10 +15,6 @@ split: training # Options: all | c1 | c2 | c3 | c4 (or any combination of the latter 4) categories: all -# Removes punctuation (LOADED) -# Options: none | questions | answers | all -remove_punctuation: questions - # Resize parameter (LOADED) # When present, resizes the images from original size to [height, width] # Depth remains set to 3. @@ -28,9 +24,23 @@ remove_punctuation: questions # Problem will use those values to rescale the image_sizes to range (0, 1). 
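Patch 10 above adds preprocess_text() to the VQA-Med problem: lowercase, strip punctuation, tokenize with nltk and optionally drop stop words, falling back to the full token list when nothing would remain. A stand-alone re-implementation of those steps, just to show the effect on a typical question (the example question is made up):

import string
import nltk

nltk.download("punkt", quiet=True)
nltk.download("stopwords", quiet=True)

def preprocess(text, remove_stop_words=False):
    text = text.lower().translate(str.maketrans("", "", string.punctuation))
    words = nltk.tokenize.word_tokenize(text)
    if not remove_stop_words:
        return words
    stops = set(nltk.corpus.stopwords.words("english"))
    kept = [w for w in words if w not in stops]
    return kept or words    # keep the original tokens if everything was a stop word

print(preprocess("What abnormality is seen in the image?", remove_stop_words=True))
# ['abnormality', 'seen', 'image']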
scale_image_size: [2414, 2323] -# Set augmentation parameter -# Use random affine transformations (rotate, scale and translate) -use_augmentation: False +# Select applied image preprocessing/augmentations (LOADED) +# Use one (or more) of the affine transformations: +# none | random_affine | random_horizontal_flip | normalize | all +# Accepted formats: a,b,c or [a,b,c] +image_preprocessing: normalize + +# Select applied question preprocessing/augmentations (LOADED) +# Use one (or more) of the transformations: +# none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all +# Accepted formats: a,b,c or [a,b,c] +question_preprocessing: lowercase, remove_punctuation + +# Select applied question preprocessing (LOADED) +# Options: none | lowercase | remove_punctuation | tokenize | all +# Accepted formats: a,b,c or [a,b,c] +answer_preprocessing: none + streams: #################################################################### diff --git a/ptp/components/problems/image_text_to_class/vqa_med_2019.py b/ptp/components/problems/image_text_to_class/vqa_med_2019.py index ed9c433..4a9616f 100644 --- a/ptp/components/problems/image_text_to_class/vqa_med_2019.py +++ b/ptp/components/problems/image_text_to_class/vqa_med_2019.py @@ -17,19 +17,51 @@ __author__ = "Chaitanya Shivade, Tomasz Kornuta" +import os import string import tqdm import pandas as pd from PIL import Image -from torchvision import transforms -import os +import nltk +from nltk.corpus import stopwords + import torch from torchvision import transforms from ptp.components.problems.problem import Problem from ptp.data_types.data_definition import DataDefinition +from ptp.configuration.configuration_error import ConfigurationError + +def get_value_list_from_dictionary(key, parameter_dict, accepted_values = []): + """ + Parses parameter values retrieved from a given parameter dictionary using key. + Optionally, checks is all values are accepted. + + :param key: Key of the parameter. + :param parameter_dict: Dictionary containing given key (e.g. config or globals) + :param accepted_values: List of accepted values (DEFAULT: []) + + :return: List of parsed values + """ + parameter = parameter_dict[key] + # Preprocess parameter value. + if (type(parameter) == str): + values = parameter.replace(" ","").split(",") + else: + values = parameter # list + assert type(values) == list, "Parameter value must be a list" + + # Test values one by one. + if len(accepted_values) > 0: + for value in values: + if value not in accepted_values: + raise ConfigurationError("One of the values in '{}' is invalid (current: '{}', accepted: {})".format(key, value, accepted_values)) + + # Return list. + return values + class VQAMED2019(Problem): """ @@ -67,6 +99,10 @@ def __init__(self, name, config): # Call constructors of parent classes. Problem.__init__(self, name, VQAMED2019, config) + # (Eventually) download required packages. + nltk.download('punkt') + nltk.download('stopwords') + # Get key mappings of all output streams. self.key_images = self.stream_keys["images"] self.key_image_ids = self.stream_keys["image_ids"] @@ -100,9 +136,6 @@ def __init__(self, name, config): self.globals["category_word_mappings"] = {'C1': 0, 'C2': 1, 'C3': 2, 'C4': 3, 'BINARY': 4, '': 5} self.category_idx_to_word = {0: 'C1', 1: 'C2', 2: 'C3', 3: 'C4', 4: 'BINARY', 5: ''} - # Check if we want to remove punctuation from questions/answer - self.remove_punctuation = self.config["remove_punctuation"] - # Get the absolute path. 
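The preprocessing options above (image_preprocessing, question_preprocessing, answer_preprocessing) are parsed by the new get_value_list_from_dictionary() helper, which accepts either a comma-separated string or a YAML list and raises ConfigurationError for anything outside the accepted set. A small usage sketch, assuming the helper is in scope; the config dict is a stand-in for the component's configuration object:

config = {"question_preprocessing": "lowercase, remove_punctuation"}

values = get_value_list_from_dictionary(
    "question_preprocessing",
    config,
    accepted_values=["none", "lowercase", "remove_punctuation", "tokenize", "all"],
)
print(values)    # ['lowercase', 'remove_punctuation']

# An unknown option such as "uppercase" would raise ConfigurationError instead.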
self.data_folder = os.path.expanduser(self.config['data_folder']) @@ -138,6 +171,42 @@ def __init__(self, name, config): # Filter lists taking into account configuration. source_files, source_categories = self.filter_sources(source_files, source_categories) + # else: # TODO + + # Get image augmentations. + self.image_preprocessing = get_value_list_from_dictionary( + "image_preprocessing", self.config, + 'none | random_affine | random_horizontal_flip | normalize | all'.split(" | ") + ) + if 'none' in self.image_preprocessing: + self.image_preprocessing = [] + if 'all' in self.image_preprocessing: + self.image_preprocessing = 'random_affine | random_horizontal_flip | normalize'.split(" | ") + self.logger.info("Applied image augmentations: {}".format(self.image_preprocessing)) + + + # Get question augmentations. + self.question_preprocessing = get_value_list_from_dictionary( + "question_preprocessing", self.config, + 'none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all'.split(" | ") + ) + if 'none' in self.question_preprocessing: + self.question_preprocessing = [] + if 'all' in self.question_preprocessing: + self.question_preprocessing = 'lowercase | remove_punctuation | tokenize | remove_stop_words | shuffle_words'.split(" | ") + self.logger.info("Applied question augmentations: {}".format(self.question_preprocessing)) + + # Get answer preprocessing. + self.answer_preprocessing = get_value_list_from_dictionary( + "answer_preprocessing", self.config, + 'none | lowercase | remove_punctuation | tokenize | all'.split(" | ") + ) + if 'none' in self.answer_preprocessing: + self.answer_preprocessing = [] + if 'all' in self.answer_preprocessing: + self.answer_preprocessing = 'lowercase | remove_punctuation | tokenize '.split(" | ") + self.logger.info("Applied answer preprocessing: {}".format(self.answer_preprocessing)) + # Load dataset. self.logger.info("Loading dataset from files:\n {}".format(source_files)) @@ -146,12 +215,13 @@ def __init__(self, name, config): # Display exemplary sample. self.logger.info("Exemplary sample:\n [ category: {}\t image_ids: {}\t question: {}\t answer: {} ]".format( + self.dataset[0][self.key_category_ids], self.dataset[0][self.key_image_ids], self.dataset[0][self.key_questions], - self.dataset[0][self.key_answers], - self.dataset[0][self.key_category_ids] + self.dataset[0][self.key_answers] )) + def filter_sources(self, source_files, source_categories): """ Loads the dataset from one or more files. @@ -165,42 +235,90 @@ def filter_sources(self, source_files, source_categories): # Check categories that user want to use. use_files = [False] * 4 categs = {'C1': 0, 'C2': 1, 'C3': 2, 'C4': 3} - for cat in self.config["categories"].replace(" ","").split(","): + # Parse categories from configuration list. + loaded_categs = get_value_list_from_dictionary("categories", self.config, ['C1', 'C2', 'C3', 'C4', 'all']) + for cat in loaded_categs: # "Special" case. if cat == "all": use_files = [True] * 4 # Make no sense to continue. break else: - if cat in categs.keys(): - use_files[categs[cat]] = True + use_files[categs[cat]] = True # Filter. _, source_files, source_categories = zip(*(filter(lambda x: x[0], zip(use_files, source_files,source_categories)))) return source_files, source_categories - def preprocess_text(self, text, remove_stop_words = False): + def __len__(self): + """ + Returns the "size" of the "problem" (total number of samples). + + :return: The size of the problem. 
+ """ + return len(self.dataset) + + + def output_data_definitions(self): + """ + Function returns a dictionary with definitions of output data produced the component. + + :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + # Add all "standard" streams. + d = { + self.key_indices: DataDefinition([-1, 1], [list, int], "Batch of sample indices [BATCH_SIZE] x [1]"), + self.key_images: DataDefinition([-1, self.depth, self.height, self.width], [torch.Tensor], "Batch of images [BATCH_SIZE x IMAGE_DEPTH x IMAGE_HEIGHT x IMAGE_WIDTH]"), + self.key_image_ids: DataDefinition([-1, 1], [list, str], "Batch of image names, each being a single word [BATCH_SIZE] x [STRING]"), + self.key_image_sizes: DataDefinition([-1, 2], [torch.Tensor], "Batch of original sizes (height, width) of images [BATCH_SIZE x 2]"), + self.key_category_ids: DataDefinition([-1], [torch.Tensor], "Batch of target category indices, each being a single index [BATCH_SIZE]"), + self.key_category_names: DataDefinition([-1, 1], [list, str], "Batch of category target names, each being a single word [BATCH_SIZE] x [STRING]"), + } + + # Add stream with questions. + if 'tokenize' in self.question_preprocessing: + d[self.key_questions] = DataDefinition([-1, -1, 1], [list, list, str], "Batch of questions, each being a list of words [BATCH_SIZE] x [SEQ_LEN] x [STRING]") + else: + d[self.key_questions] = DataDefinition([-1, 1], [list, str], "Batch of questions, each being a string consisting of many words [BATCH_SIZE] x [STRING]") + + # Add stream with answers. + if 'tokenize' in self.answer_preprocessing: + d[self.key_answers] = DataDefinition([-1, -1, 1], [list, list, str], "Batch of target answers, each being a list of words [BATCH_SIZE] x [SEQ_LEN] x [STRING]") + else: + d[self.key_answers]= DataDefinition([-1, 1], [list, str], "Batch of target answers, each being a string consisting of many words [BATCH_SIZE] x [STRING]") + return d + + + def preprocess_text(self, text, lowercase = False, remove_punctuation = False, tokenize = False, remove_stop_words = False): """ Function that preprocesses questions/answers as suggested by ImageCLEF VQA challenge organizers: - * lowercases all words - * removes punctuation + * lowercases all words (optional) + * removes punctuation (optional) * removes stop words (optional) :param text: text to be processed. + :param lowercase: lowercases text (DEFAULT: False) + :param remove_punctuation: removes punctuation (DEFAULT: False) + :param tokenize: tokenizes the text (DEFAULT: False) :param remove_stop_words: removes stop words (DEFAULT: False) :return: Preprocessed and tokenized text (list of strings) """ # Lowercase. - text = text.lower() + if lowercase: + text = text.lower() # Remove punctuation. - translator = str.maketrans('', '', string.punctuation) - text = text.translate(translator) - - # Remove '“' and '”' !!! - text = text.replace('“','').replace('”','') - + if remove_punctuation: + translator = str.maketrans('', '', string.punctuation) + text = text.translate(translator) + # Remove '“' and '”' !!! + text = text.replace('“','').replace('”','') + + # If not tokenize - return text. + if not tokenize: + return text + # Tokenize. text_words = nltk.tokenize.word_tokenize(text) @@ -229,9 +347,6 @@ def load_dataset(self, source_files, source_categories): # Set containing list of tuples. dataset = [] - # Create table used for removing punctuations. 
- table = str.maketrans({key: None for key in string.punctuation}) - # Process files with categories. for data_file, category in zip(source_files, source_categories): # Set absolute path to file. @@ -249,18 +364,28 @@ def load_dataset(self, source_files, source_categories): answer = row[self.key_answers] # Process question - if required. - if self.remove_punctuation in ["questions","all"]: - question = question.translate(table) + preprocessed_question = self.preprocess_text( + question, + 'lowercase' in self.question_preprocessing, + 'remove_punctuation' in self.question_preprocessing, + 'tokenize' in self.question_preprocessing, + 'remove_stop_words' in self.question_preprocessing + ) # Process answer - if required. - if self.remove_punctuation in ["answers","all"]: - answer = answer.translate(table) + preprocessed_answer = self.preprocess_text( + answer, + 'lowercase' in self.answer_preprocessing, + 'remove_punctuation' in self.answer_preprocessing, + 'tokenize' in self.answer_preprocessing, + False + ) # Add record to dataset. dataset.append({ self.key_image_ids: row[self.key_image_ids], - self.key_questions: question, - self.key_answers: answer, + self.key_questions: preprocessed_question, + self.key_answers: preprocessed_answer, # Add category. self.key_category_ids: category }) @@ -271,31 +396,6 @@ def load_dataset(self, source_files, source_categories): # Return the created list. return dataset - def __len__(self): - """ - Returns the "size" of the "problem" (total number of samples). - - :return: The size of the problem. - """ - return len(self.dataset) - - - def output_data_definitions(self): - """ - Function returns a dictionary with definitions of output data produced the component. - - :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). - """ - return { - self.key_indices: DataDefinition([-1, 1], [list, int], "Batch of sample indices [BATCH_SIZE] x [1]"), - self.key_images: DataDefinition([-1, self.depth, self.height, self.width], [torch.Tensor], "Batch of images [BATCH_SIZE x IMAGE_DEPTH x IMAGE_HEIGHT x IMAGE_WIDTH]"), - self.key_image_ids: DataDefinition([-1, 1], [list, str], "Batch of image names, each being a single word [BATCH_SIZE] x [STRING]"), - self.key_image_sizes: DataDefinition([-1, 2], [torch.Tensor], "Batch of original sizes (height, width) of images [BATCH_SIZE x 2]"), - self.key_questions: DataDefinition([-1, 1], [list, str], "Batch of questions, each being a string consisting of many words [BATCH_SIZE] x [STRING]"), - self.key_answers: DataDefinition([-1, 1], [list, str], "Batch of target answers, each being a string consisting of many words [BATCH_SIZE] x [STRING]"), - self.key_category_ids: DataDefinition([-1], [torch.Tensor], "Batch of target category indices, each being a single index [BATCH_SIZE]"), - self.key_category_names: DataDefinition([-1, 1], [list, str], "Batch of category target names, each being a single word [BATCH_SIZE] x [STRING]"), - } def __getitem__(self, index): @@ -318,23 +418,29 @@ def __getitem__(self, index): # Get its width and height. width, height = img.size - if(self.config['use_augmentation'] == 'True'): + image_transformations_list = [] + # Optional. + if 'random_affine' in self.image_preprocessing: rotate = (-45, 135) translate = (0.05, 0.25) scale = (0.5, 2) - transforms_list = [transforms.RandomAffine(rotate, translate, scale), transforms.RandomHorizontalFlip()] - else: - transforms_list = [] - # Resize the image and transform to Torch Tensor. 
- transforms_com = transforms.Compose(transforms_list + [ - transforms.Resize([self.height,self.width]), - transforms.ToTensor(), - # Use normalization that the pretrained models from TorchVision require. - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - ]) - img = transforms_com(img) #.type(torch.FloatTensor).squeeze() + image_transformations_list.append(transforms.RandomAffine(rotate, translate, scale)) + if 'random_horizontal_flip' in self.image_preprocessing: + image_transformations_list.append(transforms.RandomHorizontalFlip()) + + # Add two obligatory transformations. + image_transformations_list.append(transforms.Resize([self.height,self.width])) + image_transformations_list.append(transforms.ToTensor()) + + # Optional normalizastion. + if 'normalize' in self.image_preprocessing: + # Use normalization that the pretrained models from TorchVision require. + image_transformations_list.append(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])) - #print("img: min_val = {} max_val = {}".format(torch.min(img),torch.max(img)) ) + # Resize the image and transform to Torch Tensor. + transforms_com = transforms.Compose(image_transformations_list) + # Apply transformations. + img = transforms_com(img) # Create the resulting sample (data dict). data_dict = self.create_data_dict(index) @@ -345,13 +451,18 @@ def __getitem__(self, index): # Scale width and height to range (0,1). data_dict[self.key_image_sizes] = torch.FloatTensor([float(height/self.scale_image_height), float(width/self.scale_image_width)]) - # Question. - data_dict[self.key_questions] = item[self.key_questions] - data_dict[self.key_answers] = item[self.key_answers] + # Apply question transformations. + preprocessed_question = item[self.key_questions] + # TODO: apply additional random transformations e.g. "shuffle_words" + data_dict[self.key_questions] = preprocessed_question + + # Return answer. + preprocessed_answer = item[self.key_answers] + data_dict[self.key_answers] = preprocessed_answer # Question category related variables. # Check if this is binary question. - if self.predict_yes_no(item[self.key_questions]): + if self.predict_yes_no(item[self.key_answers]): data_dict[self.key_category_ids] = 4 # Binary. data_dict[self.key_category_names] = self.category_idx_to_word[4] else: @@ -390,7 +501,7 @@ def collate_fn(self, batch): data_dict[self.key_image_ids] = [item[self.key_image_ids] for item in batch] data_dict[self.key_image_sizes] = torch.stack([item[self.key_image_sizes] for item in batch]).type(torch.FloatTensor) - # Collate lists. + # Collate lists/lists of lists. 
data_dict[self.key_questions] = [item[self.key_questions] for item in batch] data_dict[self.key_answers] = [item[self.key_answers] for item in batch] From 409f993c615f3c831e3fbb1185f88aae8a24692f Mon Sep 17 00:00:00 2001 From: tkornut Date: Fri, 26 Apr 2019 00:26:49 -0700 Subject: [PATCH 12/15] Added preprocessing to sentence tokenizer, cleanups --- .../components/text/sentence_tokenizer.yml | 9 ++++ .../image_text_to_class/vqa_med_2019.py | 47 ++++-------------- ptp/components/text/sentence_tokenizer.py | 49 ++++++++++++++++--- ptp/configuration/config_parsing.py | 29 +++++++++++ 4 files changed, 90 insertions(+), 44 deletions(-) diff --git a/configs/default/components/text/sentence_tokenizer.yml b/configs/default/components/text/sentence_tokenizer.yml index 40714c7..233c221 100644 --- a/configs/default/components/text/sentence_tokenizer.yml +++ b/configs/default/components/text/sentence_tokenizer.yml @@ -8,6 +8,15 @@ # False: sentence -> list of strings, True: list of strings -> sentence. detokenize: False +# Select applied preprocessing/augmentations (LOADED) +# Use one (or more) of the transformations: +# none | lowercase | remove_punctuation | all +# Accepted formats: a,b,c or [a,b,c] +preprocessing: none + +# List of characters to be removed +remove_characters: '' + streams: #################################################################### # 2. Keymappings associated with INPUT and OUTPUT streams. diff --git a/ptp/components/problems/image_text_to_class/vqa_med_2019.py b/ptp/components/problems/image_text_to_class/vqa_med_2019.py index 4a9616f..a06624c 100644 --- a/ptp/components/problems/image_text_to_class/vqa_med_2019.py +++ b/ptp/components/problems/image_text_to_class/vqa_med_2019.py @@ -24,7 +24,6 @@ from PIL import Image import nltk -from nltk.corpus import stopwords import torch from torchvision import transforms @@ -32,36 +31,7 @@ from ptp.components.problems.problem import Problem from ptp.data_types.data_definition import DataDefinition -from ptp.configuration.configuration_error import ConfigurationError - -def get_value_list_from_dictionary(key, parameter_dict, accepted_values = []): - """ - Parses parameter values retrieved from a given parameter dictionary using key. - Optionally, checks is all values are accepted. - - :param key: Key of the parameter. - :param parameter_dict: Dictionary containing given key (e.g. config or globals) - :param accepted_values: List of accepted values (DEFAULT: []) - - :return: List of parsed values - """ - parameter = parameter_dict[key] - # Preprocess parameter value. - if (type(parameter) == str): - values = parameter.replace(" ","").split(",") - else: - values = parameter # list - assert type(values) == list, "Parameter value must be a list" - - # Test values one by one. - if len(accepted_values) > 0: - for value in values: - if value not in accepted_values: - raise ConfigurationError("One of the values in '{}' is invalid (current: '{}', accepted: {})".format(key, value, accepted_values)) - - # Return list. - return values - +from ptp.configuration.config_parsing import get_value_list_from_dictionary class VQAMED2019(Problem): """ @@ -173,7 +143,7 @@ def __init__(self, name, config): source_files, source_categories = self.filter_sources(source_files, source_categories) # else: # TODO - # Get image augmentations. + # Get image preprocessing. 
self.image_preprocessing = get_value_list_from_dictionary( "image_preprocessing", self.config, 'none | random_affine | random_horizontal_flip | normalize | all'.split(" | ") @@ -182,10 +152,10 @@ def __init__(self, name, config): self.image_preprocessing = [] if 'all' in self.image_preprocessing: self.image_preprocessing = 'random_affine | random_horizontal_flip | normalize'.split(" | ") - self.logger.info("Applied image augmentations: {}".format(self.image_preprocessing)) + self.logger.info("Applied image preprocessing: {}".format(self.image_preprocessing)) - # Get question augmentations. + # Get question preprocessing. self.question_preprocessing = get_value_list_from_dictionary( "question_preprocessing", self.config, 'none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all'.split(" | ") @@ -194,7 +164,7 @@ def __init__(self, name, config): self.question_preprocessing = [] if 'all' in self.question_preprocessing: self.question_preprocessing = 'lowercase | remove_punctuation | tokenize | remove_stop_words | shuffle_words'.split(" | ") - self.logger.info("Applied question augmentations: {}".format(self.question_preprocessing)) + self.logger.info("Applied question preprocessing: {}".format(self.question_preprocessing)) # Get answer preprocessing. self.answer_preprocessing = get_value_list_from_dictionary( @@ -310,10 +280,11 @@ def preprocess_text(self, text, lowercase = False, remove_punctuation = False, t # Remove punctuation. if remove_punctuation: + # Remove '“' and '”' and '’'!!! + for char in ['“', '”', '’']: + text = text.replace(char,' ') translator = str.maketrans('', '', string.punctuation) text = text.translate(translator) - # Remove '“' and '”' !!! - text = text.replace('“','').replace('”','') # If not tokenize - return text. if not tokenize: @@ -327,7 +298,7 @@ def preprocess_text(self, text, lowercase = False, remove_punctuation = False, t return text_words # Perform "cleansing". - stops = set(stopwords.words("english")) + stops = set(nltk.corpus.stopwords.words("english")) cleansed_words = [word for word in text_words if word not in stops] # Return the original text if there are no words left :] if len(cleansed_words) == 0: diff --git a/ptp/components/text/sentence_tokenizer.py b/ptp/components/text/sentence_tokenizer.py index bd62526..1c94e95 100644 --- a/ptp/components/text/sentence_tokenizer.py +++ b/ptp/components/text/sentence_tokenizer.py @@ -14,11 +14,15 @@ __author__ = "Tomasz Kornuta" -from nltk.tokenize import WhitespaceTokenizer +import nltk +#from nltk.tokenize import WhitespaceTokenizer +import string from ptp.components.component import Component from ptp.data_types.data_definition import DataDefinition +from ptp.configuration.config_parsing import get_value_list_from_dictionary + class SentenceTokenizer(Component): """ @@ -41,8 +45,26 @@ def __init__(self, name, config): # Read the actual configuration. self.mode_detokenize = config['detokenize'] + # Get preprocessing. 
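The preprocessing and remove_characters options read below feed tokenize_sample() further down: optional lowercasing, replacement of the extra characters by spaces, optional punctuation stripping, then whitespace tokenization. A stand-alone illustration of that sequence on a made-up answer (not the component itself):

import string
from nltk.tokenize import WhitespaceTokenizer

def tokenize(text, preprocessing=("lowercase", "remove_punctuation"), remove_characters=("“", "”", "’")):
    if "lowercase" in preprocessing:
        text = text.lower()
    for char in remove_characters:
        text = text.replace(char, " ")
    if "remove_punctuation" in preprocessing:
        text = text.translate(str.maketrans("", "", string.punctuation))
    return WhitespaceTokenizer().tokenize(text)

print(tokenize("Pott’s disease (tuberculous spondylitis)"))
# ['pott', 's', 'disease', 'tuberculous', 'spondylitis']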
+ self.preprocessing = get_value_list_from_dictionary( + "preprocessing", self.config, + 'none | lowercase | remove_punctuation | all'.split(" | ") + ) + if 'none' in self.preprocessing: + self.preprocessing = [] + if 'all' in self.preprocessing: + self.preprocessing = 'lowercase | remove_punctuation'.split(" | ") + self.logger.info("Applied preprocessing: {}".format(self.preprocessing)) + + self.remove_characters = get_value_list_from_dictionary("remove_characters", self.config) + self.logger.info("Additional characters that will be removed during preprocessing: {}".format(self.remove_characters)) + + + if 'remove_punctuation' in self.preprocessing: + self.translator = str.maketrans('', '', string.punctuation) + # Tokenizer. - self.tokenizer = WhitespaceTokenizer() + self.tokenizer = nltk.tokenize.WhitespaceTokenizer() # Set key mappings. self.key_inputs = self.stream_keys["inputs"] @@ -81,15 +103,30 @@ def output_data_definitions(self): return { self.key_outputs: DataDefinition([-1, 1], [list, str], "Batch of sentences, each represented as a single string [BATCH_SIZE] x [string]") } - def tokenize_sample(self, sample): + def tokenize_sample(self, text): """ - Changes sample (sentence) into list of tokens (words). + Changes text (sentence) into list of tokens (words). - :param sample: sentence (string). + :param text: sentence (string). :return: list of words (strings). """ - return self.tokenizer.tokenize(sample) # sample.split() + # Lowercase. + if 'lowercase' in self.preprocessing: + text = text.lower() + + # Remove characters. + for char in self.remove_characters: + text = text.replace(char, ' ') + + # Remove punctuation. + if 'remove_punctuation' in self.preprocessing: + text = text.translate(self.translator) + + # Tokenize. + text_words = self.tokenizer.tokenize(text) + + return text_words def detokenize_sample(self, sample): """ diff --git a/ptp/configuration/config_parsing.py b/ptp/configuration/config_parsing.py index 7c2aeb3..8a557df 100644 --- a/ptp/configuration/config_parsing.py +++ b/ptp/configuration/config_parsing.py @@ -210,3 +210,32 @@ def reverse_order_config_load(config_interface_obj, configs_to_load, abs_config_ # Load config from YAML file. config_interface_obj.add_config_params_from_yaml(abs_config_path + config) print('Info: Loaded configuration from file {}'.format(abs_config_path + config)) + + +def get_value_list_from_dictionary(key, parameter_dict, accepted_values = []): + """ + Parses parameter values retrieved from a given parameter dictionary using key. + Optionally, checks is all values are accepted. + + :param key: Key of the parameter. + :param parameter_dict: Dictionary containing given key (e.g. config or globals) + :param accepted_values: List of accepted values (DEFAULT: []) + + :return: List of parsed values + """ + parameter = parameter_dict[key] + # Preprocess parameter value. + if (type(parameter) == str): + values = parameter.replace(" ","").split(",") + else: + values = parameter # list + assert type(values) == list, "Parameter value must be a list" + + # Test values one by one. + if len(accepted_values) > 0: + for value in values: + if value not in accepted_values: + raise ConfigurationError("One of the values in '{}' is invalid (current: '{}', accepted: {})".format(key, value, accepted_values)) + + # Return list. 
+ return values From 477eed4aaa75a9a9237fc60bdb5432d635a27660 Mon Sep 17 00:00:00 2001 From: tkornut Date: Fri, 26 Apr 2019 00:27:26 -0700 Subject: [PATCH 13/15] C4: config for classification of answer depending on answer words --- .../c4_word_answer_onehot_bow.yml | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml diff --git a/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml b/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml new file mode 100644 index 0000000..f41c722 --- /dev/null +++ b/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml @@ -0,0 +1,62 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c4_classification/default_c4_classification.yml + +# Training parameters: +training: + problem: + batch_size: 128 + remove_punctuation: all + +# Validation parameters: +validation: + problem: + batch_size: 128 + remove_punctuation: all + +pipeline: + name: c4_word_answer_onehot_bow + + # Answer encoding. + answer_tokenizer: + type: SentenceTokenizer + priority: 1.1 + preprocessing: lowercase,remove_punctuation + remove_characters: [“,”,’] + streams: + inputs: answers + outputs: tokenized_answer_words + + answer_onehot_encoder: + type: SentenceOneHotEncoder + priority: 1.2 + data_folder: ~/data/vqa-med + word_mappings_file: answer_words.c4.preprocessed.word.mappings.csv + export_word_mappings_to_globals: True + streams: + inputs: tokenized_answer_words + outputs: encoded_answer_words + globals: + vocabulary_size: answer_words_vocabulary_size + word_mappings: answer_words_word_mappings + + answer_bow_encoder: + type: BOWEncoder + priority: 1.3 + streams: + inputs: encoded_answer_words + outputs: bow_answer_words + globals: + bow_size: answer_words_vocabulary_size + + # Model. + classifier: + type: FeedForwardNetwork + hidden_sizes: [100, 100] + priority: 3 + streams: + inputs: bow_answer_words + globals: + input_size: answer_words_vocabulary_size + prediction_size: vocabulary_size_c4 + +#: pipeline From ee8cad569ded8479f82dd1ce41c5a4ac67e50921 Mon Sep 17 00:00:00 2001 From: tkornut Date: Fri, 26 Apr 2019 00:36:37 -0700 Subject: [PATCH 14/15] C4: config for classification of answer depending on answer words, dropout: 0.5 one hidden layer (500) --- .../c4_classification/c4_word_answer_onehot_bow.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml b/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml index f41c722..842a987 100644 --- a/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml +++ b/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml @@ -51,7 +51,8 @@ pipeline: # Model. 
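The c4_word_answer_onehot_bow pipeline above maps answer words to indices, one-hot encodes them and collapses them into a bag-of-words vector before the classifier. Conceptually the BOW step just sums the one-hot word vectors; a compact sketch with a toy vocabulary (the real mappings come from answer_words.c4.preprocessed.word.mappings.csv, and whether ptp's BOWEncoder sums or averages is an assumption here):

import torch

vocab = {"normal": 0, "chest": 1, "ct": 2, "mri": 3}    # toy word mappings
answer_words = ["chest", "ct"]

indices = torch.tensor([vocab[w] for w in answer_words])
one_hots = torch.nn.functional.one_hot(indices, num_classes=len(vocab)).float()
bow = one_hots.sum(dim=0)    # bag-of-words representation of the whole answer
print(bow)                   # tensor([0., 1., 1., 0.])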
classifier: type: FeedForwardNetwork - hidden_sizes: [100, 100] + hidden_sizes: [500] + dropout_rate: 0.5 priority: 3 streams: inputs: bow_answer_words From 07270dc9f4d0b605ab4177daf4aa3e9392acf768 Mon Sep 17 00:00:00 2001 From: tkornut Date: Fri, 26 Apr 2019 01:08:16 -0700 Subject: [PATCH 15/15] comment --- .../components/problems/image_text_to_class/vqa_med_2019.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml b/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml index bd724b4..da0de78 100644 --- a/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml +++ b/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml @@ -37,7 +37,8 @@ image_preprocessing: normalize question_preprocessing: lowercase, remove_punctuation # Select applied question preprocessing (LOADED) -# Options: none | lowercase | remove_punctuation | tokenize | all +# Use one (or more) of the transformations: +# none | lowercase | remove_punctuation | tokenize | all # Accepted formats: a,b,c or [a,b,c] answer_preprocessing: none
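With the final defaults above (image_preprocessing: normalize; question_preprocessing: lowercase, remove_punctuation; answer_preprocessing: none), each image only goes through resize, tensor conversion and ImageNet normalization, while the affine and flip augmentations stay opt-in. A stand-alone sketch of that default transform chain, mirroring __getitem__ from patch 11; the 224x224 target size is just an illustrative stand-in for the configured resize_image:

from torchvision import transforms

image_preprocessing = ["normalize"]           # the default set above

ops = []
if "random_affine" in image_preprocessing:
    ops.append(transforms.RandomAffine((-45, 135), (0.05, 0.25), (0.5, 2)))
if "random_horizontal_flip" in image_preprocessing:
    ops.append(transforms.RandomHorizontalFlip())
ops += [transforms.Resize([224, 224]), transforms.ToTensor()]
if "normalize" in image_preprocessing:
    # Statistics expected by ImageNet-pretrained torchvision backbones.
    ops.append(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))

transform = transforms.Compose(ops)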