From 56cabb913722653c8a14d8d475f950a31eeefa3b Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Sat, 4 May 2019 12:45:35 -0700 Subject: [PATCH 01/14] extend for C4 --- configs/vqa_med_2019/extend_answers.yml | 1 + configs/vqa_med_2019/extend_answers_c4.yml | 74 ++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 configs/vqa_med_2019/extend_answers_c4.yml diff --git a/configs/vqa_med_2019/extend_answers.yml b/configs/vqa_med_2019/extend_answers.yml index 9e2f9a4..304a7c1 100644 --- a/configs/vqa_med_2019/extend_answers.yml +++ b/configs/vqa_med_2019/extend_answers.yml @@ -24,6 +24,7 @@ validation_answers: type: *p_type data_folder: *data_folder split: validation + categories: all resize_image: *resize_image batch_size: 64 # Appy all preprocessing/data augmentations. diff --git a/configs/vqa_med_2019/extend_answers_c4.yml b/configs/vqa_med_2019/extend_answers_c4.yml new file mode 100644 index 0000000..40a1381 --- /dev/null +++ b/configs/vqa_med_2019/extend_answers_c4.yml @@ -0,0 +1,74 @@ +# This config is not a standalone config! +# It adds new sections (sets) without samplers and components for saving answers that we can use for getting final answers. + +training_answers: + problem: + type: &p_type VQAMED2019 + data_folder: &data_folder ~/data/vqa-med + split: training + categories: C4 + resize_image: &resize_image [224, 224] + batch_size: 64 + # Appy all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions + dataloader: + # No sampler, process samples in the same order. + shuffle: false + # Use 1 worker, so batches will follow the samples order. + num_workers: 1 + +validation_answers: + problem: + type: *p_type + data_folder: *data_folder + split: validation + categories: C4 + resize_image: *resize_image + batch_size: 64 + # Appy all preprocessing/data augmentations. + question_preprocessing: lowercase,remove_punctuation,tokenize + streams: + questions: tokenized_questions + dataloader: + # No sampler, process samples in the same order. + shuffle: false + # Use 1 worker, so batches will follow the samples order. + num_workers: 1 + + +# Add component for exporting answers to files. +pipeline: + disable: viewer,question_tokenizer + # Viewers. 
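For reference, a minimal sketch of the file the submission_exporter below is configured to produce, assuming StreamFileExporter simply joins the listed streams with the '|' separator, one row per sample (the ids and answers here are made up):

# sketch: writing submission.txt as image_id|predicted_answer rows
image_ids = ["synpic52143", "synpic37599"]      # hypothetical image ids
predicted_answers = ["axial", "yes"]            # hypothetical predictions
with open("submission.txt", "w") as f:
    for image_id, answer in zip(image_ids, predicted_answers):
        f.write("{}|{}\n".format(image_id, answer))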
+ viewer_extended: + priority: 100.4 + type: StreamViewer + sample_number: 0 + input_streams: + indices,image_ids,tokenized_questions, + category_names,predicted_categories, + answers,tokenized_answers,predicted_answers + + answer_exporter: + priority: 100.5 + type: StreamFileExporter + separator: '|' + filename: 'answers.csv' + export_separator_line_to_csv: True + input_streams: + indices,image_ids,tokenized_questions, + category_names,predicted_categories, + answers,tokenized_answers,predicted_answers + + submission_exporter: + priority: 100.6 + type: StreamFileExporter + separator: '|' + filename: 'submission.txt' + input_streams: + image_ids, + predicted_answers + +#: pipeline From 71425d2e6641a661496847ebf9d80368566ac15e Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Sat, 4 May 2019 13:59:08 -0700 Subject: [PATCH 02/14] Component: ReduceTensor + config, config c4_word_answer_glove_sum --- .../components/transforms/reduce_tensor.yml | 47 ++++++ .../c4_word_answer_glove_sum.yml | 91 ++++++++++++ .../default_c4_classification.yml | 4 +- ptp/components/transforms/__init__.py | 2 + ptp/components/transforms/list_to_tensor.py | 2 +- ptp/components/transforms/reduce_tensor.py | 134 ++++++++++++++++++ 6 files changed, 277 insertions(+), 3 deletions(-) create mode 100644 configs/default/components/transforms/reduce_tensor.yml create mode 100644 configs/vqa_med_2019/c4_classification/c4_word_answer_glove_sum.yml create mode 100644 ptp/components/transforms/reduce_tensor.py diff --git a/configs/default/components/transforms/reduce_tensor.yml b/configs/default/components/transforms/reduce_tensor.yml new file mode 100644 index 0000000..57f267b --- /dev/null +++ b/configs/default/components/transforms/reduce_tensor.yml @@ -0,0 +1,47 @@ +# This file defines the default values for the ReduceTensor transformation. + +#################################################################### +# 1. CONFIGURATION PARAMETERS that will be LOADED by the component. +#################################################################### + +# Number of input dimensions, including batch (LOADED) +num_inputs_dims: 2 + +# Dimension along with the reduction will be applied (LOADED) +reduction_dim: 1 + +# Reduction type (LOADED) +# Options: sum | mean | min | max | argmin | argmax +reduction_type: sum + +# If True, the output tensor is of the same size as input, except dim where it is of size 1 (LOADED) +keepdim: False + +streams: + #################################################################### + # 2. Keymappings associated with INPUT and OUTPUT streams. + #################################################################### + + # Stream containing input tensor (INPUT) + inputs: inputs + + # Stream containing output tensor (OUTPUT) + outputs: outputs + +globals: + #################################################################### + # 3. Keymappings of variables that will be RETRIEVED from GLOBALS. + #################################################################### + + # Size of the intput_item (GET) + # (last dimenstion) + input_size: input_size + + #################################################################### + # 4. Keymappings associated with GLOBAL variables that will be SET. + #################################################################### + + #################################################################### + # 5. Keymappings associated with statistics that will be ADDED. 
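#
# Shape example (a sketch of the semantics defined above): for an input of shape
# [batch=64 x words=7 x features=100] with reduction_dim: 1 and reduction_type: sum,
# i.e. torch.sum(inputs, dim=1, keepdim=keepdim), the output is [64 x 100] when
# keepdim is False and [64 x 1 x 100] when keepdim is True.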
+ #################################################################### + diff --git a/configs/vqa_med_2019/c4_classification/c4_word_answer_glove_sum.yml b/configs/vqa_med_2019/c4_classification/c4_word_answer_glove_sum.yml new file mode 100644 index 0000000..2c12145 --- /dev/null +++ b/configs/vqa_med_2019/c4_classification/c4_word_answer_glove_sum.yml @@ -0,0 +1,91 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c4_classification/default_c4_classification.yml + +# Training parameters: +training: + problem: + categories: C4 + batch_size: 512 + # In here we won't use images at all. + stream_images: False + dataloader: + num_workers: 0 + +# Validation parameters: +validation: + problem: + categories: C4 + batch_size: 512 + # In here we won't use images at all. + stream_images: False + dataloader: + num_workers: 0 + + +pipeline: + + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [answer_word_embeddings_size] + values: [100] + + # Answer encoding. + answer_tokenizer: + type: SentenceTokenizer + priority: 1.1 + preprocessing: lowercase,remove_punctuation + remove_characters: [“,”,’] + streams: + inputs: answers + outputs: tokenized_answer_words + + # Model 1: Embeddings + answer_embeddings: + priority: 1.2 + type: SentenceEmbeddings + embeddings_size: 100 + pretrained_embeddings_file: glove.6B.100d.txt + data_folder: ~/data/vqa-med + word_mappings_file: answer_words.c4.preprocessed.word.mappings.csv + export_word_mappings_to_globals: True + streams: + inputs: tokenized_answer_words + outputs: encoded_answer_words + globals: + vocabulary_size: answer_words_vocabulary_size + word_mappings: answer_words_word_mappings + + answer_reduction: + type: ReduceTensor + priority: 1.3 + num_inputs_dims: 3 + reduction_dim: 1 + reduction_type: sum + keepdim: False + streams: + inputs: encoded_answer_words + outputs: reduced_answers + globals: + input_size: answer_word_embeddings_size + + # Model. + classifier: + type: FeedForwardNetwork + hidden_sizes: [500, 500] + dropout_rate: 0.5 + priority: 3 + streams: + inputs: reduced_answers + globals: + input_size: answer_word_embeddings_size + prediction_size: vocabulary_size_c4 + + # Viewers. 
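The flow configured above amounts to a bag-of-embeddings classifier; a rough, self-contained sketch (vocabulary and class counts are illustrative, the modules merely stand in for the actual ptp components):

import torch
import torch.nn as nn

vocab_size, num_classes = 2000, 1500                 # hypothetical sizes
embed = nn.Embedding(vocab_size, 100)                # stands in for SentenceEmbeddings (GloVe 100d)
classifier = nn.Sequential(                          # stands in for FeedForwardNetwork
    nn.Linear(100, 500), nn.ReLU(), nn.Dropout(0.5),
    nn.Linear(500, 500), nn.ReLU(), nn.Dropout(0.5),
    nn.Linear(500, num_classes),
)

token_ids = torch.randint(0, vocab_size, (64, 7))    # [batch x answer words]
encoded = embed(token_ids)                           # [64 x 7 x 100]
reduced = encoded.sum(dim=1)                         # ReduceTensor, sum over dim 1: [64 x 100]
logits = classifier(reduced)                         # [64 x num_classes]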
+ viewer: + type: StreamViewer + priority: 100.4 + input_streams: answers, tokenized_answer_words, predicted_answers + +#: pipeline diff --git a/configs/vqa_med_2019/c4_classification/default_c4_classification.yml b/configs/vqa_med_2019/c4_classification/default_c4_classification.yml index 1e33502..dcfcd52 100644 --- a/configs/vqa_med_2019/c4_classification/default_c4_classification.yml +++ b/configs/vqa_med_2019/c4_classification/default_c4_classification.yml @@ -70,8 +70,8 @@ pipeline: type: PrecisionRecallStatistics priority: 100.3 use_word_mappings: True - show_class_scores: True - show_confusion_matrix: True + #show_class_scores: True + #show_confusion_matrix: True streams: targets: answers_ids globals: diff --git a/ptp/components/transforms/__init__.py b/ptp/components/transforms/__init__.py index 36d3845..6200892 100644 --- a/ptp/components/transforms/__init__.py +++ b/ptp/components/transforms/__init__.py @@ -1,10 +1,12 @@ from .concatenation import Concatenation from .list_to_tensor import ListToTensor +from .reduce_tensor import ReduceTensor from .reshape_tensor import ReshapeTensor __all__ = [ 'Concatenation', 'ListToTensor', + 'ReduceTensor', 'ReshapeTensor', ] diff --git a/ptp/components/transforms/list_to_tensor.py b/ptp/components/transforms/list_to_tensor.py index fbf3f21..0f12fb5 100644 --- a/ptp/components/transforms/list_to_tensor.py +++ b/ptp/components/transforms/list_to_tensor.py @@ -69,7 +69,7 @@ def input_data_definitions(self): def output_data_definitions(self): """ - Function returns a empty dictionary with definitions of output data produced the component. + Function returns a dictionary with definitions of output data produced the component. :return: Empty dictionary. """ diff --git a/ptp/components/transforms/reduce_tensor.py b/ptp/components/transforms/reduce_tensor.py new file mode 100644 index 0000000..d36ee25 --- /dev/null +++ b/ptp/components/transforms/reduce_tensor.py @@ -0,0 +1,134 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) tkornuta, IBM Corporation 2019 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__author__ = "Tomasz Kornuta" + +import torch + +from ptp.components.component import Component +from ptp.data_types.data_definition import DataDefinition +from ptp.configuration.config_parsing import get_value_from_dictionary + + +class ReduceTensor(Component): + """ + Class responsible for reducing tensor using indicated reduction method along a given dimension. + + """ + + def __init__(self, name, config): + """ + Initializes object. + + :param name: Name of the component loaded from the configuration file. + :type name: str + + :param config: Dictionary of parameters (read from the configuration ``.yaml`` file). + :type config: :py:class:`ptp.configuration.ConfigInterface` + + """ + # Call constructors of parent classes. + Component.__init__(self, name, ReduceTensor, config) + + # Set key mappings. + self.key_inputs = self.stream_keys["inputs"] + self.key_outputs = self.stream_keys["outputs"] + + # Get number of input dimensions from configuration. 
+ self.num_inputs_dims = self.config["num_inputs_dims"] + # Get size of a single input item (last dimension) from globals. + self.input_size = self.globals["input_size"] + + # Get reduction tparamsype from configuration. + self.dim = self.config["reduction_dim"] + self.keepdim = self.config["keepdim"] + + # Set reduction type. + rt = get_value_from_dictionary( + "reduction_type", self.config, + 'sum | mean | min | max | argmin | argmax'.split(" | ") + ) + reduction_types = {} + reduction_types["sum"] = torch.sum + reduction_types["mean"] = torch.mean + reduction_types["min"] = torch.min + reduction_types["max"] = torch.max + reduction_types["argmin"] = torch.argmin + reduction_types["argmax"] = torch.argmax + + self.reduction = reduction_types[rt] + + + def input_data_definitions(self): + """ + Function returns a dictionary with definitions of input data that are required by the component. + + :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + # Generate the description of input stream. + dims_desc = ["DIM {}".format(i) for i in range(self.num_inputs_dims-1)] + desc = "Batch of outputs [" + " x ".join(dims_desc) + "]" + return { + self.key_inputs: DataDefinition( + [-1]*(self.num_inputs_dims-1) + [self.input_size], + [torch.Tensor], + desc) + } + + def output_data_definitions(self): + """ + Function returns a dictionary with definitions of output data produced the component. + + :return: Empty dictionary. + """ + # Generate the dimensions and description of output stream. + if self.keepdim: + dims = [-1]*(self.num_inputs_dims-1) + [self.input_size] + dims[self.dim] = 1 + dims_desc = ["DIM {}".format(i) for i in range(self.num_inputs_dims)] + dims_desc[self.dim] = "1" + desc = "Batch of outputs [" + " x ".join(dims_desc) + "]" + else: + dims = [-1]*(self.num_inputs_dims-2) + [self.input_size] + dims_desc = ["DIM {}".format(i) for i in range(self.num_inputs_dims-1)] + desc = "Batch of outputs [" + " x ".join(dims_desc) + "]" + return { + self.key_outputs: DataDefinition( + dims, + [torch.Tensor], + desc) + } + + + def __call__(self, data_dict): + """ + Encodes "inputs" in the format of a single tensor. + Stores reshaped tensor in "outputs" field of in data_dict. + + :param data_dict: :py:class:`ptp.utils.DataDict` object containing (among others): + + - "inputs": expected input field containing tensor [BATCH_SIZE x ...] + + - "outputs": added output field containing tensor [BATCH_SIZE x ...] + """ + # Get inputs to be encoded. + inputs = data_dict[self.key_inputs] + + outputs = self.reduction(inputs, self.dim, self.keepdim) + + # Create the returned dict. 
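        # Shape example (illustrative): with num_inputs_dims=3, reduction_dim=1,
        # reduction_type=sum and keepdim=False, an input of [BATCH_SIZE x NUM_WORDS x INPUT_SIZE]
        # is reduced to [BATCH_SIZE x INPUT_SIZE].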
+ data_dict.extend({self.key_outputs: outputs}) + From 711a1d63c518446c609843d1e9846ca387141cc0 Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Sat, 4 May 2019 14:51:24 -0700 Subject: [PATCH 03/14] C4 wor_answer classifiers --- .../components/models/sentence_embeddings.yml | 4 +- .../c4_word_answer_glove_sum.yml | 6 +- .../c4_word_answer_mimic_sum.yml | 91 +++++++++++++++++++ .../c4_word_answer_onehot_sum.yml | 91 +++++++++++++++++++ 4 files changed, 188 insertions(+), 4 deletions(-) create mode 100644 configs/vqa_med_2019/c4_classification/c4_word_answer_mimic_sum.yml create mode 100644 configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_sum.yml diff --git a/configs/default/components/models/sentence_embeddings.yml b/configs/default/components/models/sentence_embeddings.yml index 0c9432f..dceb725 100644 --- a/configs/default/components/models/sentence_embeddings.yml +++ b/configs/default/components/models/sentence_embeddings.yml @@ -40,7 +40,9 @@ fixed_padding: -1 # File containing pretrained embeddings (LOADED) # Empty means that no embeddings will be loaded. -# Options: '' | glove.6B.50d.txt | glove.6B.100d.txt | glove.6B.200d.txt | glove.6B.300d.txt | glove.42B.300d.txt | glove.840B.300d.txt | glove.twitter.27B.txt | mimic.fastText.no_clean.300d.pickled +# Options: +# '' | glove.6B.50d.txt | glove.6B.100d.txt | glove.6B.200d.txt | glove.6B.300d.txt | +# glove.42B.300d.txt | glove.840B.300d.txt | glove.twitter.27B.txt | mimic.fastText.no_clean.300d.pickled pretrained_embeddings_file: '' streams: diff --git a/configs/vqa_med_2019/c4_classification/c4_word_answer_glove_sum.yml b/configs/vqa_med_2019/c4_classification/c4_word_answer_glove_sum.yml index 2c12145..5c1b1f4 100644 --- a/configs/vqa_med_2019/c4_classification/c4_word_answer_glove_sum.yml +++ b/configs/vqa_med_2019/c4_classification/c4_word_answer_glove_sum.yml @@ -29,7 +29,7 @@ pipeline: type: GlobalVariablePublisher # Add input_size to globals. keys: [answer_word_embeddings_size] - values: [100] + values: [300] # Answer encoding. answer_tokenizer: @@ -45,8 +45,8 @@ pipeline: answer_embeddings: priority: 1.2 type: SentenceEmbeddings - embeddings_size: 100 - pretrained_embeddings_file: glove.6B.100d.txt + embeddings_size: 300 + pretrained_embeddings_file: glove.840B.300d.txt data_folder: ~/data/vqa-med word_mappings_file: answer_words.c4.preprocessed.word.mappings.csv export_word_mappings_to_globals: True diff --git a/configs/vqa_med_2019/c4_classification/c4_word_answer_mimic_sum.yml b/configs/vqa_med_2019/c4_classification/c4_word_answer_mimic_sum.yml new file mode 100644 index 0000000..db28cc4 --- /dev/null +++ b/configs/vqa_med_2019/c4_classification/c4_word_answer_mimic_sum.yml @@ -0,0 +1,91 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c4_classification/default_c4_classification.yml + +# Training parameters: +training: + problem: + categories: C4 + batch_size: 512 + # In here we won't use images at all. + stream_images: False + dataloader: + num_workers: 0 + +# Validation parameters: +validation: + problem: + categories: C4 + batch_size: 512 + # In here we won't use images at all. + stream_images: False + dataloader: + num_workers: 0 + + +pipeline: + + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [answer_word_embeddings_size] + values: [300] + + # Answer encoding. 
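A rough sketch (an assumption, not the component's actual code) of what the answer_tokenizer below does to raw answers: lowercase, drop the listed quote characters and punctuation, then split into word tokens.

import string

def preprocess(answer):
    answer = answer.lower()
    for ch in ['“', '”', '’']:                     # remove_characters
        answer = answer.replace(ch, '')
    answer = answer.translate(str.maketrans('', '', string.punctuation))
    return answer.split()

print(preprocess("Patient’s chest x-ray (PA view)"))
# -> ['patients', 'chest', 'xray', 'pa', 'view']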
+ answer_tokenizer: + type: SentenceTokenizer + priority: 1.1 + preprocessing: lowercase,remove_punctuation + remove_characters: [“,”,’] + streams: + inputs: answers + outputs: tokenized_answer_words + + # Model 1: Embeddings + answer_embeddings: + priority: 1.2 + type: SentenceEmbeddings + embeddings_size: 300 + pretrained_embeddings_file: mimic.fastText.no_clean.300d.pickled + data_folder: ~/data/vqa-med + word_mappings_file: answer_words.c4.preprocessed.word.mappings.csv + export_word_mappings_to_globals: True + streams: + inputs: tokenized_answer_words + outputs: encoded_answer_words + globals: + vocabulary_size: answer_words_vocabulary_size + word_mappings: answer_words_word_mappings + + answer_reduction: + type: ReduceTensor + priority: 1.3 + num_inputs_dims: 3 + reduction_dim: 1 + reduction_type: sum + keepdim: False + streams: + inputs: encoded_answer_words + outputs: reduced_answers + globals: + input_size: answer_word_embeddings_size + + # Model. + classifier: + type: FeedForwardNetwork + hidden_sizes: [500, 500] + dropout_rate: 0.5 + priority: 3 + streams: + inputs: reduced_answers + globals: + input_size: answer_word_embeddings_size + prediction_size: vocabulary_size_c4 + + # Viewers. + viewer: + type: StreamViewer + priority: 100.4 + input_streams: answers, tokenized_answer_words, predicted_answers + +#: pipeline diff --git a/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_sum.yml b/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_sum.yml new file mode 100644 index 0000000..640f202 --- /dev/null +++ b/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_sum.yml @@ -0,0 +1,91 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c4_classification/default_c4_classification.yml + +# Training parameters: +training: + problem: + categories: C4 + batch_size: 512 + # In here we won't use images at all. + stream_images: False + dataloader: + num_workers: 0 + +# Validation parameters: +validation: + problem: + categories: C4 + batch_size: 512 + # In here we won't use images at all. + stream_images: False + dataloader: + num_workers: 0 + + +pipeline: + # Answer encoding. + answer_tokenizer: + type: SentenceTokenizer + priority: 1.1 + preprocessing: lowercase,remove_punctuation + remove_characters: [“,”,’] + streams: + inputs: answers + outputs: tokenized_answer_words + + answer_onehot_encoder: + type: SentenceOneHotEncoder + priority: 1.2 + data_folder: ~/data/vqa-med + word_mappings_file: answer_words.c4.preprocessed.word.mappings.csv + export_word_mappings_to_globals: True + streams: + inputs: tokenized_answer_words + outputs: encoded_answer_words + globals: + vocabulary_size: answer_words_vocabulary_size + word_mappings: answer_words_word_mappings + + answer_to_tensor: + type: ListToTensor + priority: 1.3 + num_inputs_dims: 3 + streams: + inputs: encoded_answer_words + outputs: tensor_answer_words + globals: + input_size: answer_words_vocabulary_size + + + answer_reduction: + type: ReduceTensor + priority: 1.4 + num_inputs_dims: 3 + reduction_dim: 1 + reduction_type: sum + keepdim: False + streams: + inputs: tensor_answer_words + outputs: reduced_answer_words + globals: + input_size: answer_words_vocabulary_size + + # Model. + classifier: + type: FeedForwardNetwork + hidden_sizes: [500, 500] + dropout_rate: 0.5 + priority: 3 + streams: + inputs: reduced_answer_words + globals: + input_size: answer_words_vocabulary_size + prediction_size: vocabulary_size_c4 + + # Viewers. 
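The one-hot variant above amounts to a bag-of-words encoding: summing the one-hot vectors of the answer words over the word dimension gives a word-count vector of vocabulary size, which the classifier then maps to the answer class. A toy sketch:

import torch

vocab_size = 10                                    # toy vocabulary
word_ids = torch.tensor([[2, 5, 5, 7]])            # one answer of 4 words: [batch=1 x words=4]
one_hot = torch.nn.functional.one_hot(word_ids, vocab_size).float()   # [1 x 4 x 10]
bag_of_words = one_hot.sum(dim=1)                  # ReduceTensor, sum over dim 1: [1 x 10]
print(bag_of_words)                                # index 5 holds a count of 2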
+ viewer: + type: StreamViewer + priority: 100.4 + input_streams: answers, tokenized_answer_words, predicted_answers + +#: pipeline From 5ad971720e14e23f9c180d414e968eddc83085b1 Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Sat, 4 May 2019 15:18:55 -0700 Subject: [PATCH 04/14] fix in embeddings - some GloVe labels appeared to have many words --- ptp/components/utils/embeddings.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/ptp/components/utils/embeddings.py b/ptp/components/utils/embeddings.py index 6d22d71..ca1e0fb 100644 --- a/ptp/components/utils/embeddings.py +++ b/ptp/components/utils/embeddings.py @@ -116,12 +116,24 @@ def load_pretrained_embeddings(logger, folder, embeddings_name, word_to_ix, embe # Parse file and cherry pick the vectors that fit our vocabulary. for line in f.readlines(): values = line.split() - # Get word. - word = values[0] + if len(values) > embeddings_size+1: + #print(len(values)) + # Case: two (or more) words! + num_words = len(values) - embeddings_size + words = values[0:num_words] + word = ' '.join(words) + #print(word) + # Get remaining vector. + vector = np.array(values[num_words:], dtype='float32') + else: + # Get word. + word = values[0] + # Get remaining vector. + vector = np.array(values[1:], dtype='float32') + # Get index. index = word_to_ix.get(word) if index: - vector = np.array(values[1:], dtype='float32') assert (len(vector) == embeddings_size), "Embeddings size must be equal to the size of pretrained embeddings!" # Ok, set vector. embeddings[index] = vector From 5aae937fa0128dd2cd34d1a0b7c53b674d0a4fa1 Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Sat, 4 May 2019 15:39:04 -0700 Subject: [PATCH 05/14] Added bar to loading of word embeddins (as this might take a while for bigger embeddings) --- .../c4_classification/c4_word_answer_glove_sum.yml | 6 +++--- ptp/components/utils/embeddings.py | 11 +++++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/configs/vqa_med_2019/c4_classification/c4_word_answer_glove_sum.yml b/configs/vqa_med_2019/c4_classification/c4_word_answer_glove_sum.yml index 5c1b1f4..2c12145 100644 --- a/configs/vqa_med_2019/c4_classification/c4_word_answer_glove_sum.yml +++ b/configs/vqa_med_2019/c4_classification/c4_word_answer_glove_sum.yml @@ -29,7 +29,7 @@ pipeline: type: GlobalVariablePublisher # Add input_size to globals. keys: [answer_word_embeddings_size] - values: [300] + values: [100] # Answer encoding. answer_tokenizer: @@ -45,8 +45,8 @@ pipeline: answer_embeddings: priority: 1.2 type: SentenceEmbeddings - embeddings_size: 300 - pretrained_embeddings_file: glove.840B.300d.txt + embeddings_size: 100 + pretrained_embeddings_file: glove.6B.100d.txt data_folder: ~/data/vqa-med word_mappings_file: answer_words.c4.preprocessed.word.mappings.csv export_word_mappings_to_globals: True diff --git a/ptp/components/utils/embeddings.py b/ptp/components/utils/embeddings.py index ca1e0fb..099684d 100644 --- a/ptp/components/utils/embeddings.py +++ b/ptp/components/utils/embeddings.py @@ -19,7 +19,10 @@ import os import numpy as np +import tqdm + import torch + import ptp.components.utils.io as io @@ -111,6 +114,9 @@ def load_pretrained_embeddings(logger, folder, embeddings_name, word_to_ix, embe else: logger.info("File '{}' containing pretrained embeddings found in '{}' folder".format(embeddings_name, folder)) + # Get number of lines/vectors. 
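    # Counting the lines first costs one extra pass over the file, but it gives the
    # progress bar below a meaningful total; for the larger files (e.g. glove.840B.300d.txt,
    # roughly 2.2M vectors) loading can otherwise appear stalled.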
+ num_lines = sum([1 for line in open(os.path.join(folder, embeddings_name))]) + t = tqdm.tqdm(total=num_lines) with open(os.path.join(folder, embeddings_name)) as f: # Parse file and cherry pick the vectors that fit our vocabulary. @@ -130,7 +136,6 @@ def load_pretrained_embeddings(logger, folder, embeddings_name, word_to_ix, embe word = values[0] # Get remaining vector. vector = np.array(values[1:], dtype='float32') - # Get index. index = word_to_ix.get(word) if index: @@ -139,7 +144,9 @@ def load_pretrained_embeddings(logger, folder, embeddings_name, word_to_ix, embe embeddings[index] = vector # Increment counter. num_loaded_embs += 1 - + t.update() + t.close() + logger.info("Loaded {} pretrained embeddings for vocabulary of size {} from {}".format(num_loaded_embs, len(word_to_ix), embeddings_name)) # Return matrix with embeddings. From 2be2380cb6df5ccc004fb06952ef489e570fb5ff Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Sat, 4 May 2019 19:55:56 -0700 Subject: [PATCH 06/14] Two pipelines for my table --- ...lstm_resnet50_ewm_is_cat_ffn_c123_loss.yml | 12 + ...ic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml | 6 +- ...ve_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml | 383 ++++++++++++++++++ ...ve_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml | 383 ++++++++++++++++++ 4 files changed, 781 insertions(+), 3 deletions(-) create mode 100644 configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml create mode 100644 configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml diff --git a/configs/vqa_med_2019/evaluation/glove_lstm_resnet50_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/glove_lstm_resnet50_ewm_is_cat_ffn_c123_loss.yml index 9f9d7c9..d7ee74b 100644 --- a/configs/vqa_med_2019/evaluation/glove_lstm_resnet50_ewm_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/glove_lstm_resnet50_ewm_is_cat_ffn_c123_loss.yml @@ -39,6 +39,8 @@ hyperparameters: answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [100] batch_size: &batch_size 64 + preload_images: &preload_images True + num_workers: &num_workers 0 # Training parameters: training: @@ -49,10 +51,15 @@ training: # Appy all preprocessing/data augmentations. question_preprocessing: *question_preprocessing image_preprocessing: *image_preprocessing + # Preload images. + preload_images: *preload_images streams: questions: tokenized_questions sampler: weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Use four workers for loading images. + dataloader: + num_workers: *num_workers # Optimizer parameters: optimizer: @@ -67,14 +74,19 @@ training: # Validation parameters: validation: + partial_validation_interval: 100 problem: batch_size: *batch_size categories: C1,C2,C3 # Appy all preprocessing/data augmentations. question_preprocessing: *question_preprocessing image_preprocessing: *image_preprocessing + # Preload images: false, as we will need them only once, at the end. + preload_images: false streams: questions: tokenized_questions + dataloader: + num_workers: 1 pipeline: diff --git a/configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml index 958161a..a4d91bb 100644 --- a/configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml @@ -81,12 +81,12 @@ validation: # Appy all preprocessing/data augmentations. 
question_preprocessing: *question_preprocessing image_preprocessing: *image_preprocessing - # Preload images. - preload_images: *preload_images + # Preload images: false, as we will need them only once, at the end. + preload_images: false streams: questions: tokenized_questions dataloader: - num_workers: *num_workers + num_workers: 1 pipeline: diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml new file mode 100644 index 0000000..5168831 --- /dev/null +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml @@ -0,0 +1,383 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +hyperparameters: + # In here I am putting some of the hyperparameters from spreadsheet. + + question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize + # Accepted formats: a,b,c or [a,b,c] + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + + image_preprocessing: &image_preprocessing normalize + # Accepted formats: a,b,c or [a,b,c] + # none | random_affine | random_horizontal_flip | normalize | all + + # Image encoder. + image_encoder_model: &image_encoder_model vgg16 + # Options: vgg16 | densenet121 | resnet152 | resnet50 + image_encoder_output_size_val: &image_encoder_output_size_val 100 + + # Question encoder. + question_encoder_embeddings: &question_encoder_embeddings glove.6B.50d.txt + # Options: '' | glove.6B.50d.txt | glove.6B.100d.txt | glove.6B.200d.txt | glove.6B.300d.txt | glove.42B.300d.txt | glove.840B.300d.txt | glove.twitter.27B.txt | mimic.fastText.no_clean.300d.pickled + question_encoder_embeddings_size_val: &question_encoder_embeddings_size_val 50 + question_encoder_lstm_size_val: &question_encoder_lstm_size_val 50 + question_encoder_output_size_val: &question_encoder_output_size_val 100 + + # Fusion I: image + question + question_image_fusion_type_val: &question_image_fusion_type ElementWiseMultiplication + # Options: ElementWiseMultiplication | ? (component: question_image_fusion) + question_image_fusion_size_val: &question_image_fusion_size_val 100 + + # Image size encoder. + image_size_encoder_output_size_val: &image_size_encoder_output_size_val 10 + + # Fusion II: (image + question) + image size (must be = question_image_fusion_size_val + image_size_encoder_output_size_val) + question_image_size_fusion_size_val: &question_image_size_fusion_size_val 110 + + # Final classifier: FFN. + answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [100] + + batch_size: &batch_size 100 + preload_images: &preload_images True + num_workers: &num_workers 0 + +# Training parameters: +training: + problem: + batch_size: *batch_size + categories: C1,C2,C3 + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images. + preload_images: *preload_images + streams: + questions: tokenized_questions + sampler: + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Use four workers for loading images. 
+ dataloader: + num_workers: *num_workers + + # Optimizer parameters: + optimizer: + name: Adam + lr: 0.0001 + + # Terminal conditions: + terminal_conditions: + loss_stop: 1.0e-3 + episode_limit: 10000 + epoch_limit: -1 + +# Validation parameters: +validation: + partial_validation_interval: 100 + problem: + batch_size: *batch_size + categories: C1,C2,C3 + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images: false, as we will need them only once, at the end. + preload_images: false + streams: + questions: tokenized_questions + dataloader: + num_workers: 1 + + +pipeline: + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, fused_activation_size] + values: [*question_encoder_output_size_val, 2, *image_size_encoder_output_size_val, *image_encoder_output_size_val, *question_image_fusion_size_val] + + # Statistics. + batch_size: + priority: 0.1 + type: BatchSizeStatistics + + # Answer encoding. + pipe1_all_answer_indexer: + priority: 0.2 + type: LabelIndexer + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn + + + ################# PIPE 0: QUESTION CATEGORIZATION ################# + + # Add global variables - the ones related to only question categorization. + pipe0_global_publisher: + priority: 0.3 + type: GlobalVariablePublisher + # Add input_size to globals. 
+ keys: [pipe0_question_encoder_output_size] + values: [100] + + # Model 1: question embeddings + pipe0_question_embeddings: + priority: 0.4 + type: SentenceEmbeddings + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + globals: + embeddings_size: pipe0_embeddings_size + + # Model 2: question RNN + pipe0_lstm: + priority: 0.5 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_question_activations + globals: + input_size: pipe0_embeddings_size + prediction_size: pipe0_question_encoder_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.6 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.7 + streams: + inputs: pipe0_question_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: pipe0_question_encoder_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.8 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + priority: 0.9 + type: AccuracyStatistics + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + priority: 1.1 + type: SentenceEmbeddings + embeddings_size: *question_encoder_embeddings_size_val + pretrained_embeddings_file: *question_encoder_embeddings + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + globals: + embeddings_size: pipe1_embeddings_size + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + hidden_size: *question_encoder_lstm_size_val + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: pipe1_embeddings_size + prediction_size: question_encoder_output_size + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. 
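An illustrative sketch of what the image_encoder below provides (an assumption about TorchVisionWrapper, not its actual implementation): a pretrained VGG-16 backbone whose classifier head is replaced so it emits a fixed-size activation vector matching image_encoder_output_size.

import torch
import torchvision

backbone = torchvision.models.vgg16(pretrained=True)
backbone.classifier[-1] = torch.nn.Linear(4096, 100)    # head resized to image_encoder_output_size

images = torch.randn(2, 3, 224, 224)                    # [batch x channels x height x width]
image_activations = backbone(images)                    # [2 x 100]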
+ image_encoder: + priority: 2.1 + type: TorchVisionWrapper + model: *image_encoder_model + streams: + inputs: images + outputs: image_activations + globals: + output_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + priority: 3.1 + type: FeedForwardNetwork + use_losfotmax: False + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: image-question fusion ################# + # Element wise multiplication + FF. + question_image_fusion: + priority: 4.1 + type: *question_image_fusion_type + dropout_rate: 0.5 + streams: + image_encodings: image_activations + question_encodings: question_activations + outputs: fused_activations + globals: + image_encoding_size: image_encoder_output_size + question_encoding_size: question_encoder_output_size + output_size: fused_activation_size + + question_image_ffn: + priority: 4.2 + type: FeedForwardNetwork + hidden_sizes: [*question_image_fusion_size_val] + dropout_rate: 0.5 + use_logsoftmax: False + streams: + inputs: fused_activations + predictions: question_image_activations + globals: + input_size: fused_activation_size + prediction_size: fused_activation_size + + ################# PIPE 5: image-question-image size fusion ################# + + # 5th subpipeline: concatenation + concat: + priority: 5.1 + type: Concatenation + input_streams: [question_image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,*question_image_fusion_size_val],[-1,*image_size_encoder_output_size_val]] + output_dims: [-1,*question_image_size_fusion_size_val] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + ################# PIPE 6: C1 + C2 + C3 questions ################# + + # Model 4: FFN C123 answering + pipe6_c123_answer_classifier: + priority: 6.3 + type: FeedForwardNetwork + hidden: *answer_classifier_hidden_sizes_val + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c123_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c123_binary_yn + + pipe6_c123_nllloss: + priority: 6.4 + type: NLLLoss + targets_dim: 1 + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + loss: pipe6_c123_loss + + pipe6_c123_precision_recall: + priority: 6.5 + type: PrecisionRecallStatistics + use_word_mappings: True + show_class_scores: True + #show_confusion_matrix: True + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + globals: + word_mappings: word_mappings_c123_binary_yn + statistics: + precision: pipe6_c123_precision + recall: pipe6_c123_recall + f1score: pipe6_c123_f1score + + # C123 Predictions decoder. + pipe5_c123_prediction_decoder: + priority: 6.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe6_c123_predictions + outputs: predicted_answers + globals: + word_mappings: word_mappings_c123_binary_yn + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. 
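A rough sketch (an assumption, not the actual components) of the fusion steps configured above: fuse question and image activations by element-wise multiplication, then concatenate the image-size encoding before the final answer classifier.

import torch

question_activations = torch.randn(64, 100)      # from pipe1_lstm
image_activations = torch.randn(64, 100)         # from image_encoder
image_size_activations = torch.randn(64, 10)     # from image_size_encoder

fused_activations = question_activations * image_activations                 # [64 x 100]
concatenated_activations = torch.cat([fused_activations,
                                      image_size_activations], dim=1)        # [64 x 110]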
+ viewer: + priority: 9.3 + type: StreamViewer + input_streams: + tokenized_questions, + category_names, pipe0_predicted_question_categories_names, + answers, predicted_answers + + +#: pipeline diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml new file mode 100644 index 0000000..43a838b --- /dev/null +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml @@ -0,0 +1,383 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +hyperparameters: + # In here I am putting some of the hyperparameters from spreadsheet. + + question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize + # Accepted formats: a,b,c or [a,b,c] + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + + image_preprocessing: &image_preprocessing normalize + # Accepted formats: a,b,c or [a,b,c] + # none | random_affine | random_horizontal_flip | normalize | all + + # Image encoder. + image_encoder_model: &image_encoder_model vgg16 + # Options: vgg16 | densenet121 | resnet152 | resnet50 + image_encoder_output_size_val: &image_encoder_output_size_val 100 + + # Question encoder. + question_encoder_embeddings: &question_encoder_embeddings glove.6B.50d.txt + # Options: '' | glove.6B.50d.txt | glove.6B.100d.txt | glove.6B.200d.txt | glove.6B.300d.txt | glove.42B.300d.txt | glove.840B.300d.txt | glove.twitter.27B.txt | mimic.fastText.no_clean.300d.pickled + question_encoder_embeddings_size_val: &question_encoder_embeddings_size_val 50 + question_encoder_lstm_size_val: &question_encoder_lstm_size_val 50 + question_encoder_output_size_val: &question_encoder_output_size_val 100 + + # Fusion I: image + question + question_image_fusion_type_val: &question_image_fusion_type MultimodalCompactBilinearPooling + # Options: ElementWiseMultiplication | MultimodalCompactBilinearPooling | + question_image_fusion_size_val: &question_image_fusion_size_val 100 + + # Image size encoder. + image_size_encoder_output_size_val: &image_size_encoder_output_size_val 10 + + # Fusion II: (image + question) + image size (must be = question_image_fusion_size_val + image_size_encoder_output_size_val) + question_image_size_fusion_size_val: &question_image_size_fusion_size_val 110 + + # Final classifier: FFN. + answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [100] + + batch_size: &batch_size 100 + preload_images: &preload_images True + num_workers: &num_workers 0 + +# Training parameters: +training: + problem: + batch_size: *batch_size + categories: C1,C2,C3 + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images. + preload_images: *preload_images + streams: + questions: tokenized_questions + sampler: + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Use four workers for loading images. 
+ dataloader: + num_workers: *num_workers + + # Optimizer parameters: + optimizer: + name: Adam + lr: 0.0001 + + # Terminal conditions: + terminal_conditions: + loss_stop: 1.0e-3 + episode_limit: 10000 + epoch_limit: -1 + +# Validation parameters: +validation: + partial_validation_interval: 100 + problem: + batch_size: *batch_size + categories: C1,C2,C3 + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images: false, as we will need them only once, at the end. + preload_images: false + streams: + questions: tokenized_questions + dataloader: + num_workers: 1 + + +pipeline: + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, fused_activation_size] + values: [*question_encoder_output_size_val, 2, *image_size_encoder_output_size_val, *image_encoder_output_size_val, *question_image_fusion_size_val] + + # Statistics. + batch_size: + priority: 0.1 + type: BatchSizeStatistics + + # Answer encoding. + pipe1_all_answer_indexer: + priority: 0.2 + type: LabelIndexer + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn + + + ################# PIPE 0: QUESTION CATEGORIZATION ################# + + # Add global variables - the ones related to only question categorization. + pipe0_global_publisher: + priority: 0.3 + type: GlobalVariablePublisher + # Add input_size to globals. 
+ keys: [pipe0_question_encoder_output_size] + values: [100] + + # Model 1: question embeddings + pipe0_question_embeddings: + priority: 0.4 + type: SentenceEmbeddings + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + globals: + embeddings_size: pipe0_embeddings_size + + # Model 2: question RNN + pipe0_lstm: + priority: 0.5 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_question_activations + globals: + input_size: pipe0_embeddings_size + prediction_size: pipe0_question_encoder_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.6 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.7 + streams: + inputs: pipe0_question_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: pipe0_question_encoder_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.8 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + priority: 0.9 + type: AccuracyStatistics + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + priority: 1.1 + type: SentenceEmbeddings + embeddings_size: *question_encoder_embeddings_size_val + pretrained_embeddings_file: *question_encoder_embeddings + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + globals: + embeddings_size: pipe1_embeddings_size + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + hidden_size: *question_encoder_lstm_size_val + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: pipe1_embeddings_size + prediction_size: question_encoder_output_size + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. 
+ image_encoder: + priority: 2.1 + type: TorchVisionWrapper + model: *image_encoder_model + streams: + inputs: images + outputs: image_activations + globals: + output_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + priority: 3.1 + type: FeedForwardNetwork + use_losfotmax: False + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: image-question fusion ################# + # Element wise multiplication + FF. + question_image_fusion: + priority: 4.1 + type: *question_image_fusion_type + dropout_rate: 0.5 + streams: + image_encodings: image_activations + question_encodings: question_activations + outputs: fused_activations + globals: + image_encoding_size: image_encoder_output_size + question_encoding_size: question_encoder_output_size + output_size: fused_activation_size + + question_image_ffn: + priority: 4.2 + type: FeedForwardNetwork + hidden_sizes: [*question_image_fusion_size_val] + dropout_rate: 0.5 + use_logsoftmax: False + streams: + inputs: fused_activations + predictions: question_image_activations + globals: + input_size: fused_activation_size + prediction_size: fused_activation_size + + ################# PIPE 5: image-question-image size fusion ################# + + # 5th subpipeline: concatenation + concat: + priority: 5.1 + type: Concatenation + input_streams: [question_image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,*question_image_fusion_size_val],[-1,*image_size_encoder_output_size_val]] + output_dims: [-1,*question_image_size_fusion_size_val] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + ################# PIPE 6: C1 + C2 + C3 questions ################# + + # Model 4: FFN C123 answering + pipe6_c123_answer_classifier: + priority: 6.3 + type: FeedForwardNetwork + hidden: *answer_classifier_hidden_sizes_val + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c123_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c123_binary_yn + + pipe6_c123_nllloss: + priority: 6.4 + type: NLLLoss + targets_dim: 1 + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + loss: pipe6_c123_loss + + pipe6_c123_precision_recall: + priority: 6.5 + type: PrecisionRecallStatistics + use_word_mappings: True + show_class_scores: True + #show_confusion_matrix: True + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + globals: + word_mappings: word_mappings_c123_binary_yn + statistics: + precision: pipe6_c123_precision + recall: pipe6_c123_recall + f1score: pipe6_c123_f1score + + # C123 Predictions decoder. + pipe5_c123_prediction_decoder: + priority: 6.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe6_c123_predictions + outputs: predicted_answers + globals: + word_mappings: word_mappings_c123_binary_yn + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. 
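This variant swaps the fusion above for MultimodalCompactBilinearPooling. A very rough sketch of MCB under the usual count-sketch + FFT formulation (an illustration assuming a recent PyTorch with torch.fft, not the component's actual implementation):

import torch
import torch.nn.functional as F

def count_sketch(x, h, s, d):
    # x: [batch x n]; h: random bins in [0, d); s: random signs in {-1, +1}
    sketch = x.new_zeros(x.size(0), d)
    sketch.index_add_(1, h, x * s)
    return sketch

def mcb(v, q, d=1024):
    torch.manual_seed(0)                           # fixed random projections
    hv = torch.randint(0, d, (v.size(1),))
    sv = torch.randint(0, 2, (v.size(1),)).float() * 2 - 1
    hq = torch.randint(0, d, (q.size(1),))
    sq = torch.randint(0, 2, (q.size(1),)).float() * 2 - 1
    # Circular convolution of the two sketches via element-wise product in the frequency domain.
    fused = torch.fft.irfft(torch.fft.rfft(count_sketch(v, hv, sv, d)) *
                            torch.fft.rfft(count_sketch(q, hq, sq, d)), n=d)
    fused = torch.sign(fused) * torch.sqrt(fused.abs() + 1e-12)   # signed square root
    return F.normalize(fused, dim=1)                              # l2 normalisation

fused_activations = mcb(torch.randn(64, 100), torch.randn(64, 100))   # [64 x 1024]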
+ viewer: + priority: 9.3 + type: StreamViewer + input_streams: + tokenized_questions, + category_names, pipe0_predicted_question_categories_names, + answers, predicted_answers + + +#: pipeline From e86366345974440a3a958a40f83bbe2a1438dd99 Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Sat, 4 May 2019 20:30:07 -0700 Subject: [PATCH 07/14] glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss config + modifications of VQA_Attention - export of output_size to globals --- .../components/models/vqa/attention.yml | 6 +- ...ic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml | 4 + ...ve_lstm_vgg16_att_is_cat_ffn_c123_loss.yml | 391 ++++++++++++++++++ ...ve_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml | 4 + ptp/components/models/vqa/attention.py | 2 + 5 files changed, 404 insertions(+), 3 deletions(-) create mode 100644 configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml diff --git a/configs/default/components/models/vqa/attention.yml b/configs/default/components/models/vqa/attention.yml index 830f4b8..4018be1 100644 --- a/configs/default/components/models/vqa/attention.yml +++ b/configs/default/components/models/vqa/attention.yml @@ -46,13 +46,13 @@ globals: # Size of the question encodings input (RETRIEVED) question_encoding_size: question_encoding_size - # Size of the output (RETRIEVED) - output_size: output_size - #################################################################### # 4. Keymappings associated with GLOBAL variables that will be SET. #################################################################### + # Size of the output (SET) + output_size: output_size + #################################################################### # 5. Keymappings associated with statistics that will be ADDED. #################################################################### diff --git a/configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml index a4d91bb..516df63 100644 --- a/configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml @@ -90,6 +90,10 @@ validation: pipeline: + # Disable flow 0. + disable: + pipe0_question_embeddings,pipe0_question_embeddings,pipe0_lstm,pipe0_classifier, + pipe0_category_decoder,pipe0_category_accuracy ################# PIPE 0: SHARED ################# diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml new file mode 100644 index 0000000..fe63930 --- /dev/null +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml @@ -0,0 +1,391 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +hyperparameters: + # In here I am putting some of the hyperparameters from spreadsheet. + + question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize + # Accepted formats: a,b,c or [a,b,c] + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + + image_preprocessing: &image_preprocessing normalize + # Accepted formats: a,b,c or [a,b,c] + # none | random_affine | random_horizontal_flip | normalize | all + + # Image encoder. 
+ image_encoder_model: &image_encoder_model vgg16 + # Options: vgg16 | densenet121 | resnet152 | resnet50 + #image_encoder_output_size_val: &image_encoder_output_size_val 100 + # INFO: this variable is not important, as we are using features in this pipeline!! + + # Question encoder. + question_encoder_embeddings: &question_encoder_embeddings glove.6B.50d.txt + # Options: '' | glove.6B.50d.txt | glove.6B.100d.txt | glove.6B.200d.txt | glove.6B.300d.txt | glove.42B.300d.txt | glove.840B.300d.txt | glove.twitter.27B.txt | mimic.fastText.no_clean.300d.pickled + question_encoder_embeddings_size_val: &question_encoder_embeddings_size_val 50 + question_encoder_lstm_size_val: &question_encoder_lstm_size_val 50 + question_encoder_output_size_val: &question_encoder_output_size_val 100 + + # Fusion I: image + question + question_image_fusion_type_val: &question_image_fusion_type VQA_Attention + # Options: ElementWiseMultiplication | VQA_Attention + #question_image_fusion_size_val: &question_image_fusion_size_val 1124 + # INFO: this variable is set by VQA_Attention component! + + # Image size encoder. + image_size_encoder_output_size_val: &image_size_encoder_output_size_val 10 + + # Fusion II: (image + question) + image size (must be = question_image_fusion_size_val + image_size_encoder_output_size_val) + question_image_size_fusion_size_val: &question_image_size_fusion_size_val 1134 + + # Final classifier: FFN. + answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500] + + batch_size: &batch_size 100 + preload_images: &preload_images True + num_workers: &num_workers 0 + +# Training parameters: +training: + problem: + batch_size: *batch_size + categories: C1,C2,C3 + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images. + preload_images: *preload_images + streams: + questions: tokenized_questions + sampler: + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Use four workers for loading images. + dataloader: + num_workers: *num_workers + + # Optimizer parameters: + optimizer: + name: Adam + lr: 0.0001 + + # Terminal conditions: + terminal_conditions: + loss_stop: 1.0e-3 + episode_limit: 10000 + epoch_limit: -1 + +# Validation parameters: +validation: + partial_validation_interval: 100 + problem: + batch_size: *batch_size + categories: C1,C2,C3 + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images: false, as we will need them only once, at the end. + preload_images: false + streams: + questions: tokenized_questions + dataloader: + num_workers: 1 + + +pipeline: + # Disable flow 0. + disable: + pipe0_question_embeddings,pipe0_question_embeddings,pipe0_lstm,pipe0_classifier, + pipe0_category_decoder,pipe0_category_accuracy + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size] #, image_encoder_output_size] #, fused_activation_size] + values: [*question_encoder_output_size_val, 2, *image_size_encoder_output_size_val] #, *image_encoder_output_size_val] #, *question_image_fusion_size_val] + + # Statistics. 
+ batch_size: + priority: 0.1 + type: BatchSizeStatistics + + # Answer encoding. + pipe1_all_answer_indexer: + priority: 0.2 + type: LabelIndexer + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn + + + ################# PIPE 0: QUESTION CATEGORIZATION ################# + + # Add global variables - the ones related to only question categorization. + pipe0_global_publisher: + priority: 0.3 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [pipe0_question_encoder_output_size] + values: [100] + + # Model 1: question embeddings + pipe0_question_embeddings: + priority: 0.4 + type: SentenceEmbeddings + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + globals: + embeddings_size: pipe0_embeddings_size + + # Model 2: question RNN + pipe0_lstm: + priority: 0.5 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_question_activations + globals: + input_size: pipe0_embeddings_size + prediction_size: pipe0_question_encoder_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.6 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.7 + streams: + inputs: pipe0_question_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: pipe0_question_encoder_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.8 + type: WordDecoder + # Use the same word mappings as label indexer. 
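The three pipe0 models above (question embeddings, LSTM, classifier) are restored from an already trained question-categorization checkpoint and kept frozen: load.file points at the checkpoint, model names the sub-model to restore, and freeze: True excludes its weights from optimization. A rough PyTorch sketch of that pattern, assuming the checkpoint stores one state dict per named sub-model (the path and key below are illustrative, not taken from the framework):

    import torch

    # Restore one sub-model from a shared checkpoint (path and key are assumptions).
    checkpoint = torch.load("vqa_med_question_categorization_rnn_ffn_best.pt", map_location="cpu")
    lstm = torch.nn.LSTM(input_size=50, hidden_size=100, batch_first=True)
    lstm.load_state_dict(checkpoint["lstm"])

    # Freeze: keep the restored values, but compute no gradients for them.
    for param in lstm.parameters():
        param.requires_grad = False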
+ import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + priority: 0.9 + type: AccuracyStatistics + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + priority: 1.1 + type: SentenceEmbeddings + embeddings_size: *question_encoder_embeddings_size_val + pretrained_embeddings_file: *question_encoder_embeddings + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + globals: + embeddings_size: pipe1_embeddings_size + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + hidden_size: *question_encoder_lstm_size_val + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: pipe1_embeddings_size + prediction_size: question_encoder_output_size + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. + image_encoder: + priority: 2.1 + type: TorchVisionWrapper + model: *image_encoder_model + return_feature_maps: True + streams: + inputs: images + outputs: feature_maps + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + priority: 3.1 + type: FeedForwardNetwork + use_losfotmax: False + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: image-question fusion ################# + # Attention + FF. + question_image_fusion: + priority: 4.1 + type: *question_image_fusion_type + dropout_rate: 0.5 + # Attention params. 
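The image_encoder above wraps a torchvision backbone, and return_feature_maps: True makes it output the convolutional feature maps rather than the final classification vector; those maps are what the attention fusion below attends over. A minimal sketch of what that amounts to for VGG16 (standard torchvision API; shapes assume the 224x224 inputs these configs resize to):

    import torch
    import torchvision

    backbone = torchvision.models.vgg16(pretrained=True)
    images = torch.randn(2, 3, 224, 224)        # dummy batch of resized images
    feature_maps = backbone.features(images)    # [2, 512, 7, 7]: 512-deep maps fed to attention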
+ latent_size: 100 + num_attention_heads: 2 + streams: + image_encodings: feature_maps + question_encodings: question_activations + outputs: fused_activations + globals: + question_encoding_size: question_encoder_output_size + output_size: fused_activation_size + + + question_image_ffn: + priority: 4.2 + type: FeedForwardNetwork + #hidden_sizes: [*question_image_fusion_size_val] + dropout_rate: 0.5 + use_logsoftmax: False + streams: + inputs: fused_activations + predictions: question_image_activations + globals: + input_size: fused_activation_size + prediction_size: fused_activation_size + + ################# PIPE 5: image-question-image size fusion ################# + + # 5th subpipeline: concatenation + concat: + priority: 5.1 + type: Concatenation + input_streams: [question_image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,1124],[-1,*image_size_encoder_output_size_val]] + output_dims: [-1,*question_image_size_fusion_size_val] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + ################# PIPE 6: C1 + C2 + C3 questions ################# + + # Model 4: FFN C123 answering + pipe6_c123_answer_classifier: + priority: 6.3 + type: FeedForwardNetwork + hidden: *answer_classifier_hidden_sizes_val + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c123_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c123_binary_yn + + pipe6_c123_nllloss: + priority: 6.4 + type: NLLLoss + targets_dim: 1 + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + loss: pipe6_c123_loss + + pipe6_c123_precision_recall: + priority: 6.5 + type: PrecisionRecallStatistics + use_word_mappings: True + show_class_scores: True + #show_confusion_matrix: True + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + globals: + word_mappings: word_mappings_c123_binary_yn + statistics: + precision: pipe6_c123_precision + recall: pipe6_c123_recall + f1score: pipe6_c123_f1score + + # C123 Predictions decoder. + pipe5_c123_prediction_decoder: + priority: 6.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe6_c123_predictions + outputs: predicted_answers + globals: + word_mappings: word_mappings_c123_binary_yn + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. + viewer: + priority: 9.3 + type: StreamViewer + input_streams: + tokenized_questions, + category_names, pipe0_predicted_question_categories_names, + answers, predicted_answers + + +#: pipeline diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml index 5168831..01d13e4 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml @@ -90,6 +90,10 @@ validation: pipeline: + # Disable flow 0. 
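In the answering pipe above, pipe6_c123_nllloss pairs the classifier's predictions with NLLLoss, which in PyTorch expects log-probabilities; that is why the intermediate models in this pipeline set use_logsoftmax: False, while the final answer classifier that feeds the loss is presumably left with its default log-softmax output. A small self-contained check of the equivalence involved:

    import torch

    logits = torch.randn(4, 10)                               # raw classifier outputs
    log_probs = torch.nn.functional.log_softmax(logits, dim=1)
    targets = torch.tensor([1, 0, 3, 9])

    nll = torch.nn.NLLLoss()(log_probs, targets)
    ce = torch.nn.CrossEntropyLoss()(logits, targets)
    assert torch.allclose(nll, ce)   # NLLLoss on log-probs == cross-entropy on raw logits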
+ disable: + pipe0_question_embeddings,pipe0_question_embeddings,pipe0_lstm,pipe0_classifier, + pipe0_category_decoder,pipe0_category_accuracy ################# PIPE 0: SHARED ################# diff --git a/ptp/components/models/vqa/attention.py b/ptp/components/models/vqa/attention.py index 15c7914..88c868f 100644 --- a/ptp/components/models/vqa/attention.py +++ b/ptp/components/models/vqa/attention.py @@ -62,6 +62,8 @@ def __init__(self, name, config): # Output feature size self.output_size = self.feature_maps_depth*self.num_attention_heads + self.question_encoding_size + # Export to globals. + self.globals["output_size"] = self.output_size # Map image and question encodings to a common latent space of dimension 'latent_size'. self.image_encodings_conv = torch.nn.Conv2d(self.feature_maps_depth, self.latent_size, 1, bias=False) From f428b311be43ed67d95c13d10ee7d8713801bed6 Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Sat, 4 May 2019 20:36:08 -0700 Subject: [PATCH 08/14] disabling components related to question categorization --- .../glove_lstm_resnet50_ewm_is_cat_ffn_c123_loss.yml | 4 ++++ .../tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/configs/vqa_med_2019/evaluation/glove_lstm_resnet50_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/glove_lstm_resnet50_ewm_is_cat_ffn_c123_loss.yml index d7ee74b..65eed52 100644 --- a/configs/vqa_med_2019/evaluation/glove_lstm_resnet50_ewm_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/glove_lstm_resnet50_ewm_is_cat_ffn_c123_loss.yml @@ -90,6 +90,10 @@ validation: pipeline: + # Disable flow 0. + disable: + pipe0_question_embeddings,pipe0_question_embeddings,pipe0_lstm,pipe0_classifier, + pipe0_category_decoder,pipe0_category_accuracy ################# PIPE 0: SHARED ################# diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml index 43a838b..f7d4d89 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml @@ -90,6 +90,10 @@ validation: pipeline: + # Disable flow 0. + disable: + pipe0_question_embeddings,pipe0_question_embeddings,pipe0_lstm,pipe0_classifier, + pipe0_category_decoder,pipe0_category_accuracy ################# PIPE 0: SHARED ################# From 92a7b24cf6937bba393a9208fc5a3443cf604056 Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Sun, 5 May 2019 09:54:16 -0700 Subject: [PATCH 09/14] config with att + resnet152 --- ...stm_resnet152_att_is_cat_ffn_c123_loss.yml | 391 ++++++++++++++++++ 1 file changed, 391 insertions(+) create mode 100644 configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml new file mode 100644 index 0000000..cad4b5b --- /dev/null +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml @@ -0,0 +1,391 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +hyperparameters: + # In here I am putting some of the hyperparameters from spreadsheet. 
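The attention.py change above computes the component's output size as feature_maps_depth * num_attention_heads + question_encoding_size and now publishes it to globals, so downstream components read it instead of relying on a hard-coded value. A quick check of the numbers implied by these configs (the feature-map depths are the standard torchvision ones for the wrapped backbones, stated here as assumptions):

    # VGG16 feature maps are 512-deep; with 2 attention heads and a 100-dim question encoding:
    assert 512 * 2 + 100 == 1124      # the 1124 used in the concat component's input_dims
    assert 1124 + 10 == 1134          # plus the 10-dim image-size encoding -> fusion size 1134

    # resnet152 feature maps are 2048-deep, so the same component would export:
    assert 2048 * 2 + 100 == 4196     # picked up downstream via the new globals export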
+ + question_preprocessing: &question_preprocessing lowercase, remove_punctuation, tokenize + # Accepted formats: a,b,c or [a,b,c] + # none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all + + image_preprocessing: &image_preprocessing normalize + # Accepted formats: a,b,c or [a,b,c] + # none | random_affine | random_horizontal_flip | normalize | all + + # Image encoder. + image_encoder_model: &image_encoder_model resnet152 + # Options: vgg16 | densenet121 | resnet152 | resnet50 + #image_encoder_output_size_val: &image_encoder_output_size_val 100 + # INFO: this variable is not important, as we are using features in this pipeline!! + + # Question encoder. + question_encoder_embeddings: &question_encoder_embeddings glove.6B.50d.txt + # Options: '' | glove.6B.50d.txt | glove.6B.100d.txt | glove.6B.200d.txt | glove.6B.300d.txt | glove.42B.300d.txt | glove.840B.300d.txt | glove.twitter.27B.txt | mimic.fastText.no_clean.300d.pickled + question_encoder_embeddings_size_val: &question_encoder_embeddings_size_val 50 + question_encoder_lstm_size_val: &question_encoder_lstm_size_val 50 + question_encoder_output_size_val: &question_encoder_output_size_val 100 + + # Fusion I: image + question + question_image_fusion_type_val: &question_image_fusion_type VQA_Attention + # Options: ElementWiseMultiplication | VQA_Attention + #question_image_fusion_size_val: &question_image_fusion_size_val 1124 + # INFO: this variable is set by VQA_Attention component! + + # Image size encoder. + image_size_encoder_output_size_val: &image_size_encoder_output_size_val 10 + + # Fusion II: (image + question) + image size (must be = question_image_fusion_size_val + image_size_encoder_output_size_val) + question_image_size_fusion_size_val: &question_image_size_fusion_size_val 1134 + + # Final classifier: FFN. + answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500] + + batch_size: &batch_size 256 + preload_images: &preload_images True + num_workers: &num_workers 0 + +# Training parameters: +training: + problem: + batch_size: *batch_size + categories: C1,C2,C3 + export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images. + preload_images: *preload_images + streams: + questions: tokenized_questions + sampler: + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + # Use four workers for loading images. + dataloader: + num_workers: *num_workers + + # Optimizer parameters: + optimizer: + name: Adam + lr: 0.0001 + + # Terminal conditions: + terminal_conditions: + loss_stop: 1.0e-3 + episode_limit: 10000 + epoch_limit: -1 + +# Validation parameters: +validation: + partial_validation_interval: 100 + problem: + batch_size: *batch_size + categories: C1,C2,C3 + # Appy all preprocessing/data augmentations. + question_preprocessing: *question_preprocessing + image_preprocessing: *image_preprocessing + # Preload images: false, as we will need them only once, at the end. + preload_images: false + streams: + questions: tokenized_questions + dataloader: + num_workers: 1 + + +pipeline: + # Disable flow 0. + disable: + pipe0_question_embeddings,pipe0_question_embeddings,pipe0_lstm,pipe0_classifier, + pipe0_category_decoder,pipe0_category_accuracy + + ################# PIPE 0: SHARED ################# + + # Add global variables. 
+ global_publisher: + priority: 0 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [question_encoder_output_size, image_size_encoder_input_size, image_size_encoder_output_size] #, image_encoder_output_size] #, fused_activation_size] + values: [*question_encoder_output_size_val, 2, *image_size_encoder_output_size_val] #, *image_encoder_output_size_val] #, *question_image_fusion_size_val] + + # Statistics. + batch_size: + priority: 0.1 + type: BatchSizeStatistics + + # Answer encoding. + pipe1_all_answer_indexer: + priority: 0.2 + type: LabelIndexer + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c123_binary_yn + word_mappings: word_mappings_c123_binary_yn + + + ################# PIPE 0: QUESTION CATEGORIZATION ################# + + # Add global variables - the ones related to only question categorization. + pipe0_global_publisher: + priority: 0.3 + type: GlobalVariablePublisher + # Add input_size to globals. + keys: [pipe0_question_encoder_output_size] + values: [100] + + # Model 1: question embeddings + pipe0_question_embeddings: + priority: 0.4 + type: SentenceEmbeddings + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + globals: + embeddings_size: pipe0_embeddings_size + + # Model 2: question RNN + pipe0_lstm: + priority: 0.5 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_question_activations + globals: + input_size: pipe0_embeddings_size + prediction_size: pipe0_question_encoder_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.6 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.7 + streams: + inputs: pipe0_question_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: pipe0_question_encoder_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.8 + type: WordDecoder + # Use the same word mappings as label indexer. 
+ import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + priority: 0.9 + type: AccuracyStatistics + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + priority: 1.1 + type: SentenceEmbeddings + embeddings_size: *question_encoder_embeddings_size_val + pretrained_embeddings_file: *question_encoder_embeddings + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + globals: + embeddings_size: pipe1_embeddings_size + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + hidden_size: *question_encoder_lstm_size_val + prediction_mode: Last + initial_state: Trainable + use_logsoftmax: False + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: pipe1_embeddings_size + prediction_size: question_encoder_output_size + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. + image_encoder: + priority: 2.1 + type: TorchVisionWrapper + model: *image_encoder_model + return_feature_maps: True + streams: + inputs: images + outputs: feature_maps + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + priority: 3.1 + type: FeedForwardNetwork + use_losfotmax: False + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: image-question fusion ################# + # Attention + FF. + question_image_fusion: + priority: 4.1 + type: *question_image_fusion_type + dropout_rate: 0.5 + # Attention params. 
+ latent_size: 100 + num_attention_heads: 2 + streams: + image_encodings: feature_maps + question_encodings: question_activations + outputs: fused_activations + globals: + question_encoding_size: question_encoder_output_size + output_size: fused_activation_size + + + question_image_ffn: + priority: 4.2 + type: FeedForwardNetwork + #hidden_sizes: [*question_image_fusion_size_val] + dropout_rate: 0.5 + use_logsoftmax: False + streams: + inputs: fused_activations + predictions: question_image_activations + globals: + input_size: fused_activation_size + prediction_size: fused_activation_size + + ################# PIPE 5: image-question-image size fusion ################# + + # 5th subpipeline: concatenation + concat: + priority: 5.1 + type: Concatenation + input_streams: [question_image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,1124],[-1,*image_size_encoder_output_size_val]] + output_dims: [-1,*question_image_size_fusion_size_val] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + ################# PIPE 6: C1 + C2 + C3 questions ################# + + # Model 4: FFN C123 answering + pipe6_c123_answer_classifier: + priority: 6.3 + type: FeedForwardNetwork + hidden: *answer_classifier_hidden_sizes_val + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c123_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c123_binary_yn + + pipe6_c123_nllloss: + priority: 6.4 + type: NLLLoss + targets_dim: 1 + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + loss: pipe6_c123_loss + + pipe6_c123_precision_recall: + priority: 6.5 + type: PrecisionRecallStatistics + use_word_mappings: True + show_class_scores: True + #show_confusion_matrix: True + streams: + predictions: pipe6_c123_predictions + targets: answers_ids + globals: + word_mappings: word_mappings_c123_binary_yn + statistics: + precision: pipe6_c123_precision + recall: pipe6_c123_recall + f1score: pipe6_c123_f1score + + # C123 Predictions decoder. + pipe5_c123_prediction_decoder: + priority: 6.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe6_c123_predictions + outputs: predicted_answers + globals: + word_mappings: word_mappings_c123_binary_yn + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. + viewer: + priority: 9.3 + type: StreamViewer + input_streams: + tokenized_questions, + category_names, pipe0_predicted_question_categories_names, + answers, predicted_answers + + +#: pipeline From 2aef2990de0f14e162340a40a2edfdb261336389 Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Sun, 5 May 2019 11:30:34 -0700 Subject: [PATCH 10/14] Fix in MCB enabling loading/storing projection matrices, flag for making them trainable --- .../multimodal_compact_bilinear_pooling.yml | 4 +++ .../c2_classification_all_rnn_vgg16_mcb.yml | 2 +- .../multimodal_compact_bilinear_pooling.py | 28 +++++++++++-------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/configs/default/components/models/vqa/multimodal_compact_bilinear_pooling.yml b/configs/default/components/models/vqa/multimodal_compact_bilinear_pooling.yml index 3d8a98a..9c5d8f4 100644 --- a/configs/default/components/models/vqa/multimodal_compact_bilinear_pooling.yml +++ b/configs/default/components/models/vqa/multimodal_compact_bilinear_pooling.yml @@ -4,6 +4,10 @@ # 1. 
CONFIGURATION PARAMETERS that will be LOADED by the component. #################################################################### +# Parameter denoting whether projection matrices are trainable (LOADED) +# Setting flag that to true will result in trainable, dense (i.e. not "sketch") projection layers. +trainable_projections: False + streams: #################################################################### # 2. Keymappings associated with INPUT and OUTPUT streams. diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml index d28a24f..075c958 100644 --- a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_mcb.yml @@ -8,7 +8,7 @@ pipeline: type: GlobalVariablePublisher # Add input_size to globals. keys: [question_encoder_output_size, image_encoder_output_size, fused_image_question_activation_size] - values: [200, 1000, 100] + values: [200, 500, 100] ################# PIPE 0: question ################# # Questions encoding. diff --git a/ptp/components/models/vqa/multimodal_compact_bilinear_pooling.py b/ptp/components/models/vqa/multimodal_compact_bilinear_pooling.py index 4e2c6be..0a3953c 100644 --- a/ptp/components/models/vqa/multimodal_compact_bilinear_pooling.py +++ b/ptp/components/models/vqa/multimodal_compact_bilinear_pooling.py @@ -61,8 +61,13 @@ def __init__(self, name, config): self.output_size = self.globals["output_size"] # Initialize sketch projection matrices. - self.image_sketch_projection_matrix = self.generate_count_sketch_projection_matrix(self.image_encoding_size, self.output_size) - self.question_sketch_projection_matrix = self.generate_count_sketch_projection_matrix(self.question_encoding_size, self.output_size) + image_sketch_projection_matrix = self.generate_count_sketch_projection_matrix(self.image_encoding_size, self.output_size) + question_sketch_projection_matrix = self.generate_count_sketch_projection_matrix(self.question_encoding_size, self.output_size) + + # Make them parameters of the model, so can be stored/loaded and trained (optionally). + trainable_projections = self.config["trainable_projections"] + self.image_sketch_projection_matrix = torch.nn.Parameter(image_sketch_projection_matrix, requires_grad=trainable_projections) + self.question_sketch_projection_matrix = torch.nn.Parameter(question_sketch_projection_matrix, requires_grad=trainable_projections) def generate_count_sketch_projection_matrix(self, input_size, output_size): @@ -77,21 +82,22 @@ def generate_count_sketch_projection_matrix(self, input_size, output_size): # Generate s: 1 or -1 s = 2 * np.random.randint(2, size=input_size) - 1 s = torch.from_numpy(s) - #print("s=",s) + #print("s=",s.shape) # Generate h (indices) h = np.random.randint(output_size, size=input_size) - #print("h=",h) + #print("h=",h.shape) indices = np.concatenate((np.arange(input_size)[..., np.newaxis],h[..., np.newaxis]), axis=1) indices = torch.from_numpy(indices) - #print("indices=",indices) + #print("indices=",indices.shape) # Generate sparse matrix. sparse_sketch_matrix = torch.sparse.FloatTensor(indices.t(), s, torch.Size([input_size, output_size])) - #print("\n sparse_sketch_matrix=",sparse_sketch_matrix) + #print("\n sparse_sketch_matrix=",sparse_sketch_matrix.shape) # Return dense matrix. 
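The multimodal_compact_bilinear_pooling.py hunk above wraps the two count-sketch projection matrices in torch.nn.Parameter, so they are saved and restored with the model and, when trainable_projections is True, updated by the optimizer like any other weight; otherwise they stay fixed random sketches. Each matrix has exactly one non-zero entry of value +1 or -1 per input row: input feature i lands in output bin h[i] with sign s[i]. A self-contained sketch of the same construction, with assumed sizes:

    import numpy as np
    import torch

    def count_sketch_matrix(input_size, output_size):
        # One entry per row: feature i goes to bin h[i] with random sign s[i].
        s = 2 * np.random.randint(2, size=input_size) - 1
        h = np.random.randint(output_size, size=input_size)
        matrix = torch.zeros(input_size, output_size)
        matrix[torch.arange(input_size), torch.from_numpy(h)] = torch.from_numpy(s).float()
        return matrix

    # As a Parameter it lands in state_dict(); requires_grad decides trainability.
    projection = torch.nn.Parameter(count_sketch_matrix(512, 1024), requires_grad=False)
    sketch = torch.randn(8, 512).mm(projection)   # count sketch of a batch of 8 encodings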
dense_ssm = sparse_sketch_matrix.to_dense().type(self.app_state.FloatTensor) #print("\n dense_ssm=",dense_ssm) + return dense_ssm @@ -125,16 +131,16 @@ def forward(self, data_dict): :param data_dict: DataDict({'images',**}) :type data_dict: ``ptp.dadatypes.DataDict`` """ - # Unpack DataDict. enc_img = data_dict[self.key_image_encodings] enc_q = data_dict[self.key_question_encodings] - #print("\n enc_img=",enc_img) - #print("\n image_sketch_projection_matrix=",self.image_sketch_projection_matrix) + + sketch_pm_img = self.image_sketch_projection_matrix + sketch_pm_q = self.question_sketch_projection_matrix # Project both batches. - sketch_img = enc_img.mm(self.image_sketch_projection_matrix) - sketch_q = enc_q.mm(self.question_sketch_projection_matrix) + sketch_img = enc_img.mm(sketch_pm_img) + sketch_q = enc_q.mm(sketch_pm_q) # Add imaginary parts (with zeros). sketch_img_reim = torch.stack([sketch_img, torch.zeros(sketch_img.shape).type(self.app_state.FloatTensor)], dim=2) From 36839bc1686fe1af5edae8dec0b9965d089e3864 Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Sun, 5 May 2019 12:16:32 -0700 Subject: [PATCH 11/14] Fixed verions of pytorch/torchvision/torchtext --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index c3dba09..72bfa6e 100644 --- a/setup.py +++ b/setup.py @@ -166,9 +166,9 @@ 'nltk', 'pandas', 'pillow', - #'torchtext', - 'torchvision', - 'torch', + 'torchtext==0.3.1', + 'torchvision==0.1.9', + 'torch==1.0.1', 'PyYAML', 'requests' ], From 96200f83e7d387cdeb1616cc55f1e4660952a38e Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Sun, 5 May 2019 12:29:34 -0700 Subject: [PATCH 12/14] torchvision version==0.2.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 72bfa6e..b7ba573 100644 --- a/setup.py +++ b/setup.py @@ -167,7 +167,7 @@ 'pandas', 'pillow', 'torchtext==0.3.1', - 'torchvision==0.1.9', + 'torchvision==0.2.1', 'torch==1.0.1', 'PyYAML', 'requests' From 5482a3e49faffb14b6267c4cb3affd722bdfb21d Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Sun, 5 May 2019 12:52:17 -0700 Subject: [PATCH 13/14] renamted exemplary config, added new one for glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss --- ...le_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml} | 3 ++- ...glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml | 2 +- ...love_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml} | 12 ++++++------ .../glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml | 2 +- .../glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml | 2 +- .../glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml | 12 ++++++------ 6 files changed, 17 insertions(+), 16 deletions(-) rename configs/vqa_med_2019/evaluation/{mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml => example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml} (99%) rename configs/vqa_med_2019/evaluation/{glove_lstm_resnet50_ewm_is_cat_ffn_c123_loss.yml => tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml} (97%) diff --git a/configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml similarity index 99% rename from configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml rename to configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml index 516df63..2f498db 100644 --- a/configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml +++ 
b/configs/vqa_med_2019/evaluation/example_mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml @@ -38,9 +38,10 @@ hyperparameters: # Final classifier: FFN. answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [83] + # Parameters related to GPU/CPU distribution. batch_size: &batch_size 200 preload_images: &preload_images True - num_workers: &num_workers 0 + num_workers: &num_workers 1 # Training parameters: training: diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml index cad4b5b..3157f00 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_att_is_cat_ffn_c123_loss.yml @@ -42,7 +42,7 @@ hyperparameters: batch_size: &batch_size 256 preload_images: &preload_images True - num_workers: &num_workers 0 + num_workers: &num_workers 1 # Training parameters: training: diff --git a/configs/vqa_med_2019/evaluation/glove_lstm_resnet50_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml similarity index 97% rename from configs/vqa_med_2019/evaluation/glove_lstm_resnet50_ewm_is_cat_ffn_c123_loss.yml rename to configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml index 65eed52..f89a53e 100644 --- a/configs/vqa_med_2019/evaluation/glove_lstm_resnet50_ewm_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_resnet152_mcb_is_cat_ffn_c123_loss.yml @@ -13,7 +13,7 @@ hyperparameters: # none | random_affine | random_horizontal_flip | normalize | all # Image encoder. - image_encoder_model: &image_encoder_model resnet50 + image_encoder_model: &image_encoder_model resnet152 # Options: vgg16 | densenet121 | resnet152 | resnet50 image_encoder_output_size_val: &image_encoder_output_size_val 100 @@ -25,8 +25,8 @@ hyperparameters: question_encoder_output_size_val: &question_encoder_output_size_val 100 # Fusion I: image + question - question_image_fusion_type_val: &question_image_fusion_type ElementWiseMultiplication - # Options: ElementWiseMultiplication | ? (component: question_image_fusion) + question_image_fusion_type_val: &question_image_fusion_type MultimodalCompactBilinearPooling + # Options: ElementWiseMultiplication | MultimodalCompactBilinearPooling | VQA_Attention question_image_fusion_size_val: &question_image_fusion_size_val 100 # Image size encoder. @@ -38,9 +38,9 @@ hyperparameters: # Final classifier: FFN. 
answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [100] - batch_size: &batch_size 64 - preload_images: &preload_images True - num_workers: &num_workers 0 + batch_size: &batch_size 150 + preload_images: &preload_images False + num_workers: &num_workers 3 # Training parameters: training: diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml index fe63930..5dde544 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_att_is_cat_ffn_c123_loss.yml @@ -42,7 +42,7 @@ hyperparameters: batch_size: &batch_size 100 preload_images: &preload_images True - num_workers: &num_workers 0 + num_workers: &num_workers 1 # Training parameters: training: diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml index 01d13e4..6e1fa71 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml @@ -40,7 +40,7 @@ hyperparameters: batch_size: &batch_size 100 preload_images: &preload_images True - num_workers: &num_workers 0 + num_workers: &num_workers 1 # Training parameters: training: diff --git a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml index f7d4d89..4f52097 100644 --- a/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml +++ b/configs/vqa_med_2019/evaluation/tom/glove_lstm_vgg16_mcb_is_cat_ffn_c123_loss.yml @@ -15,7 +15,7 @@ hyperparameters: # Image encoder. image_encoder_model: &image_encoder_model vgg16 # Options: vgg16 | densenet121 | resnet152 | resnet50 - image_encoder_output_size_val: &image_encoder_output_size_val 100 + image_encoder_output_size_val: &image_encoder_output_size_val 1000 # Question encoder. question_encoder_embeddings: &question_encoder_embeddings glove.6B.50d.txt @@ -27,20 +27,20 @@ hyperparameters: # Fusion I: image + question question_image_fusion_type_val: &question_image_fusion_type MultimodalCompactBilinearPooling # Options: ElementWiseMultiplication | MultimodalCompactBilinearPooling | - question_image_fusion_size_val: &question_image_fusion_size_val 100 + question_image_fusion_size_val: &question_image_fusion_size_val 200 # Image size encoder. image_size_encoder_output_size_val: &image_size_encoder_output_size_val 10 # Fusion II: (image + question) + image size (must be = question_image_fusion_size_val + image_size_encoder_output_size_val) - question_image_size_fusion_size_val: &question_image_size_fusion_size_val 110 + question_image_size_fusion_size_val: &question_image_size_fusion_size_val 210 # Final classifier: FFN. 
- answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [100] + answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [500] - batch_size: &batch_size 100 + batch_size: &batch_size 200 preload_images: &preload_images True - num_workers: &num_workers 0 + num_workers: &num_workers 1 # Training parameters: training: From 17ed8d91cdd7d6deaf08fe6b27a230a9b9d253ca Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta Date: Sun, 5 May 2019 13:45:47 -0700 Subject: [PATCH 14/14] default q categorization: streaming images: False, batch: 256 --- .../default_question_categorization.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/configs/vqa_med_2019/question_categorization/default_question_categorization.yml b/configs/vqa_med_2019/question_categorization/default_question_categorization.yml index 79c56ea..4a0f255 100644 --- a/configs/vqa_med_2019/question_categorization/default_question_categorization.yml +++ b/configs/vqa_med_2019/question_categorization/default_question_categorization.yml @@ -5,6 +5,9 @@ training: problem: categories: all export_sample_weights: ~/data/vqa-med/answers.all.weights.csv + # Do not load and stream images! + stream_images: False + batch_size: 256 sampler: weights: ~/data/vqa-med/answers.all.weights.csv terminal_conditions: @@ -13,6 +16,9 @@ training: validation: problem: categories: all + # Do not load and stream images! + stream_images: False + batch_size: 256 pipeline:
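The final patch makes the question-categorization defaults skip image loading entirely (stream_images: False) and raises the batch size to 256; since that pipeline only consumes tokenized questions, dropping image I/O is what makes the larger batches cheap. A config deriving from these defaults could override them again if its model does need the images, for example (hypothetical override, key names taken from the defaults above):

    training:
      problem:
        stream_images: True    # load images again for models that consume them
        batch_size: 64         # and use smaller batches once image tensors are in play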