From a86a5433250f8796783a0e1b560571f218de9190 Mon Sep 17 00:00:00 2001 From: Alexis Asseman <33075224+aasseman@users.noreply.github.com> Date: Thu, 18 Apr 2019 15:39:36 -0700 Subject: [PATCH 01/39] Added more modes to RNN (untested): - Optional input/output stream for hidden state - Autoregressive mode: First / No inputs --- .../models/recurrent_neural_network.yml | 22 ++- .../models/recurrent_neural_network.py | 126 ++++++++++++------ 2 files changed, 106 insertions(+), 42 deletions(-) diff --git a/configs/default/components/models/recurrent_neural_network.yml b/configs/default/components/models/recurrent_neural_network.yml index 3249555..f43a5bf 100644 --- a/configs/default/components/models/recurrent_neural_network.yml +++ b/configs/default/components/models/recurrent_neural_network.yml @@ -9,7 +9,15 @@ hidden_size: 100 # Flag informing the model to learn the intial state (h0/c0) (LOADED) # When false, (c0/c0) will be initialized as zeros. -initial_state_trainable: True + +# Initial state type: +# * Zero (null vector) +# * Trainable (xavier initialization, trainable) +# * Input (the initial hidden state comes from an input stream) +initial_state: Trainable + +# Whether to include the last hidden state in the outputs +output_last_state: False # Type of recurrent cell (LOADED) # Options: LSTM | GRU | RNN_TANH | RNN_RELU @@ -25,9 +33,19 @@ dropout_rate: 0 # Prediction mode (LOADED) # Options: # * Dense (passes every activation through output layer) | -# * Last (passes only the last activation though output layer) +# * Last (passes only the last activation through output layer) | +# * None (all outputs are discarded) prediction_mode: Dense +# Input mode +# Options: +# * Dense (every iteration expects an input) +# * Autoregression_First (Autoregression, expects an input for the first iteration) +# * Autoregression_None (Autoregression, first input will be a null vector) +input_mode: Dense + +autoregression_length: 42 + # If true, output of the last layer will be 
additionally processed with Log Softmax (LOADED) use_logsoftmax: True diff --git a/ptp/components/models/recurrent_neural_network.py b/ptp/components/models/recurrent_neural_network.py index 612c6dd..d06a48a 100644 --- a/ptp/components/models/recurrent_neural_network.py +++ b/ptp/components/models/recurrent_neural_network.py @@ -35,9 +35,22 @@ def __init__(self, name, config): # Call constructors of parent classes. Model.__init__(self, name, RecurrentNeuralNetwork, config) - # Get key mappings. - self.key_inputs = self.stream_keys["inputs"] - self.key_predictions = self.stream_keys["predictions"] + # Get input/output mode + self.input_mode = self.config["input_mode"] + self.output_last_state = self.config["output_last_state"] + + # Get prediction mode from configuration. + self.prediction_mode = self.config["prediction_mode"] + if self.prediction_mode not in ['Dense','Last', 'None']: + raise ConfigurationError("Invalid 'prediction_mode' (current {}, available {})".format(self.prediction_mode, ['Dense','Last', 'None'])) + + self.autoregression_length = self.config["autoregression_length"] + + # Check if initial state (h0/c0) is zero, trainable, or coming from input stream. + self.initial_state = self.config["initial_state"] + + # Get number of layers from config. + self.num_layers = self.config["num_layers"] # Retrieve input size from global variables. self.key_input_size = self.global_keys["input_size"] @@ -56,11 +69,6 @@ def __init__(self, name, config): else: raise ConfigurationError("RNN prediction size '{}' must be a single dimension (current {})".format(self.key_prediction_size, self.prediction_size)) - # Get prediction mode from configuration. - self.prediction_mode = self.config["prediction_mode"] - if self.prediction_mode not in ['Dense','Last']: - raise ConfigurationError("Invalid 'prediction_mode' (current {}, available {})".format(self.prediction_mode, ['Dense','Last'])) - # Retrieve hidden size from configuration. 
self.hidden_size = self.config["hidden_size"] if type(self.hidden_size) == list: @@ -69,14 +77,12 @@ def __init__(self, name, config): else: raise ConfigurationError("RNN hidden_size must be a single dimension (current {})".format(self.hidden_size)) - self.logger.info("Initializing RNN with input size = {}, hidden size = {} and prediction size = {}".format(self.input_size, self.hidden_size, self.prediction_size)) - - # Get number of layers from config. - self.num_layers = self.config["num_layers"] - # Get dropout rate value from config. dropout_rate = self.config["dropout_rate"] + # Create dropout layer. + self.dropout = torch.nn.Dropout(dropout_rate) + # Create RNN depending on the configuration self.cell_type = self.config["cell_type"] if self.cell_type in ['LSTM', 'GRU']: @@ -88,18 +94,14 @@ def __init__(self, name, config): nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[self.cell_type] # Create rnn cell. self.rnn_cell = torch.nn.RNN(self.input_size, self.hidden_size, self.num_layers, nonlinearity=nonlinearity, dropout=dropout_rate, batch_first=True) - except KeyError: raise ConfigurationError( "Invalid RNN type, available options for 'cell_type' are ['LSTM', 'GRU', 'RNN_TANH', 'RNN_RELU'] (currently '{}')".format(self.cell_type)) - # Check if initial state (h0/c0) are trainable or not. - self.initial_state_trainable = self.config["initial_state_trainable"] - - # Parameters - for a single sample. + # Parameters - for a single sample. h0 = torch.zeros(self.num_layers, 1, self.hidden_size) c0 = torch.zeros(self.num_layers, 1, self.hidden_size) - if self.initial_state_trainable: + if self.initial_state == "Trainable": self.logger.info("Using trainable initial (h0/c0) state") # Initialize a single vector used as hidden state. # Initialize it using xavier initialization. 
@@ -110,15 +112,24 @@ def __init__(self, name, config): if self.cell_type == 'LSTM': torch.nn.init.xavier_uniform(c0) self.init_memory = torch.nn.Parameter(c0, requires_grad=True) - else: + elif self.initial_state == "Zero": self.logger.info("Using zero initial (h0/c0) state") # We will still embedd it into parameter to enable storing/loading of both types of models by each other. self.init_hidden = torch.nn.Parameter(h0, requires_grad=False) if self.cell_type == 'LSTM': self.init_memory = torch.nn.Parameter(c0, requires_grad=False) - # Create dropout layer. - self.dropout = torch.nn.Dropout(dropout_rate) + # Get key mappings. + if "None" not in self.input_mode: + self.key_inputs = self.stream_keys["inputs"] + if "None" not in self.prediction_mode: + self.key_predictions = self.stream_keys["predictions"] + if self.initial_state == "Input": + self.key_input_state = self.stream_keys["input_state"] + if self.output_last_state: + self.key_output_state = self.stream_keys["output_state"] + + self.logger.info("Initializing RNN with input size = {}, hidden size = {} and prediction size = {}".format(self.input_size, self.hidden_size, self.prediction_size)) # Create the output layer. self.activation2output = torch.nn.Linear(self.hidden_size, self.prediction_size) @@ -151,10 +162,17 @@ def input_data_definitions(self): :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`). 
""" - return { - self.key_inputs: DataDefinition([-1, -1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]"), - } + d = {} + + if self.input_mode == "Dense": + d[self.key_inputs] = DataDefinition([-1, -1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]") + elif self.input_mode == "Autoregression_First": + d[self.key_inputs] = DataDefinition([-1, 1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]") + if "Autoregression" in self.input_mode: + d[self.key_input_state] = DataDefinition([-1, 2 if self.cell_type == 'LSTM' else 1, self.input_size, 1, self.hidden_size]) + + return d def output_data_definitions(self): """ @@ -162,17 +180,18 @@ def output_data_definitions(self): :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). """ + d = {} if self.prediction_mode == "Dense": - return { - self.key_predictions: DataDefinition([-1, -1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]") - } - else: # "Last" - return { - # Only last prediction. - self.key_predictions: DataDefinition([-1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]") - } + d[self.key_predictions] = DataDefinition([-1, -1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]") + elif self.prediction_mode == "Last": # "Last" + # Only last prediction. 
+ d[self.key_predictions] = DataDefinition([-1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]") + if self.output_last_state: + d[self.key_output_state] = DataDefinition([-1, 2 if self.cell_type == 'LSTM' else 1, self.input_size, 1, self.hidden_size]) + + return d def forward(self, data_dict): """ @@ -185,14 +204,33 @@ def forward(self, data_dict): """ # Get inputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE] - inputs = data_dict[self.key_inputs] - batch_size = inputs.shape[0] + if "None" in self.input_mode: + batch_size = data_dict[self.key_input_state].shape[0] + inputs = torch.zeros(1, self.input_size, self.hidden_size) + else: + batch_size = inputs.shape[0] + inputs = data_dict[self.key_inputs] + + # Initialize hidden state. hidden = self.initialize_hiddens_state(batch_size) # Propagate inputs through rnn cell. - activations, hidden = self.rnn_cell(inputs, hidden) + if "Autoregression" in self.input_mode: + activations_partial, hidden = self.rnn_cell(inputs, hidden) + activations += [activations_partial] + for i in range(self.autoregression_length - 1): + activations_partial, hidden = self.rnn_cell(activations_partial, hidden) + if self.prediction_mode == "Dense": + activations += [activations_partial] + if self.prediction_mode == "Dense": + activations = torch.stack(activations, 1) + else: + activations = activations_partial + else: + activations, hidden = self.rnn_cell(inputs, hidden) + # Propagate activations through dropout layer. activations = self.dropout(activations) @@ -211,7 +249,10 @@ def forward(self, data_dict): # Log softmax - along PREDICTION dim. if self.use_logsoftmax: outputs = self.log_softmax(outputs) - else: + + # Add predictions to datadict. + data_dict.extend({self.key_predictions: outputs}) + elif self.prediction_mode == "Last": # Pass only the last activation through the output layer. 
outputs = activations.contiguous()[:, -1, :].squeeze() # Propagate data through the output layer [BATCH_SIZE x PREDICTION_SIZE] @@ -219,6 +260,11 @@ def forward(self, data_dict): # Log softmax - along PREDICTION dim. if self.use_logsoftmax: outputs = self.log_softmax(outputs) - - # Add predictions to datadict. - data_dict.extend({self.key_predictions: outputs}) + # Add predictions to datadict. + data_dict.extend({self.key_predictions: outputs}) + elif self.prediction_mode == "None": + # Nothing, since we don't want to keep the RNN's outputs + pass + + if self.output_last_state: + data_dict.extend({self.key_output_state: hidden}) From 5a7b8482896baffb659816cb9dadf0ff05d9ab67 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 15 Apr 2019 14:39:19 -0700 Subject: [PATCH 02/39] c3_classification_all_bow_vgg16_concat.yml --- ...c3_classification_all_bow_vgg16_concat.yml | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml diff --git a/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml b/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml new file mode 100644 index 0000000..ab08377 --- /dev/null +++ b/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml @@ -0,0 +1,95 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c3_classification/default_c3_classification.yml + +pipeline: + name: vqa_med_c3_classification_all_bow_concat + + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size] + values: [2, 10, 100] + + # First subpipeline: question. + # Questions encoding. 
+ question_tokenizer: + type: SentenceTokenizer + priority: 1.1 + streams: + inputs: questions + outputs: tokenized_questions + + question_onehot_encoder: + type: SentenceOneHotEncoder + priority: 1.2 + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + export_word_mappings_to_globals: True + streams: + inputs: tokenized_questions + outputs: encoded_questions + globals: + vocabulary_size: question_vocabulary_size + + question_bow_encoder: + type: BOWEncoder + priority: 1.3 + streams: + inputs: encoded_questions + outputs: question_activations + globals: + bow_size: question_vocabulary_size + + + # 2nd subpipeline: image size. + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + #hidden_sizes: [100] + priority: 2.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + # 3rd subpipeline: image. + # Image encoder. + image_encoder: + type: TorchVisionWrapper + priority: 3.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + # 4th subpipeline: concatenation + FF. 
+ concat: + type: Concatenation + priority: 4.1 + input_streams: [question_activations,image_size_activations,image_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,99],[-1,10],[-1,100]] + output_dims: [-1,209] + streams: + outputs: concatenated_activations + globals: + output_size: output_size + + + classifier: + type: FeedForwardNetwork + hidden_sizes: [100] + priority: 4.2 + streams: + inputs: concatenated_activations + globals: + input_size: output_size + prediction_size: vocabulary_size_c3 + + + #: pipeline From 4bd9856244eff1db257e495e82e2146c5b64ce0a Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 15 Apr 2019 15:29:37 -0700 Subject: [PATCH 03/39] c1 update + all bow + vgg16 + size config --- ...c1_classification_all_bow_vgg16_concat.yml | 94 +++++++++++++++++++ .../default_c1_classification.yml | 3 +- ...c3_classification_all_bow_vgg16_concat.yml | 1 - 3 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 configs/vqa_med_2019/c1_classification/c1_classification_all_bow_vgg16_concat.yml diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_all_bow_vgg16_concat.yml b/configs/vqa_med_2019/c1_classification/c1_classification_all_bow_vgg16_concat.yml new file mode 100644 index 0000000..b2e6ce1 --- /dev/null +++ b/configs/vqa_med_2019/c1_classification/c1_classification_all_bow_vgg16_concat.yml @@ -0,0 +1,94 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml + +pipeline: + name: vqa_med_c1_classification_all_bow_vgg16_concat + + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size] + values: [2, 10, 100] + + # First subpipeline: question. + # Questions encoding. 
+ question_tokenizer: + type: SentenceTokenizer + priority: 1.1 + streams: + inputs: questions + outputs: tokenized_questions + + question_onehot_encoder: + type: SentenceOneHotEncoder + priority: 1.2 + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + export_word_mappings_to_globals: True + streams: + inputs: tokenized_questions + outputs: encoded_questions + globals: + vocabulary_size: question_vocabulary_size + + question_bow_encoder: + type: BOWEncoder + priority: 1.3 + streams: + inputs: encoded_questions + outputs: question_activations + globals: + bow_size: question_vocabulary_size + + + # 2nd subpipeline: image size. + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 2.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + # 3rd subpipeline: image. + # Image encoder. + image_encoder: + type: TorchVisionWrapper + priority: 3.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + # 4th subpipeline: concatenation + FF. 
+ concat: + type: Concatenation + priority: 4.1 + input_streams: [question_activations,image_size_activations,image_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,99],[-1,10],[-1,100]] + output_dims: [-1,209] + streams: + outputs: concatenated_activations + globals: + output_size: output_size + + + classifier: + type: FeedForwardNetwork + hidden_sizes: [100] + priority: 4.2 + streams: + inputs: concatenated_activations + globals: + input_size: output_size + prediction_size: vocabulary_size_c1 + + + #: pipeline diff --git a/configs/vqa_med_2019/c1_classification/default_c1_classification.yml b/configs/vqa_med_2019/c1_classification/default_c1_classification.yml index 0ae125a..abf631c 100644 --- a/configs/vqa_med_2019/c1_classification/default_c1_classification.yml +++ b/configs/vqa_med_2019/c1_classification/default_c1_classification.yml @@ -15,7 +15,6 @@ training: validation: problem: categories: C1 - batch_size: 500 dataloader: num_workers: 4 @@ -25,7 +24,7 @@ pipeline: # Answer encoding. answer_indexer: type: LabelIndexer - priority: 2 + priority: 0.1 data_folder: ~/data/vqa-med word_mappings_file: answers.c1.word.mappings.csv # Export mappings and size to globals. diff --git a/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml b/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml index ab08377..1eea8d1 100644 --- a/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml +++ b/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml @@ -46,7 +46,6 @@ pipeline: # Model - image size classifier. 
image_size_encoder: type: FeedForwardNetwork - #hidden_sizes: [100] priority: 2.1 streams: inputs: image_sizes From ea0681cd042d9b49147986439430d5aa92e4f214 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 15 Apr 2019 15:38:28 -0700 Subject: [PATCH 04/39] renaming components in question categorization --- .../question_categorization_onehot_bow.yml | 10 +++++----- .../question_categorization_onehot_rnn.yml | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/configs/vqa_med_2019/question_categorization/question_categorization_onehot_bow.yml b/configs/vqa_med_2019/question_categorization/question_categorization_onehot_bow.yml index 79f767a..ced3ad0 100644 --- a/configs/vqa_med_2019/question_categorization/question_categorization_onehot_bow.yml +++ b/configs/vqa_med_2019/question_categorization/question_categorization_onehot_bow.yml @@ -12,7 +12,7 @@ pipeline: inputs: questions outputs: tokenized_questions - sentence_encoder: + question_encoder: type: SentenceOneHotEncoder priority: 1.2 data_folder: ~/data/vqa-med @@ -22,7 +22,7 @@ pipeline: inputs: tokenized_questions outputs: encoded_questions globals: - vocabulary_size: sentence_vocabulary_size + vocabulary_size: question_vocabulary_size bow_encoder: type: BOWEncoder @@ -31,17 +31,17 @@ pipeline: inputs: encoded_questions outputs: bow_questions globals: - bow_size: sentence_vocabulary_size # Set by sentence_encoder. + bow_size: question_vocabulary_size # Set by question_encoder. # Model classifier: - type: SoftmaxClassifier + type: FeedForwardNetwork #freeze: True priority: 3 streams: inputs: bow_questions globals: - input_size: sentence_vocabulary_size # Set by sentence_encoder. + input_size: question_vocabulary_size # Set by question_encoder. prediction_size: num_categories # C1,C2,C3,C4 # Predictions decoder. 
diff --git a/configs/vqa_med_2019/question_categorization/question_categorization_onehot_rnn.yml b/configs/vqa_med_2019/question_categorization/question_categorization_onehot_rnn.yml index 47031a6..3c80491 100644 --- a/configs/vqa_med_2019/question_categorization/question_categorization_onehot_rnn.yml +++ b/configs/vqa_med_2019/question_categorization/question_categorization_onehot_rnn.yml @@ -12,7 +12,7 @@ pipeline: inputs: questions outputs: tokenized_questions - sentence_encoder: + question_encoder: type: SentenceOneHotEncoder priority: 1.2 data_folder: ~/data/vqa-med @@ -22,9 +22,9 @@ pipeline: inputs: tokenized_questions outputs: encoded_questions globals: - vocabulary_size: sentence_vocabulary_size + vocabulary_size: question_vocabulary_size - sentence_to_tensor: + question_to_tensor: type: ListToTensor priority: 1.3 num_inputs_dims: 3 @@ -32,7 +32,7 @@ pipeline: inputs: encoded_questions outputs: tensor_questions globals: - input_size: sentence_vocabulary_size + input_size: question_vocabulary_size # Model 1: RNN lstm: @@ -44,7 +44,7 @@ pipeline: streams: inputs: tensor_questions globals: - input_size: sentence_vocabulary_size + input_size: question_vocabulary_size prediction_size: num_categories # C1,C2,C3,C4 #: pipeline From 6bebcd2927f7243b45bc1b7b41d37d81d24f6659 Mon Sep 17 00:00:00 2001 From: tkornut Date: Mon, 15 Apr 2019 16:59:49 -0700 Subject: [PATCH 05/39] c3 rnn+vgg --- .../c1_classification_question_onehot_bow.yml | 6 +- ..._classification_vf_question_onehot_bow.yml | 51 +++++++++ ...c3_classification_all_bow_vgg16_concat.yml | 2 +- ...c3_classification_all_rnn_vgg16_concat.yml | 101 ++++++++++++++++++ 4 files changed, 156 insertions(+), 4 deletions(-) create mode 100644 configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml create mode 100644 configs/vqa_med_2019/c3_classification/c3_classification_all_rnn_vgg16_concat.yml diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_question_onehot_bow.yml 
b/configs/vqa_med_2019/c1_classification/c1_classification_question_onehot_bow.yml index cfba4a0..a1d9506 100644 --- a/configs/vqa_med_2019/c1_classification/c1_classification_question_onehot_bow.yml +++ b/configs/vqa_med_2019/c1_classification/c1_classification_question_onehot_bow.yml @@ -22,7 +22,7 @@ pipeline: inputs: tokenized_questions outputs: encoded_questions globals: - vocabulary_size: sentence_vocabulary_size + vocabulary_size: question_vocabulary_size question_bow_encoder: type: BOWEncoder @@ -31,7 +31,7 @@ pipeline: inputs: encoded_questions outputs: bow_questions globals: - bow_size: sentence_vocabulary_size + bow_size: question_vocabulary_size # Model classifier: @@ -41,7 +41,7 @@ pipeline: streams: inputs: bow_questions globals: - input_size: sentence_vocabulary_size + input_size: question_vocabulary_size prediction_size: vocabulary_size_c1 #: pipeline diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml b/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml new file mode 100644 index 0000000..1d93dd4 --- /dev/null +++ b/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml @@ -0,0 +1,51 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml + +pipeline: + name: vqa_med_c1_classification_vf_question_onehot_bow + + ################# SHARED ################# + + # Questions encoding. 
+ question_tokenizer: + type: SentenceTokenizer + priority: 1.1 + streams: + inputs: questions + outputs: tokenized_questions + + question_onehot_encoder: + type: SentenceOneHotEncoder + priority: 1.2 + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + export_word_mappings_to_globals: True + streams: + inputs: tokenized_questions + outputs: encoded_questions + globals: + vocabulary_size: question_vocabulary_size + + question_bow_encoder: + type: BOWEncoder + priority: 1.3 + streams: + inputs: encoded_questions + outputs: bow_questions + globals: + bow_size: question_vocabulary_size + + + + # Model + classifier: + type: FeedForwardNetwork + hidden_sizes: [100, 100] + priority: 3 + streams: + inputs: bow_questions + globals: + input_size: question_vocabulary_size + prediction_size: vocabulary_size_c1 + +#: pipeline diff --git a/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml b/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml index 1eea8d1..ed3ed6a 100644 --- a/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml +++ b/configs/vqa_med_2019/c3_classification/c3_classification_all_bow_vgg16_concat.yml @@ -2,7 +2,7 @@ default_configs: vqa_med_2019/c3_classification/default_c3_classification.yml pipeline: - name: vqa_med_c3_classification_all_bow_concat + name: vqa_med_c3_classification_all_bow_vgg_concat global_publisher: type: GlobalVariablePublisher diff --git a/configs/vqa_med_2019/c3_classification/c3_classification_all_rnn_vgg16_concat.yml b/configs/vqa_med_2019/c3_classification/c3_classification_all_rnn_vgg16_concat.yml new file mode 100644 index 0000000..51b30c6 --- /dev/null +++ b/configs/vqa_med_2019/c3_classification/c3_classification_all_rnn_vgg16_concat.yml @@ -0,0 +1,101 @@ +# Load config defining problems for training, validation and testing. 
+default_configs: vqa_med_2019/c3_classification/default_c3_classification.yml + +pipeline: + name: vqa_med_c3_classification_all_rnn_vgg_concat + + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_embeddings_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size] + values: [100, 2, 10, 100] + + # First subpipeline: question. + # Questions encoding. + question_tokenizer: + type: SentenceTokenizer + priority: 1.1 + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: Embeddings + question_embeddings: + type: SentenceEmbeddings + priority: 1.2 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + priority: 1.3 + use_logsoftmax: False + initial_state_trainable: False + #num_layers: 5 + hidden_size: 50 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_embeddings_output_size + + # 2nd subpipeline: image size. + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 2.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + # 3rd subpipeline: image. + # Image encoder. + image_encoder: + type: TorchVisionWrapper + priority: 3.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + # 4th subpipeline: concatenation + FF. 
+ concat: + type: Concatenation + priority: 4.1 + input_streams: [question_activations,image_size_activations,image_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,10],[-1,100]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: output_size + + + classifier: + type: FeedForwardNetwork + hidden_sizes: [100] + priority: 4.2 + streams: + inputs: concatenated_activations + globals: + input_size: output_size + prediction_size: vocabulary_size_c3 + + + #: pipeline From 96e23f0a0cc27a6dfd07d50d8a4117605c48f895 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 16 Apr 2019 12:05:44 -0700 Subject: [PATCH 06/39] fixed model name loading (spaces), added yes/no preprocessing and category to vqa_med --- ptp/application/pipeline_manager.py | 6 ++++- .../image_text_to_class/vqa_med_2019.py | 25 +++++++++++++++---- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/ptp/application/pipeline_manager.py b/ptp/application/pipeline_manager.py index c3ccd88..f259523 100644 --- a/ptp/application/pipeline_manager.py +++ b/ptp/application/pipeline_manager.py @@ -257,6 +257,7 @@ def load(self, checkpoint_file): """ # Load checkpoint + checkpoint_file = os.path.expanduser(checkpoint_file.replace(" ","")) # This is to be able to load a CUDA-trained model on CPU chkpt = torch.load(checkpoint_file, map_location=lambda storage, loc: storage) @@ -301,8 +302,11 @@ def load_models(self): try: # Check if file exists. checkpoint_filename = model.config["load"] + # TODO: if checkpoint_file is a list!! + checkpoint_filename = os.path.expanduser(checkpoint_filename.replace(" ","")) + # Check if file exists. 
if not os.path.isfile(checkpoint_filename): - log_str += "Coud not import parameters of model '{}' from checkpoint {} as file does not exist\n".format( + log_str += "Could not import parameters of model '{}' from checkpoint {} as file does not exist\n".format( model.name, checkpoint_filename ) diff --git a/ptp/components/problems/image_text_to_class/vqa_med_2019.py b/ptp/components/problems/image_text_to_class/vqa_med_2019.py index ea22976..88dd8c7 100644 --- a/ptp/components/problems/image_text_to_class/vqa_med_2019.py +++ b/ptp/components/problems/image_text_to_class/vqa_med_2019.py @@ -96,9 +96,9 @@ def __init__(self, name, config): self.scale_image_width = self.config['scale_image_size'][1] # Set parameters and globals related to categories. - self.globals["num_categories"] = 4 - self.globals["category_word_mappings"] = {'C1': 0, 'C2': 1, 'C3': 2, 'C4': 3, '': 4} - self.category_idx_to_word = {0: 'C1', 1: 'C2', 2: 'C3', 3: 'C4', 4: ''} + self.globals["num_categories"] = 6 + self.globals["category_word_mappings"] = {'C1': 0, 'C2': 1, 'C3': 2, 'C4': 3, 'BINARY': 4, '': 5} + self.category_idx_to_word = {0: 'C1', 1: 'C2', 2: 'C3', 3: 'C4', 4: 'BINARY', 5: ''} # Check if we want to remove punctuation from questions/answer self.remove_punctuation = self.config["remove_punctuation"] @@ -304,12 +304,27 @@ def __getitem__(self, index): data_dict[self.key_answers] = item[self.key_answers] # Question category related variables. - data_dict[self.key_category_ids] = item[self.key_category_ids] - data_dict[self.key_category_names] = self.category_idx_to_word[item[self.key_category_ids]] + # Check if this is binary question. + if self.predict_yes_no(item[self.key_questions]): + data_dict[self.key_category_ids] = 4 # Binary. + data_dict[self.key_category_names] = self.category_idx_to_word[4] + else: + data_dict[self.key_category_ids] = item[self.key_category_ids] + data_dict[self.key_category_names] = self.category_idx_to_word[item[self.key_category_ids]] # Return sample. 
return data_dict + def predict_yes_no(self, qtext): + """ + Determines whether this is binary (yes/no) type of question. + """ + yes_no_starters = ['is','was','are','does'] + tokens = qtext.split(' ') + first_token = tokens[0] + if first_token in yes_no_starters and ('or' not in tokens): + return True + return False def collate_fn(self, batch): """ From 101d583824935e2ef3533fedffa1971f2449e86c Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 16 Apr 2019 12:07:21 -0700 Subject: [PATCH 07/39] c1 all rnn vgg, updated configs for categorization, c1 vf init --- ...c1_classification_all_rnn_vgg16_concat.yml | 101 ++++++++++++++++ ..._classification_vf_question_onehot_bow.yml | 51 -------- ...question_rnn_separate_q_categorization.yml | 107 +++++++++++++++++ ...cation_vf_shared_question_flow_rnn_ffn.yml | 109 ++++++++++++++++++ .../default_c1_classification.yml | 14 +-- .../default_c3_classification.yml | 14 +-- .../default_question_categorization.yml | 19 +-- .../question_categorization_rnn.yml | 2 - .../question_categorization_rnn_ffn.yml | 65 +++++++++++ 9 files changed, 408 insertions(+), 74 deletions(-) create mode 100644 configs/vqa_med_2019/c1_classification/c1_classification_all_rnn_vgg16_concat.yml delete mode 100644 configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml create mode 100644 configs/vqa_med_2019/c1_classification/c1_classification_vf_question_rnn_separate_q_categorization.yml create mode 100644 configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml create mode 100644 configs/vqa_med_2019/question_categorization/question_categorization_rnn_ffn.yml diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_all_rnn_vgg16_concat.yml b/configs/vqa_med_2019/c1_classification/c1_classification_all_rnn_vgg16_concat.yml new file mode 100644 index 0000000..62b4389 --- /dev/null +++ b/configs/vqa_med_2019/c1_classification/c1_classification_all_rnn_vgg16_concat.yml @@ -0,0 +1,101 @@ 
+# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml + +pipeline: + name: vqa_med_c1_classification_all_rnn_vgg_concat + + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_embeddings_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size] + values: [100, 2, 10, 100] + + # First subpipeline: question. + # Questions encoding. + question_tokenizer: + type: SentenceTokenizer + priority: 1.1 + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: Embeddings + question_embeddings: + type: SentenceEmbeddings + priority: 1.2 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + priority: 1.3 + use_logsoftmax: False + initial_state_trainable: False + #num_layers: 5 + hidden_size: 50 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_embeddings_output_size + + # 2nd subpipeline: image size. + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 2.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + # 3rd subpipeline: image. + # Image encoder. + image_encoder: + type: TorchVisionWrapper + priority: 3.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + # 4th subpipeline: concatenation + FF. 
+ concat: + type: Concatenation + priority: 4.1 + input_streams: [question_activations,image_size_activations,image_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,10],[-1,100]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: output_size + + + classifier: + type: FeedForwardNetwork + hidden_sizes: [100] + priority: 4.2 + streams: + inputs: concatenated_activations + globals: + input_size: output_size + prediction_size: vocabulary_size_c1 + + + #: pipeline diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml b/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml deleted file mode 100644 index 1d93dd4..0000000 --- a/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_onehot_bow.yml +++ /dev/null @@ -1,51 +0,0 @@ -# Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml - -pipeline: - name: vqa_med_c1_classification_vf_question_onehot_bow - - ################# SHARED ################# - - # Questions encoding. 
- question_tokenizer: - type: SentenceTokenizer - priority: 1.1 - streams: - inputs: questions - outputs: tokenized_questions - - question_onehot_encoder: - type: SentenceOneHotEncoder - priority: 1.2 - data_folder: ~/data/vqa-med - word_mappings_file: questions.all.word.mappings.csv - export_word_mappings_to_globals: True - streams: - inputs: tokenized_questions - outputs: encoded_questions - globals: - vocabulary_size: question_vocabulary_size - - question_bow_encoder: - type: BOWEncoder - priority: 1.3 - streams: - inputs: encoded_questions - outputs: bow_questions - globals: - bow_size: question_vocabulary_size - - - - # Model - classifier: - type: FeedForwardNetwork - hidden_sizes: [100, 100] - priority: 3 - streams: - inputs: bow_questions - globals: - input_size: question_vocabulary_size - prediction_size: vocabulary_size_c1 - -#: pipeline diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_rnn_separate_q_categorization.yml b/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_rnn_separate_q_categorization.yml new file mode 100644 index 0000000..785881f --- /dev/null +++ b/configs/vqa_med_2019/c1_classification/c1_classification_vf_question_rnn_separate_q_categorization.yml @@ -0,0 +1,107 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml + +pipeline: + name: vqa_med_c1_classification_vf_question_rnn_separate_q_categorization + + + ################# SHARED ################# + + # Questions encoding. 
+ question_tokenizer: + type: SentenceTokenizer + priority: 0.2 + streams: + inputs: questions + outputs: tokenized_questions + + ################# FLOW 0: CATEGORY ################# + + # Model 2: Embeddings + question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE + load: ~/Documents/image-clef-2019/experiments/q_categorization/20190416_103111/checkpoints/vqa_med_question_categorization_rnn_best.pt + freeze: True + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + lstm: + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE + load: ~/Documents/image-clef-2019/experiments/q_categorization/20190416_103111/checkpoints/vqa_med_question_categorization_rnn_best.pt + freeze: True + prediction_mode: Last + priority: 0.4 + initial_state_trainable: False + streams: + inputs: embedded_questions + predictions: predicted_question_categories + globals: + input_size: embeddings_size + prediction_size: num_categories # C1,C2,C3,C4,BIN,UNK + + + category_decoder: + type: WordDecoder + priority: 0.5 + # Use the same word mappings as label indexer. 
+ import_word_mappings_from_globals: True + streams: + inputs: predicted_question_categories + outputs: predicted_question_categories_ids + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + ################# FLOW C1: question ################# + + flow_c1_string_to_mask: + type: StringToMask + priority: 1.1 + globals: + word_mappings: category_word_mappings + streams: + strings: predicted_question_categories_ids + string_indices: flow_c1_targets + masks: flow_c1_masks + + # Model 1: Embeddings + flow_c1_question_embeddings: + type: SentenceEmbeddings + priority: 1.2 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: flow_c1_embedded_questions + globals: + embeddings_size: flow_c1_embeddings_size + + # Model 2: RNN + flow_c1_lstm: + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + priority: 1.3 + initial_state_trainable: False + #num_layers: 5 + hidden_size: 50 + streams: + inputs: flow_c1_embedded_questions + globals: + input_size: flow_c1_embeddings_size + prediction_size: vocabulary_size_c1 + + +#: pipeline diff --git a/configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml b/configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml new file mode 100644 index 0000000..69f14bf --- /dev/null +++ b/configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml @@ -0,0 +1,109 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml + +pipeline: + name: vqa_med_c1_classification_vf_question_rnn_separate_q_categorization + + + ################# SHARED ################# + + # Questions encoding. 
+ question_tokenizer: + type: SentenceTokenizer + priority: 0.2 + streams: + inputs: questions + outputs: tokenized_questions + + ################# FLOW 0: CATEGORY ################# + + # Model 2: Embeddings + question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE + load: ~/Documents/image-clef-2019/experiments/q_categorization/ xxx /checkpoints/vqa_med_question_categorization_rnn_best.pt + freeze: True + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + lstm: + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE + load: ~/Documents/image-clef-2019/experiments/q_categorization/ xxx /checkpoints/vqa_med_question_categorization_rnn_best.pt + freeze: True + prediction_mode: Last + priority: 0.4 + initial_state_trainable: False + streams: + inputs: embedded_questions + predictions: predicted_question_categories + globals: + input_size: embeddings_size + prediction_size: num_categories # C1,C2,C3,C4,BIN,UNK + + + category_decoder: + type: WordDecoder + priority: 0.5 + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: predicted_question_categories + outputs: predicted_question_categories_ids + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + ################# FLOW C1: question ################# + + flow_c1_string_to_mask: + type: StringToMask + priority: 1.1 + globals: + # This has to be changed to {0: C1} + # Question is what to do ewith UNK? 
+ word_mappings: category_word_mappings + streams: + strings: predicted_question_categories_ids + string_indices: flow_c1_targets + masks: flow_c1_masks + + # Model 1: Embeddings + flow_c1_question_embeddings: + type: SentenceEmbeddings + priority: 1.2 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: flow_c1_embedded_questions + globals: + embeddings_size: flow_c1_embeddings_size + + # Model 2: RNN + flow_c1_lstm: + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + priority: 1.3 + initial_state_trainable: False + #num_layers: 5 + hidden_size: 50 + streams: + inputs: flow_c1_embedded_questions + globals: + input_size: flow_c1_embeddings_size + prediction_size: vocabulary_size_c1 + + +#: pipeline diff --git a/configs/vqa_med_2019/c1_classification/default_c1_classification.yml b/configs/vqa_med_2019/c1_classification/default_c1_classification.yml index abf631c..27bd757 100644 --- a/configs/vqa_med_2019/c1_classification/default_c1_classification.yml +++ b/configs/vqa_med_2019/c1_classification/default_c1_classification.yml @@ -60,15 +60,15 @@ pipeline: loss: loss # Statistics. - accuracy: - type: AccuracyStatistics - priority: 100.1 - streams: - targets: answers_ids - batch_size: type: BatchSizeStatistics - priority: 100.2 + priority: 100.1 + + #accuracy: + # type: AccuracyStatistics + # priority: 100.2 + # streams: + # targets: answers_ids precision_recall: type: PrecisionRecallStatistics diff --git a/configs/vqa_med_2019/c3_classification/default_c3_classification.yml b/configs/vqa_med_2019/c3_classification/default_c3_classification.yml index 863ed6a..e3d2a39 100644 --- a/configs/vqa_med_2019/c3_classification/default_c3_classification.yml +++ b/configs/vqa_med_2019/c3_classification/default_c3_classification.yml @@ -60,15 +60,15 @@ pipeline: loss: loss # Statistics. 
- accuracy: - type: AccuracyStatistics - priority: 100.1 - streams: - targets: answers_ids - batch_size: type: BatchSizeStatistics - priority: 100.2 + priority: 100.1 + + #accuracy: + # type: AccuracyStatistics + # priority: 100.2 + # streams: + # targets: answers_ids precision_recall: type: PrecisionRecallStatistics diff --git a/configs/vqa_med_2019/question_categorization/default_question_categorization.yml b/configs/vqa_med_2019/question_categorization/default_question_categorization.yml index e39d87b..b263e62 100644 --- a/configs/vqa_med_2019/question_categorization/default_question_categorization.yml +++ b/configs/vqa_med_2019/question_categorization/default_question_categorization.yml @@ -1,6 +1,11 @@ # Load config defining problems for training, validation and testing. default_configs: vqa_med_2019/default_vqa_med_2019.yml +training: + # settings parameters + terminal_conditions: + loss_stop: 1.0e-3 + pipeline: # Predictions decoder. @@ -26,15 +31,15 @@ pipeline: loss: loss # Statistics. 
- accuracy: - type: AccuracyStatistics - priority: 100.1 - streams: - targets: category_ids - batch_size: type: BatchSizeStatistics - priority: 100.2 + priority: 100.1 + + #accuracy: + # type: AccuracyStatistics + # priority: 100.2 + # streams: + # targets: category_ids precision_recall: type: PrecisionRecallStatistics diff --git a/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml b/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml index 16035d1..532cdea 100644 --- a/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml +++ b/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml @@ -31,8 +31,6 @@ pipeline: prediction_mode: Last priority: 3 initial_state_trainable: False - #num_layers: 5 - #hidden_size: 1000 streams: inputs: embedded_questions globals: diff --git a/configs/vqa_med_2019/question_categorization/question_categorization_rnn_ffn.yml b/configs/vqa_med_2019/question_categorization/question_categorization_rnn_ffn.yml new file mode 100644 index 0000000..7988a86 --- /dev/null +++ b/configs/vqa_med_2019/question_categorization/question_categorization_rnn_ffn.yml @@ -0,0 +1,65 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/question_categorization/default_question_categorization.yml + +pipeline: + name: vqa_med_question_categorization_rnn_ffn + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size] + values: [100, 2, 10, 100] + + + # Questions encoding. 
+ question_tokenizer: + type: SentenceTokenizer + priority: 1.1 + streams: + inputs: questions + outputs: tokenized_questions + + # Model 2: Embeddings + question_embeddings: + type: SentenceEmbeddings + priority: 1.2 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + lstm: + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + priority: 1.3 + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: lstm_activations_questions + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model + classifier: + type: FeedForwardNetwork + hidden: [50] + priority: 1.4 + dropout_rate: 0.5 + streams: + inputs: lstm_activations_questions + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + +#: pipeline From 9665d985edf027c14d33089ef333fe912d268d74 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 16 Apr 2019 12:45:55 -0700 Subject: [PATCH 08/39] Added option to indicate model name while loading --- ptp/application/pipeline_manager.py | 26 +++++++++++++++++++++----- ptp/components/models/model.py | 10 +++++++--- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/ptp/application/pipeline_manager.py b/ptp/application/pipeline_manager.py index f259523..09f67ed 100644 --- a/ptp/application/pipeline_manager.py +++ b/ptp/application/pipeline_manager.py @@ -300,13 +300,28 @@ def load_models(self): for model in self.models: if "load" in model.config.keys(): try: + # Determine whether checkpoint is a string (filename) or list. 
+ checkpoint = model.config["load"] + print(checkpoint.to_dict()) + if type(checkpoint) == str: + checkpoint_filename = checkpoint + checkpoint_model = None + else: # Assume dictionary. + if 'file' not in checkpoint.keys() or 'model' not in checkpoint.keys(): + log_str += "The 'load' section of model '{}' from checkpoint '{}' is incorrect: it must contain a single string (with checkpoint filename) or dictionary (with two sections: checkpoint 'file' and 'model' to load)".format( + model.name, + checkpoint_filename + ) + error = True + continue + # Ok! + checkpoint_filename = checkpoint["file"] + checkpoint_model = checkpoint["model"] + # Check if file exists. - checkpoint_filename = model.config["load"] - # TODO: if checkpoint_file is a list!! checkpoint_filename = os.path.expanduser(checkpoint_filename.replace(" ","")) - # Check if file exists. if not os.path.isfile(checkpoint_filename): - log_str += "Could not import parameters of model '{}' from checkpoint {} as file does not exist\n".format( + log_str += "Could not import parameters of model '{}' from checkpoint '{}' as file does not exist\n".format( model.name, checkpoint_filename ) @@ -326,7 +341,8 @@ def load_models(self): chkpt['status'] ) # Load model. - model.load_from_checkpoint(chkpt) + model.load_from_checkpoint(chkpt, checkpoint_model) + log_str += " + Model '{}' [{}] params loaded\n".format(model.name, type(model).__name__) except KeyError: log_str += " + Model '{}' [{}] params not found in checkpoint!\n".format(model.name, type(model).__name__) diff --git a/ptp/components/models/model.py b/ptp/components/models/model.py index f907111..f89ad57 100644 --- a/ptp/components/models/model.py +++ b/ptp/components/models/model.py @@ -73,13 +73,17 @@ def save_to_checkpoint(self, chkpt): chkpt[self.name] = self.state_dict() - def load_from_checkpoint(self, chkpt): + def load_from_checkpoint(self, chkpt, section=None): """ Loads state dictionary from checkpoint. 
- :param: Checkpoint (dictionary) loaded from file. + :param chkpt: Checkpoint (dictionary) loaded from file. + + :param section: Name of the section containing params (DEFAULT: None, means that model name from current configuration will be used)\ """ - self.load_state_dict(chkpt[self.name]) + if section is None: + section = self.name + self.load_state_dict(chkpt[section]) def freeze(self): """ From d1fad47a98f67b7e87ccc255d8b777d2bbd236bb Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 16 Apr 2019 12:52:25 -0700 Subject: [PATCH 09/39] Cleaned up logging comments while loading models --- ptp/application/pipeline_manager.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/ptp/application/pipeline_manager.py b/ptp/application/pipeline_manager.py index 09f67ed..a6b550a 100644 --- a/ptp/application/pipeline_manager.py +++ b/ptp/application/pipeline_manager.py @@ -295,22 +295,20 @@ def load_models(self): The 'load' variable should contain path with filename of the checkpoint from which we want to load particular model. """ error = False - log_str = '' + log_str = 'Trying to load the pre-trained models:\n' # Iterate over models. for model in self.models: if "load" in model.config.keys(): try: # Determine whether checkpoint is a string (filename) or list. checkpoint = model.config["load"] - print(checkpoint.to_dict()) if type(checkpoint) == str: checkpoint_filename = checkpoint checkpoint_model = None else: # Assume dictionary. 
if 'file' not in checkpoint.keys() or 'model' not in checkpoint.keys(): - log_str += "The 'load' section of model '{}' from checkpoint '{}' is incorrect: it must contain a single string (with checkpoint filename) or dictionary (with two sections: checkpoint 'file' and 'model' to load)".format( - model.name, - checkpoint_filename + log_str += " + The 'load' section of model '{}' is incorrect: it must contain a single string (with checkpoint filename) or a dictionary (with two sections: checkpoint 'file' and 'model' to load)\n".format( + model.name ) error = True continue @@ -321,7 +319,7 @@ def load_models(self): # Check if file exists. checkpoint_filename = os.path.expanduser(checkpoint_filename.replace(" ","")) if not os.path.isfile(checkpoint_filename): - log_str += "Could not import parameters of model '{}' from checkpoint '{}' as file does not exist\n".format( + log_str += " + Could not import parameters of model '{}' from checkpoint '{}' as file does not exist\n".format( model.name, checkpoint_filename ) @@ -332,7 +330,7 @@ def load_models(self): # This is to be able to load a CUDA-trained model on CPU chkpt = torch.load(checkpoint_filename, map_location=lambda storage, loc: storage) - log_str += "Importing model '{}' from pipeline '{}' parameters from checkpoint from {} (episode: {}, loss: {}, status: {}):\n".format( + log_str += " + Importing model '{}' from pipeline '{}' parameters from checkpoint from {} (episode: {}, loss: {}, status: {})\n".format( model.name, chkpt['name'], chkpt['timestamp'], From 7611a5bbf902e7fdfc5311a28ceb47b8cadb7826 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 16 Apr 2019 13:01:59 -0700 Subject: [PATCH 10/39] C1 variational flow - shared rnn with question categorization --- ...cation_vf_shared_question_flow_rnn_ffn.yml | 127 ++++++++++++++++++ ...cation_vf_shared_question_flow_rnn_ffn.yml | 109 --------------- .../question_categorization_rnn.yml | 2 +- 3 files changed, 128 insertions(+), 110 deletions(-) create mode 100644 
configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml delete mode 100644 configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml b/configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml new file mode 100644 index 0000000..5c2c7b8 --- /dev/null +++ b/configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml @@ -0,0 +1,127 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml + +pipeline: + name: vqa_med_c1_classification_vf_question_rnn_separate_q_categorization + + + ################# FLOW 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size] + values: [100, 2, 10, 100] + + # Questions encoding. 
+ flow0_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: question embeddings + flow0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + flow0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: lstm_activations_questions + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + ################# FLOW 1: CATEGORY ################# + + # Model 3: FFN question category + flow1_classifier: + priority: 1.1 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: lstm_activations_questions + predictions: predicted_question_categories + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + + flow1_category_decoder: + priority: 1.2 + type: WordDecoder + # Use the 
same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: predicted_question_categories + outputs: predicted_question_categories_ids + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + ################# FLOW C1: question ################# + + flow2_c1_string_to_mask: + priority: 2.1 + type: StringToMask + globals: + # This has to be changed to {0: C1} + # Question is what to do ewith UNK? + word_mappings: category_word_mappings + streams: + strings: predicted_question_categories_ids + string_indices: flow_c1_targets + masks: flow_c1_masks + + # Model 4: FFN C1 answering + flow2_c1_lstm: + priority: 2.2 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: lstm_activations_questions + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: vocabulary_size_c1 + + +#: pipeline diff --git a/configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml b/configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml deleted file mode 100644 index 69f14bf..0000000 --- a/configs/vqa_med_2019/c1_classification/c_classification_vf_shared_question_flow_rnn_ffn.yml +++ /dev/null @@ -1,109 +0,0 @@ -# Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml - -pipeline: - name: vqa_med_c1_classification_vf_question_rnn_separate_q_categorization - - - ################# SHARED ################# - - # Questions encoding. 
- question_tokenizer: - type: SentenceTokenizer - priority: 0.2 - streams: - inputs: questions - outputs: tokenized_questions - - ################# FLOW 0: CATEGORY ################# - - # Model 2: Embeddings - question_embeddings: - type: SentenceEmbeddings - priority: 0.3 - # LOAD AND FREEZE - load: ~/Documents/image-clef-2019/experiments/q_categorization/ xxx /checkpoints/vqa_med_question_categorization_rnn_best.pt - freeze: True - embeddings_size: 50 - pretrained_embeddings_file: glove.6B.50d.txt - data_folder: ~/data/vqa-med - word_mappings_file: questions.all.word.mappings.csv - streams: - inputs: tokenized_questions - outputs: embedded_questions - - # Model 2: RNN - lstm: - type: RecurrentNeuralNetwork - cell_type: LSTM - # LOAD AND FREEZE - load: ~/Documents/image-clef-2019/experiments/q_categorization/ xxx /checkpoints/vqa_med_question_categorization_rnn_best.pt - freeze: True - prediction_mode: Last - priority: 0.4 - initial_state_trainable: False - streams: - inputs: embedded_questions - predictions: predicted_question_categories - globals: - input_size: embeddings_size - prediction_size: num_categories # C1,C2,C3,C4,BIN,UNK - - - category_decoder: - type: WordDecoder - priority: 0.5 - # Use the same word mappings as label indexer. - import_word_mappings_from_globals: True - streams: - inputs: predicted_question_categories - outputs: predicted_question_categories_ids - globals: - vocabulary_size: num_categories - word_mappings: category_word_mappings - - ################# FLOW C1: question ################# - - flow_c1_string_to_mask: - type: StringToMask - priority: 1.1 - globals: - # This has to be changed to {0: C1} - # Question is what to do ewith UNK? 
- word_mappings: category_word_mappings - streams: - strings: predicted_question_categories_ids - string_indices: flow_c1_targets - masks: flow_c1_masks - - # Model 1: Embeddings - flow_c1_question_embeddings: - type: SentenceEmbeddings - priority: 1.2 - embeddings_size: 50 - pretrained_embeddings_file: glove.6B.50d.txt - data_folder: ~/data/vqa-med - word_mappings_file: questions.all.word.mappings.csv - streams: - inputs: tokenized_questions - outputs: flow_c1_embedded_questions - globals: - embeddings_size: flow_c1_embeddings_size - - # Model 2: RNN - flow_c1_lstm: - type: RecurrentNeuralNetwork - cell_type: LSTM - prediction_mode: Last - priority: 1.3 - initial_state_trainable: False - #num_layers: 5 - hidden_size: 50 - streams: - inputs: flow_c1_embedded_questions - globals: - input_size: flow_c1_embeddings_size - prediction_size: vocabulary_size_c1 - - -#: pipeline diff --git a/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml b/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml index 532cdea..7097041 100644 --- a/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml +++ b/configs/vqa_med_2019/question_categorization/question_categorization_rnn.yml @@ -35,6 +35,6 @@ pipeline: inputs: embedded_questions globals: input_size: embeddings_size - prediction_size: num_categories # C1,C2,C3,C4 + prediction_size: num_categories # C1,C2,C3,C4,BIN,UNK #: pipeline From 4faa4bc0abeccf8e7216ab0bf6ce8b97e27e437a Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 16 Apr 2019 15:48:52 -0700 Subject: [PATCH 11/39] Added out_of_vocabulary to LabelIndexer, first VQAMED variational flow - c1 and binary --- .../default/components/text/label_indexer.yml | 4 + ...cation_vf_shared_question_flow_rnn_ffn.yml | 127 -------- ...es_shared_question_rnn_two_ffns_losses.yml | 273 ++++++++++++++++++ ptp/components/text/label_indexer.py | 9 +- 4 files changed, 285 insertions(+), 128 deletions(-) delete mode 100644 
configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml create mode 100644 configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml diff --git a/configs/default/components/text/label_indexer.yml b/configs/default/components/text/label_indexer.yml index 45f6e6f..5b871e9 100644 --- a/configs/default/components/text/label_indexer.yml +++ b/configs/default/components/text/label_indexer.yml @@ -25,6 +25,10 @@ import_word_mappings_from_globals: False # Flag informing whether word mappings will be exported to globals (LOADED) export_word_mappings_to_globals: False +# Value that will be used when word is out of vocabulary (LOADED) +# (Mask for that element will be 0 as well) +out_of_vocabulary_value: -1 + streams: #################################################################### # 2. Keymappings associated with INPUT and OUTPUT streams. diff --git a/configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml b/configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml deleted file mode 100644 index 5c2c7b8..0000000 --- a/configs/vqa_med_2019/c1_classification/c1_classification_vf_shared_question_flow_rnn_ffn.yml +++ /dev/null @@ -1,127 +0,0 @@ -# Load config defining problems for training, validation and testing. -default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml - -pipeline: - name: vqa_med_c1_classification_vf_question_rnn_separate_q_categorization - - - ################# FLOW 0: SHARED ################# - - # Add global variables. - global_publisher: - type: GlobalVariablePublisher - priority: 0 - # Add input_size to globals. - keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size] - values: [100, 2, 10, 100] - - # Questions encoding. 
- flow0_question_tokenizer: - priority: 0.2 - type: SentenceTokenizer - streams: - inputs: questions - outputs: tokenized_questions - - # Model 1: question embeddings - flow0_question_embeddings: - type: SentenceEmbeddings - priority: 0.3 - # LOAD AND FREEZE # - load: - file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt - model: question_embeddings - freeze: True - ################### - embeddings_size: 50 - pretrained_embeddings_file: glove.6B.50d.txt - data_folder: ~/data/vqa-med - word_mappings_file: questions.all.word.mappings.csv - streams: - inputs: tokenized_questions - outputs: embedded_questions - - # Model 2: question RNN - flow0_lstm: - priority: 0.4 - type: RecurrentNeuralNetwork - cell_type: LSTM - # LOAD AND FREEZE # - load: - file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt - model: lstm - freeze: True - ################### - prediction_mode: Last - initial_state_trainable: True - use_logsoftmax: False - dropout_rate: 0.5 - streams: - inputs: embedded_questions - predictions: lstm_activations_questions - globals: - input_size: embeddings_size - prediction_size: question_lstm_output_size - - ################# FLOW 1: CATEGORY ################# - - # Model 3: FFN question category - flow1_classifier: - priority: 1.1 - type: FeedForwardNetwork - # LOAD AND FREEZE # - load: - file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt - model: classifier - freeze: True - ################### - hidden: [50] - dropout_rate: 0.5 - streams: - inputs: lstm_activations_questions - predictions: predicted_question_categories - globals: - input_size: question_lstm_output_size # Set by global publisher - prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK - - - flow1_category_decoder: - priority: 1.2 - type: WordDecoder - # Use the 
same word mappings as label indexer. - import_word_mappings_from_globals: True - streams: - inputs: predicted_question_categories - outputs: predicted_question_categories_ids - globals: - vocabulary_size: num_categories - word_mappings: category_word_mappings - - ################# FLOW C1: question ################# - - flow2_c1_string_to_mask: - priority: 2.1 - type: StringToMask - globals: - # This has to be changed to {0: C1} - # Question is what to do ewith UNK? - word_mappings: category_word_mappings - streams: - strings: predicted_question_categories_ids - string_indices: flow_c1_targets - masks: flow_c1_masks - - # Model 4: FFN C1 answering - flow2_c1_lstm: - priority: 2.2 - type: FeedForwardNetwork - hidden: [50] - dropout_rate: 0.5 - streams: - inputs: lstm_activations_questions - globals: - input_size: question_lstm_output_size # Set by global publisher - prediction_size: vocabulary_size_c1 - - -#: pipeline diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml new file mode 100644 index 0000000..f4172bd --- /dev/null +++ b/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml @@ -0,0 +1,273 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1 + dataloader: + num_workers: 4 + + +pipeline: + name: c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses + + ################# FLOW 0: SHARED ################# + + # Add global variables. 
+ global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_binary_word_to_ix] + values: [100, 2, 10, 100,{"C1": 0}, {"BINARY": 0}] + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 0.1 + + + + + ################# FLOW 0: SHARED QUESTION ################# + + # Questions encoding. + flow0_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: question embeddings + flow0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + #load: + # file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + # model: question_embeddings + #freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + flow0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + #load: + # file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + # model: lstm + #freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: lstm_activations_questions + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + ################# FLOW 1: CATEGORY ################# + + # Model 3: FFN question category + flow1_classifier: + priority: 1.1 + type: FeedForwardNetwork + # LOAD AND FREEZE # + #load: + # file: 
~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + # model: classifier + #freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: lstm_activations_questions + predictions: predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + + flow1_category_decoder: + priority: 1.2 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: predicted_question_categories_preds + outputs: predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + ################# FLOW 2: C1 question ################# + + # Answer encoding for flow 2. + flow2_c1_answer_indexer: + type: LabelIndexer + priority: 2.0 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_without_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: flow2_c1_answers_without_yn_ids + globals: + vocabulary_size: vocabulary_size_c1_without_yn + word_mappings: word_mappings_c1_without_yn + + # Sample masking based on categories. + flow2_c1_string_to_mask: + priority: 2.1 + type: StringToMask + globals: + word_mappings: category_c1_word_to_ix + streams: + strings: category_names # predicted_question_categories_names ## FOR NOW! 
+ string_indices: predicted_c1_question_categories_indices # NOT USED + masks: flow2_c1_masks + + # Model 4: FFN C1 answering + flow2_c1_lstm: + priority: 2.2 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: lstm_activations_questions + predictions: flow2_c1_predictions + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: vocabulary_size_c1_without_yn + + flow2_c1_nllloss: + type: NLLLoss + priority: 2.3 + targets_dim: 1 + use_masking: True + streams: + predictions: flow2_c1_predictions + masks: flow2_c1_masks + targets: flow2_c1_answers_without_yn_ids + loss: flow2_c1_loss + + flow2_c1_precision_recall: + type: PrecisionRecallStatistics + priority: 2.4 + use_word_mappings: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: flow2_c1_masks + predictions: flow2_c1_predictions + targets: flow2_c1_answers_without_yn_ids + globals: + word_mappings: word_mappings_c1_without_yn + num_classes: vocabulary_size_c1_without_yn + statistics: + precision: flow2_c1_precision + recall: flow2_c1_recall + f1score: flow2_c1_f1score + + ################# FLOW 3: BINARY question ################# + + # Answer encoding for flow 3. + flow3_binary_answer_indexer: + type: LabelIndexer + priority: 3.0 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: flow3_binary_answers_ids + globals: + vocabulary_size: vocabulary_size_binary_yn + word_mappings: word_mappings_binary_yn + + flow3_binary_string_to_mask: + priority: 3.1 + type: StringToMask + globals: + word_mappings: category_binary_word_to_ix + streams: + strings: category_names # predicted_question_categories_names ## FOR NOW! 
+ string_indices: predicted_binary_question_categories_indices # NOT USED + masks: flow3_binary_masks + + # Model 4: FFN C1 answering + flow3_binary_lstm: + priority: 3.2 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: lstm_activations_questions + predictions: flow3_binary_predictions + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: vocabulary_size_binary_yn + + flow3_binary_nllloss: + type: NLLLoss + priority: 3.3 + targets_dim: 1 + use_masking: True + streams: + predictions: flow3_binary_predictions + masks: flow3_binary_masks + targets: flow3_binary_answers_ids + loss: flow3_binary_loss + + flow3_binary_precision_recall: + type: PrecisionRecallStatistics + priority: 3.4 + use_word_mappings: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: flow3_binary_masks + predictions: flow3_binary_predictions + targets: flow3_binary_answers_ids + globals: + word_mappings: word_mappings_binary_yn + num_classes: vocabulary_size_binary_yn + statistics: + precision: flow3_binary_precision + recall: flow3_binary_recall + f1score: flow3_binary_f1score + + # Viewers. + viewer: + type: StreamViewer + priority: 100.1 + input_streams: questions,answers, category_names,predicted_question_categories_names, flow2_c1_masks,flow2_c1_answers_without_yn_ids,flow2_c1_predictions, flow3_binary_masks,flow3_binary_answers_ids,flow3_binary_predictions + + +#: pipeline diff --git a/ptp/components/text/label_indexer.py b/ptp/components/text/label_indexer.py index f9728ea..410aa46 100644 --- a/ptp/components/text/label_indexer.py +++ b/ptp/components/text/label_indexer.py @@ -42,6 +42,9 @@ def __init__(self, name, config): self.key_inputs = self.stream_keys["inputs"] self.key_outputs = self.stream_keys["outputs"] + # Get value from configuration. 
+ self.out_of_vocabulary_value = self.config["out_of_vocabulary_value"] + def input_data_definitions(self): """ @@ -81,7 +84,11 @@ def __call__(self, data_dict): for sample in inputs: assert not isinstance(sample, (list,)), 'This encoder requires input sample to contain a single word' # Process single token. - output_sample = self.word_to_ix[sample] + if sample in self.word_to_ix.keys(): + output_sample = self.word_to_ix[sample] + else: + # Word out of vocabulary. + output_sample = self.out_of_vocabulary_value outputs_list.append(output_sample) # Transform to tensor. output_tensor = torch.tensor(outputs_list) From a6133f7bdab47cc349da0a52289e332b262a9fb0 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 16 Apr 2019 16:34:26 -0700 Subject: [PATCH 12/39] pipe with pretrained categorization and two losses for C1 and binary questions --- ...on_shared_question_rnn_two_ffns_losses.yml | 366 ++++++++++++++++++ 1 file changed, 366 insertions(+) create mode 100644 configs/vqa_med_2019/vf/c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml diff --git a/configs/vqa_med_2019/vf/c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml new file mode 100644 index 0000000..eb883db --- /dev/null +++ b/configs/vqa_med_2019/vf/c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml @@ -0,0 +1,366 @@ +# Load config defining problems for training, validation and testing. 
+default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1 + dataloader: + num_workers: 4 + + +pipeline: + name: c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml + + ################# FLOW 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_binary_word_to_ix] + values: [100, 2, 10, 100,{"C1": 0}, {"BINARY": 0}] + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 0.1 + + ################# FLOW 0: SHARED QUESTION ################# + + # Questions encoding. 
+ flow0_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: question embeddings + flow0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + #load: + # file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + # model: question_embeddings + #freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + flow0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + #load: + # file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + # model: lstm + #freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: lstm_activations_questions + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + flow0_all_answer_indexer: + type: LabelIndexer + priority: 0.6 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + vocabulary_size: vocabulary_size_all_c1_binasry + word_mappings: word_mappings_all_c1_binary + + + ################# FLOW 1: CATEGORY ################# + + # Model 1: question embeddings + flow1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: flow1_embedded_questions + + # Model 2: question RNN + flow1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: flow1_embedded_questions + predictions: flow1_lstm_activations_questions + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + flow1_classifier: + priority: 1.3 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: flow1_lstm_activations_questions + predictions: flow1_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories 
# C1,C2,C3,C4, BINARY, UNK + + flow1_category_decoder: + priority: 1.4 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: flow1_predicted_question_categories_preds + outputs: flow1_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + flow1_category_accuracy: + type: AccuracyStatistics + priority: 1.5 + streams: + targets: category_ids + predictions: flow1_predicted_question_categories_preds + + ################# FLOW 2: C1 question ################# + + # Answer encoding for flow 2. + flow2_c1_answer_indexer: + type: LabelIndexer + priority: 2.0 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_without_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: flow2_c1_answers_without_yn_ids + globals: + vocabulary_size: vocabulary_size_c1_without_yn + word_mappings: word_mappings_c1_without_yn + + # Sample masking based on categories. 
+ flow2_c1_string_to_mask: + priority: 2.1 + type: StringToMask + globals: + word_mappings: category_c1_word_to_ix + streams: + strings: flow1_predicted_question_categories_names + string_indices: predicted_c1_question_categories_indices # NOT USED + masks: flow2_c1_masks + + # Model 4: FFN C1 answering + flow2_c1_lstm: + priority: 2.2 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: lstm_activations_questions + predictions: flow2_c1_predictions + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: vocabulary_size_c1_without_yn + + flow2_c1_nllloss: + type: NLLLoss + priority: 2.3 + targets_dim: 1 + use_masking: True + streams: + predictions: flow2_c1_predictions + masks: flow2_c1_masks + targets: flow2_c1_answers_without_yn_ids + loss: flow2_c1_loss + + flow2_c1_precision_recall: + type: PrecisionRecallStatistics + priority: 2.4 + use_word_mappings: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: flow2_c1_masks + predictions: flow2_c1_predictions + targets: flow2_c1_answers_without_yn_ids + globals: + word_mappings: word_mappings_c1_without_yn + num_classes: vocabulary_size_c1_without_yn + statistics: + precision: flow2_c1_precision + recall: flow2_c1_recall + f1score: flow2_c1_f1score + + ################# FLOW 3: BINARY question ################# + + # Answer encoding for flow 3. + flow3_binary_answer_indexer: + type: LabelIndexer + priority: 3.0 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: flow3_binary_answers_ids + globals: + vocabulary_size: vocabulary_size_binary_yn + word_mappings: word_mappings_binary_yn + + flow3_binary_string_to_mask: + priority: 3.1 + type: StringToMask + globals: + word_mappings: category_binary_word_to_ix + streams: + strings: flow1_predicted_question_categories_names + string_indices: predicted_binary_question_categories_indices # NOT USED + masks: flow3_binary_masks + + # Model 4: FFN C1 answering + flow3_binary_lstm: + priority: 3.2 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: lstm_activations_questions + predictions: flow3_binary_predictions + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: vocabulary_size_binary_yn + + flow3_binary_nllloss: + type: NLLLoss + priority: 3.3 + targets_dim: 1 + use_masking: True + streams: + predictions: flow3_binary_predictions + masks: flow3_binary_masks + targets: flow3_binary_answers_ids + loss: flow3_binary_loss + + flow3_binary_precision_recall: + type: PrecisionRecallStatistics + priority: 3.4 + use_word_mappings: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: flow3_binary_masks + predictions: flow3_binary_predictions + targets: flow3_binary_answers_ids + globals: + word_mappings: word_mappings_binary_yn + num_classes: vocabulary_size_binary_yn + statistics: + precision: flow3_binary_precision + recall: flow3_binary_recall + f1score: flow3_binary_f1score + + ################# FLOW 4: MERGE ANSWERS ################# + + merged_predictions: + type: JoinMaskedPredictions + priority: 4.1 + # Names of used input streams. 
+ input_prediction_streams: [flow2_c1_predictions, flow3_binary_predictions] + input_mask_streams: [flow2_c1_masks, flow3_binary_masks] + input_word_mappings: [word_mappings_c1_without_yn, word_mappings_binary_yn] + globals: + output_word_mappings: word_mappings_all_c1_binary + streams: + output_strings: merged_predictions + output_indices: merged_pred_indices + + # Statistics. + merged_precision_recall: + type: PrecisionRecallStatistics + priority: 4.2 + # Use prediction indices instead of distributions. + use_prediction_distributions: False + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + globals: + word_mappings: word_mappings_all_c1_binary + streams: + targets: all_answers_ids + predictions: merged_pred_indices + statistics: + precision: merged_precision + recall: merged_recall + f1score: merged_f1score + + # Viewers. + viewer: + type: StreamViewer + priority: 4.3 + input_streams: questions,answers, category_names,predicted_question_categories_names, flow2_c1_masks,flow2_c1_answers_without_yn_ids,flow2_c1_predictions, flow3_binary_masks,flow3_binary_answers_ids,flow3_binary_predictions, merged_predictions + + +#: pipeline From 95776c892d5d4d4a674bbec2271ac2b72779b34d Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 16 Apr 2019 16:36:11 -0700 Subject: [PATCH 13/39] rename --- ...gory_classification_shared_question_rnn_two_ffns_losses.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename configs/vqa_med_2019/vf/{c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml => c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml} (99%) diff --git a/configs/vqa_med_2019/vf/c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml similarity index 99% rename from 
configs/vqa_med_2019/vf/c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml rename to configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml index eb883db..488d8d6 100644 --- a/configs/vqa_med_2019/vf/c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml +++ b/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml @@ -20,7 +20,7 @@ validation: pipeline: - name: c2_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml + name: c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses ################# FLOW 0: SHARED ################# From 33d296c068516f1e3501ae9f12f0cac23321543a Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 16 Apr 2019 16:49:08 -0700 Subject: [PATCH 14/39] statistics typo fix in config - c1+binary vf --- ...ry_classification_shared_question_rnn_two_ffns_losses.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml index 488d8d6..f5763e3 100644 --- a/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml +++ b/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml @@ -182,6 +182,8 @@ pipeline: streams: targets: category_ids predictions: flow1_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy ################# FLOW 2: C1 question ################# @@ -324,7 +326,8 @@ pipeline: ################# FLOW 4: MERGE ANSWERS ################# - merged_predictions: + # Merge predictions + merged_predictions: type: JoinMaskedPredictions priority: 4.1 # Names of used input streams. 
From ee39ace2111e481c4ac8b0d00c1e6f57ad87c4b1 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 16 Apr 2019 17:05:35 -0700 Subject: [PATCH 15/39] join masked predictions test --- ptp/components/masking/join_masked_predictions.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ptp/components/masking/join_masked_predictions.py b/ptp/components/masking/join_masked_predictions.py index fa0eb3d..0b418ea 100644 --- a/ptp/components/masking/join_masked_predictions.py +++ b/ptp/components/masking/join_masked_predictions.py @@ -123,10 +123,12 @@ def __call__(self, data_dict): - "outputs": added output field containing tensor [BATCH_SIZE x ...] """ - # Get inputs masks + # Get inputs masks. masks = [] for imsk in self.input_mask_stream_keys: - masks.append(data_dict[imsk].data.cpu().numpy()) + # Get mask from stream. + mask = data_dict[imsk] + masks.append(mask.data.cpu().numpy()) # Sum all masks and make sure that they are complementary. masks_sum = np.sum(masks, axis=0) From b6057949067f4ca39efea3de203ff57b31f4cb8c Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 16 Apr 2019 17:53:17 -0700 Subject: [PATCH 16/39] Fixed masking in P/R flow 2 and 3 --- ...egory_classification_shared_question_rnn_two_ffns_losses.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml index f5763e3..a7e755d 100644 --- a/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml +++ b/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml @@ -241,6 +241,7 @@ pipeline: type: PrecisionRecallStatistics priority: 2.4 use_word_mappings: True + use_masking: True #show_class_scores: True #show_confusion_matrix: True streams: @@ -310,6 +311,7 @@ pipeline: type: 
PrecisionRecallStatistics priority: 3.4 use_word_mappings: True + use_masking: True #show_class_scores: True #show_confusion_matrix: True streams: From edeeed9fcb1af28a45874361e1a90b4fa54db3fd Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 16 Apr 2019 17:55:54 -0700 Subject: [PATCH 17/39] cleanup c1 binary hardcoded categories from problem --- ...es_shared_question_rnn_two_ffns_losses.yml | 35 ------------------- 1 file changed, 35 deletions(-) diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml index f4172bd..0065fb3 100644 --- a/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml +++ b/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml @@ -37,9 +37,6 @@ pipeline: type: BatchSizeStatistics priority: 0.1 - - - ################# FLOW 0: SHARED QUESTION ################# # Questions encoding. @@ -92,38 +89,6 @@ pipeline: ################# FLOW 1: CATEGORY ################# - # Model 3: FFN question category - flow1_classifier: - priority: 1.1 - type: FeedForwardNetwork - # LOAD AND FREEZE # - #load: - # file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt - # model: classifier - #freeze: True - ################### - hidden: [50] - dropout_rate: 0.5 - streams: - inputs: lstm_activations_questions - predictions: predicted_question_categories_preds - globals: - input_size: question_lstm_output_size # Set by global publisher - prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK - - - flow1_category_decoder: - priority: 1.2 - type: WordDecoder - # Use the same word mappings as label indexer. 
- import_word_mappings_from_globals: True - streams: - inputs: predicted_question_categories_preds - outputs: predicted_question_categories_names - globals: - vocabulary_size: num_categories - word_mappings: category_word_mappings - ################# FLOW 2: C1 question ################# # Answer encoding for flow 2. From 6d4200a651ea7f4d362bf9e2d085eda70685da11 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 16 Apr 2019 18:04:41 -0700 Subject: [PATCH 18/39] masking for hardcoded c1 binary P/R --- ...hardcoded_categories_shared_question_rnn_two_ffns_losses.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml index 0065fb3..d5742cd 100644 --- a/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml +++ b/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml @@ -144,6 +144,7 @@ pipeline: flow2_c1_precision_recall: type: PrecisionRecallStatistics priority: 2.4 + use_masking: True use_word_mappings: True #show_class_scores: True #show_confusion_matrix: True @@ -213,6 +214,7 @@ pipeline: flow3_binary_precision_recall: type: PrecisionRecallStatistics priority: 3.4 + use_masking: True use_word_mappings: True #show_class_scores: True #show_confusion_matrix: True From 6a07a416f17285318e63f38b17da694dbb9310ad Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 16 Apr 2019 18:53:21 -0700 Subject: [PATCH 19/39] cleanups and config for c1_bin shared all encoders --- ...d_shared_question_rnn_two_ffns_losses.yml} | 96 ++--- ...nn_shared_all_encoders_two_ffns_losses.yml | 401 ++++++++++++++++++ ...n_shared_question_rnn_two_ffns_losses.yml} | 138 +++--- 3 files changed, 518 insertions(+), 117 deletions(-) rename 
configs/vqa_med_2019/vf/{c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml => c1_binary_vf_cat_hard_shared_question_rnn_two_ffns_losses.yml} (72%) create mode 100644 configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml rename configs/vqa_med_2019/vf/{c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml => c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses.yml} (74%) diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_cat_hard_shared_question_rnn_two_ffns_losses.yml similarity index 72% rename from configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml rename to configs/vqa_med_2019/vf/c1_binary_vf_cat_hard_shared_question_rnn_two_ffns_losses.yml index d5742cd..4c2fe60 100644 --- a/configs/vqa_med_2019/vf/c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses.yml +++ b/configs/vqa_med_2019/vf/c1_binary_vf_cat_hard_shared_question_rnn_two_ffns_losses.yml @@ -20,9 +20,9 @@ validation: pipeline: - name: c1_binary_vf_hardcoded_categories_shared_question_rnn_two_ffns_losses + name: c1_binary_vf_cat_hard_shared_question_rnn_two_ffns_losses - ################# FLOW 0: SHARED ################# + ################# PIPE 0: SHARED ################# # Add global variables. global_publisher: @@ -37,10 +37,10 @@ pipeline: type: BatchSizeStatistics priority: 0.1 - ################# FLOW 0: SHARED QUESTION ################# + ################# PIPE 0: SHARED QUESTION ################# # Questions encoding. 
- flow0_question_tokenizer: + pipe0_question_tokenizer: priority: 0.2 type: SentenceTokenizer streams: @@ -48,7 +48,7 @@ pipeline: outputs: tokenized_questions # Model 1: question embeddings - flow0_question_embeddings: + pipe0_question_embeddings: type: SentenceEmbeddings priority: 0.3 # LOAD AND FREEZE # @@ -66,7 +66,7 @@ pipeline: outputs: embedded_questions # Model 2: question RNN - flow0_lstm: + pipe0_lstm: priority: 0.4 type: RecurrentNeuralNetwork cell_type: LSTM @@ -87,12 +87,12 @@ pipeline: input_size: embeddings_size prediction_size: question_lstm_output_size - ################# FLOW 1: CATEGORY ################# + ################# PIPE 1: CATEGORY ################# - ################# FLOW 2: C1 question ################# + ################# PIPE 2: C1 question ################# - # Answer encoding for flow 2. - flow2_c1_answer_indexer: + # Answer encoding for pipe 2. + pipe2_c1_answer_indexer: type: LabelIndexer priority: 2.0 data_folder: ~/data/vqa-med @@ -101,13 +101,13 @@ pipeline: export_word_mappings_to_globals: True streams: inputs: answers - outputs: flow2_c1_answers_without_yn_ids + outputs: pipe2_c1_answers_without_yn_ids globals: vocabulary_size: vocabulary_size_c1_without_yn word_mappings: word_mappings_c1_without_yn # Sample masking based on categories. - flow2_c1_string_to_mask: + pipe2_c1_string_to_mask: priority: 2.1 type: StringToMask globals: @@ -115,33 +115,33 @@ pipeline: streams: strings: category_names # predicted_question_categories_names ## FOR NOW! 
string_indices: predicted_c1_question_categories_indices # NOT USED - masks: flow2_c1_masks + masks: pipe2_c1_masks # Model 4: FFN C1 answering - flow2_c1_lstm: + pipe2_c1_lstm: priority: 2.2 type: FeedForwardNetwork hidden: [50] dropout_rate: 0.5 streams: inputs: lstm_activations_questions - predictions: flow2_c1_predictions + predictions: pipe2_c1_predictions globals: input_size: question_lstm_output_size # Set by global publisher prediction_size: vocabulary_size_c1_without_yn - flow2_c1_nllloss: + pipe2_c1_nllloss: type: NLLLoss priority: 2.3 targets_dim: 1 use_masking: True streams: - predictions: flow2_c1_predictions - masks: flow2_c1_masks - targets: flow2_c1_answers_without_yn_ids - loss: flow2_c1_loss + predictions: pipe2_c1_predictions + masks: pipe2_c1_masks + targets: pipe2_c1_answers_without_yn_ids + loss: pipe2_c1_loss - flow2_c1_precision_recall: + pipe2_c1_precision_recall: type: PrecisionRecallStatistics priority: 2.4 use_masking: True @@ -149,21 +149,21 @@ pipeline: #show_class_scores: True #show_confusion_matrix: True streams: - masks: flow2_c1_masks - predictions: flow2_c1_predictions - targets: flow2_c1_answers_without_yn_ids + masks: pipe2_c1_masks + predictions: pipe2_c1_predictions + targets: pipe2_c1_answers_without_yn_ids globals: word_mappings: word_mappings_c1_without_yn num_classes: vocabulary_size_c1_without_yn statistics: - precision: flow2_c1_precision - recall: flow2_c1_recall - f1score: flow2_c1_f1score + precision: pipe2_c1_precision + recall: pipe2_c1_recall + f1score: pipe2_c1_f1score - ################# FLOW 3: BINARY question ################# + ################# PIPE 3: BINARY question ################# - # Answer encoding for flow 3. - flow3_binary_answer_indexer: + # Answer encoding for pipe 3. 
+ pipe3_binary_answer_indexer: type: LabelIndexer priority: 3.0 data_folder: ~/data/vqa-med @@ -172,12 +172,12 @@ pipeline: export_word_mappings_to_globals: True streams: inputs: answers - outputs: flow3_binary_answers_ids + outputs: pipe3_binary_answers_ids globals: vocabulary_size: vocabulary_size_binary_yn word_mappings: word_mappings_binary_yn - flow3_binary_string_to_mask: + pipe3_binary_string_to_mask: priority: 3.1 type: StringToMask globals: @@ -185,33 +185,33 @@ pipeline: streams: strings: category_names # predicted_question_categories_names ## FOR NOW! string_indices: predicted_binary_question_categories_indices # NOT USED - masks: flow3_binary_masks + masks: pipe3_binary_masks # Model 4: FFN C1 answering - flow3_binary_lstm: + pipe3_binary_lstm: priority: 3.2 type: FeedForwardNetwork hidden: [50] dropout_rate: 0.5 streams: inputs: lstm_activations_questions - predictions: flow3_binary_predictions + predictions: pipe3_binary_predictions globals: input_size: question_lstm_output_size # Set by global publisher prediction_size: vocabulary_size_binary_yn - flow3_binary_nllloss: + pipe3_binary_nllloss: type: NLLLoss priority: 3.3 targets_dim: 1 use_masking: True streams: - predictions: flow3_binary_predictions - masks: flow3_binary_masks - targets: flow3_binary_answers_ids - loss: flow3_binary_loss + predictions: pipe3_binary_predictions + masks: pipe3_binary_masks + targets: pipe3_binary_answers_ids + loss: pipe3_binary_loss - flow3_binary_precision_recall: + pipe3_binary_precision_recall: type: PrecisionRecallStatistics priority: 3.4 use_masking: True @@ -219,22 +219,22 @@ pipeline: #show_class_scores: True #show_confusion_matrix: True streams: - masks: flow3_binary_masks - predictions: flow3_binary_predictions - targets: flow3_binary_answers_ids + masks: pipe3_binary_masks + predictions: pipe3_binary_predictions + targets: pipe3_binary_answers_ids globals: word_mappings: word_mappings_binary_yn num_classes: vocabulary_size_binary_yn statistics: - precision: 
flow3_binary_precision - recall: flow3_binary_recall - f1score: flow3_binary_f1score + precision: pipe3_binary_precision + recall: pipe3_binary_recall + f1score: pipe3_binary_f1score # Viewers. viewer: type: StreamViewer priority: 100.1 - input_streams: questions,answers, category_names,predicted_question_categories_names, flow2_c1_masks,flow2_c1_answers_without_yn_ids,flow2_c1_predictions, flow3_binary_masks,flow3_binary_answers_ids,flow3_binary_predictions + input_streams: questions,answers, category_names,predicted_question_categories_names, pipe2_c1_masks,pipe2_c1_answers_without_yn_ids,pipe2_c1_predictions, pipe3_binary_masks,pipe3_binary_answers_ids,pipe3_binary_predictions #: pipeline diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml new file mode 100644 index 0000000..4ffc007 --- /dev/null +++ b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml @@ -0,0 +1,401 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1 + dataloader: + num_workers: 4 + + +pipeline: + name: c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_binary_word_to_ix] + values: [100, 2, 10, 100,{"C1": 0}, {"BINARY": 0}] + + # Statistics. 
+ batch_size: + type: BatchSizeStatistics + priority: 0.1 + + # Questions encoding. + pipe1_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: 
num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + vocabulary_size: vocabulary_size_all_c1_binasry + word_mappings: word_mappings_all_c1_binary + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. 
+ image_encoder: + type: TorchVisionWrapper + priority: 2.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 question ################# + + # Answer encoding for PIPE 5. + pipe5_c1_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_without_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_c1_answers_without_yn_ids + globals: + vocabulary_size: vocabulary_size_c1_without_yn + word_mappings: word_mappings_c1_without_yn + + # Sample masking based on categories. 
+ pipe5_c1_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c1_question_categories_indices # NOT USED + masks: pipe5_c1_masks + + # Model 4: FFN C1 answering + pipe5_c1_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_c1_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c1_without_yn + + pipe5_c1_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_c1_predictions + masks: pipe5_c1_masks + targets: pipe5_c1_answers_without_yn_ids + loss: pipe5_c1_loss + + pipe5_c1_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_c1_masks + predictions: pipe5_c1_predictions + targets: pipe5_c1_answers_without_yn_ids + globals: + word_mappings: word_mappings_c1_without_yn + num_classes: vocabulary_size_c1_without_yn + statistics: + precision: pipe5_c1_precision + recall: pipe5_c1_recall + f1score: pipe5_c1_f1score + + ################# PIPE 6: BINARY question ################# + + # Answer encoding for pipe 6. + pipe6_binary_answer_indexer: + type: LabelIndexer + priority: 6.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals. 
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe6_binary_answers_ids + globals: + vocabulary_size: vocabulary_size_binary_yn + word_mappings: word_mappings_binary_yn + + pipe6_binary_string_to_mask: + priority: 6.2 + type: StringToMask + globals: + word_mappings: category_binary_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_binary_question_categories_indices # NOT USED + masks: pipe6_binary_masks + + # Model 4: FFN C1 answering + pipe6_binary_ffn: + priority: 6.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_binary_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_binary_yn + + pipe6_binary_nllloss: + type: NLLLoss + priority: 6.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe6_binary_predictions + masks: pipe6_binary_masks + targets: pipe6_binary_answers_ids + loss: pipe6_binary_loss + + pipe6_binary_precision_recall: + type: PrecisionRecallStatistics + priority: 6.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe6_binary_masks + predictions: pipe6_binary_predictions + targets: pipe6_binary_answers_ids + globals: + word_mappings: word_mappings_binary_yn + num_classes: vocabulary_size_binary_yn + statistics: + precision: pipe6_binary_precision + recall: pipe6_binary_recall + f1score: pipe6_binary_f1score + + ################# PIPE 7: MERGE ANSWERS ################# + + # Merge predictions + pipe7_merged_predictions: + type: JoinMaskedPredictions + priority: 7.1 + # Names of used input streams. 
+ input_prediction_streams: [pipe5_c1_predictions, pipe6_binary_predictions] + input_mask_streams: [pipe5_c1_masks, pipe6_binary_masks] + input_word_mappings: [word_mappings_c1_without_yn, word_mappings_binary_yn] + globals: + output_word_mappings: word_mappings_all_c1_binary + streams: + output_strings: pipe7_merged_predictions + output_indices: pipe7_merged_pred_indices + + # Statistics. + pipe7_merged_precision_recall: + type: PrecisionRecallStatistics + priority: 7.2 + # Use prediction indices instead of distributions. + use_prediction_distributions: False + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + globals: + word_mappings: word_mappings_all_c1_binary + streams: + targets: all_answers_ids + predictions: pipe7_merged_pred_indices + statistics: + precision: pipe7_merged_precision + recall: pipe7_merged_recall + f1score: pipe7_merged_f1score + + # Viewers. + viewer: + type: StreamViewer + priority: 4.3 + input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_binary_masks,pipe6_binary_answers_ids,pipe6_binary_predictions, pipe7_merged_predictions + + +#: pipeline diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses.yml similarity index 74% rename from configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml rename to configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses.yml index a7e755d..b7281c9 100644 --- a/configs/vqa_med_2019/vf/c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses.yml +++ b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses.yml @@ -20,9 +20,9 @@ validation: pipeline: - name: 
c1_binary_vf_rnn_category_classification_shared_question_rnn_two_ffns_losses + name: c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses - ################# FLOW 0: SHARED ################# + ################# PIPE 0: SHARED ################# # Add global variables. global_publisher: @@ -30,17 +30,17 @@ pipeline: priority: 0 # Add input_size to globals. keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_binary_word_to_ix] - values: [100, 2, 10, 100,{"C1": 0}, {"BINARY": 0}] + values: [100, 2, 10, 100, {"C1": 0}, {"BINARY": 0}] # Statistics. batch_size: type: BatchSizeStatistics priority: 0.1 - ################# FLOW 0: SHARED QUESTION ################# + ################# PIPE 0: SHARED QUESTION ################# - # Questions encoding. - flow0_question_tokenizer: + # Questions encoding. + pipe0_question_tokenizer: priority: 0.2 type: SentenceTokenizer streams: @@ -48,7 +48,7 @@ pipeline: outputs: tokenized_questions # Model 1: question embeddings - flow0_question_embeddings: + pipe0_question_embeddings: type: SentenceEmbeddings priority: 0.3 # LOAD AND FREEZE # @@ -66,7 +66,7 @@ pipeline: outputs: embedded_questions # Model 2: question RNN - flow0_lstm: + pipe0_lstm: priority: 0.4 type: RecurrentNeuralNetwork cell_type: LSTM @@ -88,7 +88,7 @@ pipeline: prediction_size: question_lstm_output_size # Answer encoding - flow0_all_answer_indexer: + pipe0_all_answer_indexer: type: LabelIndexer priority: 0.6 data_folder: ~/data/vqa-med @@ -103,10 +103,10 @@ pipeline: word_mappings: word_mappings_all_c1_binary - ################# FLOW 1: CATEGORY ################# + ################# PIPE 1: CATEGORY ################# # Model 1: question embeddings - flow1_question_embeddings: + pipe1_question_embeddings: type: SentenceEmbeddings priority: 1.1 # LOAD AND FREEZE # @@ -121,10 +121,10 @@ pipeline: word_mappings_file: questions.all.word.mappings.csv streams: inputs: 
tokenized_questions - outputs: flow1_embedded_questions + outputs: pipe1_embedded_questions # Model 2: question RNN - flow1_lstm: + pipe1_lstm: priority: 1.2 type: RecurrentNeuralNetwork cell_type: LSTM @@ -139,14 +139,14 @@ pipeline: use_logsoftmax: False dropout_rate: 0.5 streams: - inputs: flow1_embedded_questions - predictions: flow1_lstm_activations_questions + inputs: pipe1_embedded_questions + predictions: pipe1_lstm_activations_questions globals: input_size: embeddings_size prediction_size: question_lstm_output_size # Model 3: FFN question category - flow1_classifier: + pipe1_classifier: priority: 1.3 type: FeedForwardNetwork # LOAD AND FREEZE # @@ -158,37 +158,37 @@ pipeline: hidden: [50] dropout_rate: 0.5 streams: - inputs: flow1_lstm_activations_questions - predictions: flow1_predicted_question_categories_preds + inputs: pipe1_lstm_activations_questions + predictions: pipe1_predicted_question_categories_preds globals: input_size: question_lstm_output_size # Set by global publisher prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK - flow1_category_decoder: + pipe1_category_decoder: priority: 1.4 type: WordDecoder # Use the same word mappings as label indexer. import_word_mappings_from_globals: True streams: - inputs: flow1_predicted_question_categories_preds - outputs: flow1_predicted_question_categories_names + inputs: pipe1_predicted_question_categories_preds + outputs: pipe1_predicted_question_categories_names globals: vocabulary_size: num_categories word_mappings: category_word_mappings - flow1_category_accuracy: + pipe1_category_accuracy: type: AccuracyStatistics priority: 1.5 streams: targets: category_ids - predictions: flow1_predicted_question_categories_preds + predictions: pipe1_predicted_question_categories_preds statistics: accuracy: categorization_accuracy - ################# FLOW 2: C1 question ################# + ################# PIPE 2: C1 question ################# - # Answer encoding for flow 2. 
- flow2_c1_answer_indexer: + # Answer encoding for PIPE 2. + pipe2_c1_answer_indexer: type: LabelIndexer priority: 2.0 data_folder: ~/data/vqa-med @@ -197,47 +197,47 @@ pipeline: export_word_mappings_to_globals: True streams: inputs: answers - outputs: flow2_c1_answers_without_yn_ids + outputs: pipe2_c1_answers_without_yn_ids globals: vocabulary_size: vocabulary_size_c1_without_yn word_mappings: word_mappings_c1_without_yn # Sample masking based on categories. - flow2_c1_string_to_mask: + pipe2_c1_string_to_mask: priority: 2.1 type: StringToMask globals: word_mappings: category_c1_word_to_ix streams: - strings: flow1_predicted_question_categories_names + strings: pipe1_predicted_question_categories_names string_indices: predicted_c1_question_categories_indices # NOT USED - masks: flow2_c1_masks + masks: pipe2_c1_masks # Model 4: FFN C1 answering - flow2_c1_lstm: + pipe2_c1_lstm: priority: 2.2 type: FeedForwardNetwork hidden: [50] dropout_rate: 0.5 streams: inputs: lstm_activations_questions - predictions: flow2_c1_predictions + predictions: pipe2_c1_predictions globals: input_size: question_lstm_output_size # Set by global publisher prediction_size: vocabulary_size_c1_without_yn - flow2_c1_nllloss: + pipe2_c1_nllloss: type: NLLLoss priority: 2.3 targets_dim: 1 use_masking: True streams: - predictions: flow2_c1_predictions - masks: flow2_c1_masks - targets: flow2_c1_answers_without_yn_ids - loss: flow2_c1_loss + predictions: pipe2_c1_predictions + masks: pipe2_c1_masks + targets: pipe2_c1_answers_without_yn_ids + loss: pipe2_c1_loss - flow2_c1_precision_recall: + pipe2_c1_precision_recall: type: PrecisionRecallStatistics priority: 2.4 use_word_mappings: True @@ -245,21 +245,21 @@ pipeline: #show_class_scores: True #show_confusion_matrix: True streams: - masks: flow2_c1_masks - predictions: flow2_c1_predictions - targets: flow2_c1_answers_without_yn_ids + masks: pipe2_c1_masks + predictions: pipe2_c1_predictions + targets: pipe2_c1_answers_without_yn_ids globals: 
word_mappings: word_mappings_c1_without_yn num_classes: vocabulary_size_c1_without_yn statistics: - precision: flow2_c1_precision - recall: flow2_c1_recall - f1score: flow2_c1_f1score + precision: pipe2_c1_precision + recall: pipe2_c1_recall + f1score: pipe2_c1_f1score - ################# FLOW 3: BINARY question ################# + ################# PIPE 3: BINARY question ################# - # Answer encoding for flow 3. - flow3_binary_answer_indexer: + # Answer encoding for PIPE 3. + pipe3_binary_answer_indexer: type: LabelIndexer priority: 3.0 data_folder: ~/data/vqa-med @@ -268,46 +268,46 @@ pipeline: export_word_mappings_to_globals: True streams: inputs: answers - outputs: flow3_binary_answers_ids + outputs: pipe3_binary_answers_ids globals: vocabulary_size: vocabulary_size_binary_yn word_mappings: word_mappings_binary_yn - flow3_binary_string_to_mask: + pipe3_binary_string_to_mask: priority: 3.1 type: StringToMask globals: word_mappings: category_binary_word_to_ix streams: - strings: flow1_predicted_question_categories_names + strings: pipe1_predicted_question_categories_names string_indices: predicted_binary_question_categories_indices # NOT USED - masks: flow3_binary_masks + masks: pipe3_binary_masks # Model 4: FFN C1 answering - flow3_binary_lstm: + pipe3_binary_lstm: priority: 3.2 type: FeedForwardNetwork hidden: [50] dropout_rate: 0.5 streams: inputs: lstm_activations_questions - predictions: flow3_binary_predictions + predictions: pipe3_binary_predictions globals: input_size: question_lstm_output_size # Set by global publisher prediction_size: vocabulary_size_binary_yn - flow3_binary_nllloss: + pipe3_binary_nllloss: type: NLLLoss priority: 3.3 targets_dim: 1 use_masking: True streams: - predictions: flow3_binary_predictions - masks: flow3_binary_masks - targets: flow3_binary_answers_ids - loss: flow3_binary_loss + predictions: pipe3_binary_predictions + masks: pipe3_binary_masks + targets: pipe3_binary_answers_ids + loss: pipe3_binary_loss - 
flow3_binary_precision_recall: + pipe3_binary_precision_recall: type: PrecisionRecallStatistics priority: 3.4 use_word_mappings: True @@ -315,26 +315,26 @@ pipeline: #show_class_scores: True #show_confusion_matrix: True streams: - masks: flow3_binary_masks - predictions: flow3_binary_predictions - targets: flow3_binary_answers_ids + masks: pipe3_binary_masks + predictions: pipe3_binary_predictions + targets: pipe3_binary_answers_ids globals: word_mappings: word_mappings_binary_yn num_classes: vocabulary_size_binary_yn statistics: - precision: flow3_binary_precision - recall: flow3_binary_recall - f1score: flow3_binary_f1score + precision: pipe3_binary_precision + recall: pipe3_binary_recall + f1score: pipe3_binary_f1score - ################# FLOW 4: MERGE ANSWERS ################# + ################# PIPE 4: MERGE ANSWERS ################# # Merge predictions merged_predictions: type: JoinMaskedPredictions priority: 4.1 # Names of used input streams. - input_prediction_streams: [flow2_c1_predictions, flow3_binary_predictions] - input_mask_streams: [flow2_c1_masks, flow3_binary_masks] + input_prediction_streams: [pipe2_c1_predictions, pipe3_binary_predictions] + input_mask_streams: [pipe2_c1_masks, pipe3_binary_masks] input_word_mappings: [word_mappings_c1_without_yn, word_mappings_binary_yn] globals: output_word_mappings: word_mappings_all_c1_binary @@ -365,7 +365,7 @@ pipeline: viewer: type: StreamViewer priority: 4.3 - input_streams: questions,answers, category_names,predicted_question_categories_names, flow2_c1_masks,flow2_c1_answers_without_yn_ids,flow2_c1_predictions, flow3_binary_masks,flow3_binary_answers_ids,flow3_binary_predictions, merged_predictions + input_streams: questions,answers, category_names,predicted_question_categories_names, pipe2_c1_masks,pipe2_c1_answers_without_yn_ids,pipe2_c1_predictions, pipe3_binary_masks,pipe3_binary_answers_ids,pipe3_binary_predictions, merged_predictions #: pipeline From 88b449f4c5a502b93033b731dc8b05114d6571b8 Mon 
Sep 17 00:00:00 2001 From: tkornut Date: Wed, 17 Apr 2019 13:02:24 -0700 Subject: [PATCH 20/39] Changed default out_of_vocabulary value to -100, which is used by PyTorch to indicate targets that will be omitted --- configs/default/components/masking/string_to_mask.yml | 7 +++++-- configs/default/components/text/label_indexer.yml | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/configs/default/components/masking/string_to_mask.yml b/configs/default/components/masking/string_to_mask.yml index bcc363d..2b74456 100644 --- a/configs/default/components/masking/string_to_mask.yml +++ b/configs/default/components/masking/string_to_mask.yml @@ -4,9 +4,12 @@ # 1. CONFIGURATION PARAMETERS that will be LOADED by the component. #################################################################### -# Value that will be used when word is out of vocavbulary (LOADED) +# Value that will be used when word is out of vocabulary (LOADED) # (Mask for that element will be 0 as well) -out_of_vocabulary_value: -1 +# -100 is the default value used by PyTorch loss functions to specify +# target values that will be ignored and do not contribute to the input gradient.
+# (ignore_index=-100) +out_of_vocabulary_value: -100 streams: #################################################################### diff --git a/configs/default/components/text/label_indexer.yml b/configs/default/components/text/label_indexer.yml index 5b871e9..bfe9aa0 100644 --- a/configs/default/components/text/label_indexer.yml +++ b/configs/default/components/text/label_indexer.yml @@ -25,9 +25,12 @@ import_word_mappings_from_globals: False # Flag informing whether word mappings will be exported to globals (LOADED) export_word_mappings_to_globals: False -# Value that will be used when word is out of vocavbulary (LOADED) +# Value that will be used when word is out of vocabulary (LOADED) # (Mask for that element will be 0 as well) -out_of_vocabulary_value: -1 +# -100 is the default value used by PyTorch loss functions to specify +# target values that will be ignored and do not contribute to the input gradient. +# (ignore_index=-100) +out_of_vocabulary_value: -100 streams: #################################################################### From d47ddd321c1d316e02336c4b2f5af61f9efd6c90 Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 17 Apr 2019 13:02:46 -0700 Subject: [PATCH 21/39] c1 + c2 + Y/N multimodal config --- ...nn_shared_all_encoders_two_ffns_losses.yml | 2 +- ...nn_shared_all_encoders_two_ffns_losses.yml | 472 ++++++++++++++++++ 2 files changed, 473 insertions(+), 1 deletion(-) create mode 100644 configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml diff --git a/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml index 4ffc007..6996f91 100644 --- a/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml +++ b/configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml @@ -394,7 +394,7 @@ pipeline: # Viewers.
viewer: type: StreamViewer - priority: 4.3 + priority: 7.3 input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_binary_masks,pipe6_binary_answers_ids,pipe6_binary_predictions, pipe7_merged_predictions diff --git a/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml new file mode 100644 index 0000000..efa6580 --- /dev/null +++ b/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml @@ -0,0 +1,472 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1,C3 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1_c3_binary_yn.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1,C3 + dataloader: + num_workers: 4 + + +pipeline: + name: c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_c3_word_to_ix, category_binary_word_to_ix] + values: [100, 2, 10, 100,{"C1": 0}, {"C3": 0}, {"BINARY": 0}] + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 0.1 + + # Questions encoding. 
+ pipe1_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: 
WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + #vocabulary_size: vocabulary_size_all_c1_c3_binary + word_mappings: word_mappings_all_c1_c3_binary + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. 
+ image_encoder: + type: TorchVisionWrapper + priority: 2.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 question ################# + + # Answer encoding for PIPE 5. + pipe5_c1_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_without_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_c1_answers_without_yn_ids + globals: + vocabulary_size: vocabulary_size_c1_without_yn + word_mappings: word_mappings_c1_without_yn + + # Sample masking based on categories. 
+ pipe5_c1_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c1_question_categories_indices # NOT USED + masks: pipe5_c1_masks + + # Model 4: FFN C1 answering + pipe5_c1_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_c1_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c1_without_yn + + pipe5_c1_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_c1_predictions + masks: pipe5_c1_masks + targets: pipe5_c1_answers_without_yn_ids + loss: pipe5_c1_loss + + pipe5_c1_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_c1_masks + predictions: pipe5_c1_predictions + targets: pipe5_c1_answers_without_yn_ids + globals: + word_mappings: word_mappings_c1_without_yn + #num_classes: vocabulary_size_c1_without_yn + statistics: + precision: pipe5_c1_precision + recall: pipe5_c1_recall + f1score: pipe5_c1_f1score + + ################# PIPE 6: C3 question ################# + + # Answer encoding for PIPE 6. + pipe6_c3_answer_indexer: + type: LabelIndexer + priority: 6.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c3.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe6_c3_answers_ids + globals: + vocabulary_size: vocabulary_size_c3 + word_mappings: word_mappings_c3 + + # Sample masking based on categories. 
+ pipe6_c3_string_to_mask: + priority: 6.2 + type: StringToMask + globals: + word_mappings: category_c3_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c3_question_categories_indices # NOT USED + masks: pipe6_c3_masks + + # Model 4: FFN C3 answering + pipe6_c3_ffn: + priority: 6.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c3_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c3 + + pipe6_c3_nllloss: + type: NLLLoss + priority: 6.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe6_c3_predictions + masks: pipe6_c3_masks + targets: pipe6_c3_answers_ids + loss: pipe6_c3_loss + + pipe6_c3_precision_recall: + type: PrecisionRecallStatistics + priority: 6.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe6_c3_masks + predictions: pipe6_c3_predictions + targets: pipe6_c3_answers_ids + globals: + word_mappings: word_mappings_c3 + #num_classes: vocabulary_size_c3 + statistics: + precision: pipe6_c3_precision + recall: pipe6_c3_recall + f1score: pipe6_c3_f1score + + ################# PIPE 7: BINARY question ################# + + # Answer encoding for pipe 7. + pipe7_binary_answer_indexer: + type: LabelIndexer + priority: 7.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals.
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe7_binary_answers_ids + globals: + vocabulary_size: vocabulary_size_binary_yn + word_mappings: word_mappings_binary_yn + + pipe7_binary_string_to_mask: + priority: 7.2 + type: StringToMask + globals: + word_mappings: category_binary_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_binary_question_categories_indices # NOT USED + masks: pipe7_binary_masks + + # Model 4: FFN binary answering + pipe7_binary_ffn: + priority: 7.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe7_binary_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_binary_yn + + pipe7_binary_nllloss: + type: NLLLoss + priority: 7.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe7_binary_predictions + masks: pipe7_binary_masks + targets: pipe7_binary_answers_ids + loss: pipe7_binary_loss + + pipe7_binary_precision_recall: + type: PrecisionRecallStatistics + priority: 7.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe7_binary_masks + predictions: pipe7_binary_predictions + targets: pipe7_binary_answers_ids + globals: + word_mappings: word_mappings_binary_yn + #num_classes: vocabulary_size_binary_yn + statistics: + precision: pipe7_binary_precision + recall: pipe7_binary_recall + f1score: pipe7_binary_f1score + + ################# PIPE 8: MERGE ANSWERS ################# + + # Merge predictions + pipe8_merged_predictions: + type: JoinMaskedPredictions + priority: 8.1 + # Names of used input streams.
+ input_prediction_streams: [pipe5_c1_predictions, pipe6_c3_predictions, pipe7_binary_predictions] + input_mask_streams: [pipe5_c1_masks, pipe6_c3_masks, pipe7_binary_masks] + input_word_mappings: [word_mappings_c1_without_yn, word_mappings_c3, word_mappings_binary_yn] + globals: + output_word_mappings: word_mappings_all_c1_c3_binary + streams: + output_strings: pipe8_merged_predictions + output_indices: pipe8_merged_pred_indices + + # Statistics. + pipe8_merged_precision_recall: + type: PrecisionRecallStatistics + priority: 8.2 + # Use prediction indices instead of distributions. + use_prediction_distributions: False + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + globals: + word_mappings: word_mappings_all_c1_c3_binary + streams: + targets: all_answers_ids + predictions: pipe8_merged_pred_indices + statistics: + precision: pipe8_merged_precision + recall: pipe8_merged_recall + f1score: pipe8_merged_f1score + + # Viewers. + viewer: + type: StreamViewer + priority: 8.3 + input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_c3_masks,pipe6_c3_answers_ids,pipe6_c3_predictions, pipe7_binary_masks,pipe7_binary_answers_ids,pipe7_binary_predictions, pipe8_merged_predictions + + +#: pipeline From 8d93eace685a72fee311580af6c51ab2a0976e45 Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 17 Apr 2019 14:17:27 -0700 Subject: [PATCH 22/39] c2 multimodal model --- ...c2_classification_all_rnn_vgg16_concat.yml | 101 ++++++++++++++++++ .../default_c2_classification.yml | 91 ++++++++++++++++ 2 files changed, 192 insertions(+) create mode 100644 configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml create mode 100644 configs/vqa_med_2019/c2_classification/default_c2_classification.yml diff --git a/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml 
b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml new file mode 100644 index 0000000..d4745b6 --- /dev/null +++ b/configs/vqa_med_2019/c2_classification/c2_classification_all_rnn_vgg16_concat.yml @@ -0,0 +1,101 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml + +pipeline: + name: vqa_med_c2_classification_all_rnn_vgg_concat + + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_embeddings_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size] + values: [100, 2, 10, 100] + + # First subpipeline: question. + # Questions encoding. + question_tokenizer: + type: SentenceTokenizer + priority: 1.1 + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: Embeddings + question_embeddings: + type: SentenceEmbeddings + priority: 1.2 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + question_lstm: + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + priority: 1.3 + use_logsoftmax: False + initial_state_trainable: False + #num_layers: 5 + hidden_size: 50 + streams: + inputs: embedded_questions + predictions: question_activations + globals: + input_size: embeddings_size + prediction_size: question_embeddings_output_size + + # 2nd subpipeline: image size. + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 2.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + # 3rd subpipeline: image. + # Image encoder. 
+ image_encoder: + type: TorchVisionWrapper + priority: 3.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + # 4th subpipeline: concatenation + FF. + concat: + type: Concatenation + priority: 4.1 + input_streams: [question_activations,image_size_activations,image_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,10],[-1,100]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: output_size + + + classifier: + type: FeedForwardNetwork + hidden_sizes: [100] + priority: 4.2 + streams: + inputs: concatenated_activations + globals: + input_size: output_size + prediction_size: vocabulary_size_c2 + + + #: pipeline diff --git a/configs/vqa_med_2019/c2_classification/default_c2_classification.yml b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml new file mode 100644 index 0000000..3df45b4 --- /dev/null +++ b/configs/vqa_med_2019/c2_classification/default_c2_classification.yml @@ -0,0 +1,91 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C2 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c2.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C2 + dataloader: + num_workers: 4 + + +pipeline: + + # Answer encoding. + answer_indexer: + type: LabelIndexer + priority: 0.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c2.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: answers_ids + globals: + vocabulary_size: vocabulary_size_c2 + word_mappings: word_mappings_c2 + + + # Predictions decoder. 
+ prediction_decoder: + type: WordDecoder + priority: 10.1 + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: predictions + outputs: predicted_answers + globals: + vocabulary_size: vocabulary_size_c2 + word_mappings: word_mappings_c2 + + # Loss + nllloss: + type: NLLLoss + priority: 10.2 + targets_dim: 1 + streams: + targets: answers_ids + loss: loss + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 100.1 + + #accuracy: + # type: AccuracyStatistics + # priority: 100.2 + # streams: + # targets: answers_ids + + precision_recall: + type: PrecisionRecallStatistics + priority: 100.3 + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + streams: + targets: answers_ids + globals: + word_mappings: word_mappings_c2 + num_classes: vocabulary_size_c2 + + # Viewers. + viewer: + type: StreamViewer + priority: 100.4 + input_streams: questions,category_names,answers,predicted_answers + +#: pipeline From a7a909779360c02a3fb17317b84dd5233961df32 Mon Sep 17 00:00:00 2001 From: tkornut Date: Wed, 17 Apr 2019 14:20:16 -0700 Subject: [PATCH 23/39] rename c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses --- ...binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename configs/vqa_med_2019/vf/{c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml => c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml} (99%) diff --git a/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml similarity index 99% rename from configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml rename to configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml index efa6580..3b1d952 100644 --- 
a/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses.yml +++ b/configs/vqa_med_2019/vf/c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses.yml @@ -20,7 +20,7 @@ validation: pipeline: - name: c1_c3_binary_vf_cat_rnn_shared_all_encoders_two_ffns_losses + name: c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses ################# PIPE 0: SHARED ################# From 4bc779cec6f2c36e71840193a45b58e981ea23d0 Mon Sep 17 00:00:00 2001 From: tkornut Date: Thu, 18 Apr 2019 06:17:18 -0700 Subject: [PATCH 24/39] microupdate of wikitext lm rnn config --- configs/wikitext/wikitext_language_modeling_rnn.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/configs/wikitext/wikitext_language_modeling_rnn.yml b/configs/wikitext/wikitext_language_modeling_rnn.yml index 0d5a5c3..88274b9 100644 --- a/configs/wikitext/wikitext_language_modeling_rnn.yml +++ b/configs/wikitext/wikitext_language_modeling_rnn.yml @@ -72,7 +72,7 @@ pipeline: lstm: type: RecurrentNeuralNetwork priority: 3 - #initial_state_trainable: False + initial_state_trainable: False streams: inputs: embedded_sources globals: @@ -88,4 +88,10 @@ pipeline: targets: indexed_targets loss: loss + # Viewers. 
+ viewer: + type: StreamViewer + priority: 100.1 + input_streams: sources,indexed_targets,targets,predictions + #: pipeline From f2f10fbd2c5d138c9b4d5ce3928f5812dc9fe1a0 Mon Sep 17 00:00:00 2001 From: tkornut Date: Thu, 18 Apr 2019 06:41:06 -0700 Subject: [PATCH 25/39] c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml --- ...n_shared_all_encoders_four_ffns_losses.yml | 542 ++++++++++++++++++ 1 file changed, 542 insertions(+) create mode 100644 configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml new file mode 100644 index 0000000..065a14e --- /dev/null +++ b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml @@ -0,0 +1,542 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1,C2,C3 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1,C2,C3 + dataloader: + num_workers: 4 + + +pipeline: + name: c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_c2_word_to_ix, category_c3_word_to_ix, category_binary_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0}, {"C2": 0}, {"C3": 0}, {"BINARY": 0}] + + # Statistics. 
+ batch_size: + type: BatchSizeStatistics + priority: 0.1 + + # Questions encoding. + pipe1_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: 
num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + #vocabulary_size: vocabulary_size_all_c1_c2_c3_binary + word_mappings: word_mappings_all_c1_c2_c3_binary + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. 
+ image_encoder: + type: TorchVisionWrapper + priority: 2.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 question ################# + + # Answer encoding for PIPE 5. + pipe5_c1_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_without_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_c1_answers_without_yn_ids + globals: + vocabulary_size: vocabulary_size_c1_without_yn + word_mappings: word_mappings_c1_without_yn + + # Sample masking based on categories. 
+ pipe5_c1_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c1_question_categories_indices # NOT USED + masks: pipe5_c1_masks + + # Model 4: FFN C1 answering + pipe5_c1_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_c1_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c1_without_yn + + pipe5_c1_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_c1_predictions + masks: pipe5_c1_masks + targets: pipe5_c1_answers_without_yn_ids + loss: pipe5_c1_loss + + pipe5_c1_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_c1_masks + predictions: pipe5_c1_predictions + targets: pipe5_c1_answers_without_yn_ids + globals: + word_mappings: word_mappings_c1_without_yn + #num_classes: vocabulary_size_c1_without_yn + statistics: + precision: pipe5_c1_precision + recall: pipe5_c1_recall + f1score: pipe5_c1_f1score + + ################# PIPE 6: C2 question ################# + + # Answer encoding for PIPE 6. + pipe6_c2_answer_indexer: + type: LabelIndexer + priority: 6.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c2.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe6_c2_answers_ids + globals: + vocabulary_size: vocabulary_size_c2 + word_mappings: word_mappings_c2 + + # Sample masking based on categories. 
+ pipe6_c2_string_to_mask: + priority: 6.2 + type: StringToMask + globals: + word_mappings: category_c2_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c2_question_categories_indices # NOT USED + masks: pipe6_c2_masks + + # Model 4: FFN C2 answering + pipe6_c2_ffn: + priority: 6.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe6_c2_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c2 + + pipe6_c2_nllloss: + type: NLLLoss + priority: 6.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe6_c2_predictions + masks: pipe6_c2_masks + targets: pipe6_c2_answers_ids + loss: pipe6_c2_loss + + pipe6_c2_precision_recall: + type: PrecisionRecallStatistics + priority: 6.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe6_c2_masks + predictions: pipe6_c2_predictions + targets: pipe6_c2_answers_ids + globals: + word_mappings: word_mappings_c2 + statistics: + precision: pipe6_c2_precision + recall: pipe6_c2_recall + f1score: pipe6_c2_f1score + + ################# PIPE 7: C3 question ################# + + # Answer encoding for PIPE 7. + pipe7_c3_answer_indexer: + type: LabelIndexer + priority: 7.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c3.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe7_c3_answers_ids + globals: + vocabulary_size: vocabulary_size_c3 + word_mappings: word_mappings_c3 + + # Sample masking based on categories.
+ pipe7_c3_string_to_mask: + priority: 7.2 + type: StringToMask + globals: + word_mappings: category_c3_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c3_question_categories_indices # NOT USED + masks: pipe7_c3_masks + + # Model 4: FFN C3 answering + pipe7_c3_ffn: + priority: 7.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe7_c3_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c3 + + pipe7_c3_nllloss: + type: NLLLoss + priority: 7.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe7_c3_predictions + masks: pipe7_c3_masks + targets: pipe7_c3_answers_ids + loss: pipe7_c3_loss + + pipe7_c3_precision_recall: + type: PrecisionRecallStatistics + priority: 7.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe7_c3_masks + predictions: pipe7_c3_predictions + targets: pipe7_c3_answers_ids + globals: + word_mappings: word_mappings_c3 + #num_classes: vocabulary_size_c3 + statistics: + precision: pipe7_c3_precision + recall: pipe7_c3_recall + f1score: pipe7_c3_f1score + + ################# PIPE 8: BINARY question ################# + + # Answer encoding for pipe 8. + pipe8_binary_answer_indexer: + type: LabelIndexer + priority: 8.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.binary_yn.word.mappings.csv + # Export mappings and size to globals.
+ export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe8_binary_answers_ids + globals: + vocabulary_size: vocabulary_size_binary_yn + word_mappings: word_mappings_binary_yn + + pipe8_binary_string_to_mask: + priority: 8.2 + type: StringToMask + globals: + word_mappings: category_binary_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_binary_question_categories_indices # NOT USED + masks: pipe8_binary_masks + + # Model 4: FFN binary answering + pipe8_binary_ffn: + priority: 8.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe8_binary_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_binary_yn + + pipe8_binary_nllloss: + type: NLLLoss + priority: 8.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe8_binary_predictions + masks: pipe8_binary_masks + targets: pipe8_binary_answers_ids + loss: pipe8_binary_loss + + pipe8_binary_precision_recall: + type: PrecisionRecallStatistics + priority: 8.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe8_binary_masks + predictions: pipe8_binary_predictions + targets: pipe8_binary_answers_ids + globals: + word_mappings: word_mappings_binary_yn + #num_classes: vocabulary_size_binary_yn + statistics: + precision: pipe8_binary_precision + recall: pipe8_binary_recall + f1score: pipe8_binary_f1score + + ################# PIPE 9: MERGE ANSWERS ################# + + # Merge predictions. + pipe9_merged_predictions: + type: JoinMaskedPredictions + priority: 9.1 + # Names of used input streams.
+ input_prediction_streams: [pipe5_c1_predictions, pipe6_c2_predictions, pipe7_c3_predictions, pipe8_binary_predictions] + input_mask_streams: [pipe5_c1_masks, pipe6_c2_masks, pipe7_c3_masks, pipe8_binary_masks] + input_word_mappings: [word_mappings_c1_without_yn, word_mappings_c2, word_mappings_c3, word_mappings_binary_yn] + globals: + output_word_mappings: word_mappings_all_c1_c2_c3_binary + streams: + output_strings: pipe9_merged_predictions + output_indices: pipe9_merged_pred_indices + + # Statistics. + pipe9_merged_precision_recall: + type: PrecisionRecallStatistics + priority: 9.2 + # Use prediction indices instead of distributions. + use_prediction_distributions: False + use_word_mappings: True + show_class_scores: True + show_confusion_matrix: True + globals: + word_mappings: word_mappings_all_c1_c2_c3_binary + streams: + targets: all_answers_ids + predictions: pipe9_merged_pred_indices + statistics: + precision: pipe9_merged_precision + recall: pipe9_merged_recall + f1score: pipe9_merged_f1score + + # Viewers. 
+ viewer: + type: StreamViewer + priority: 9.3 + input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe7_c3_masks,pipe7_c3_answers_ids,pipe7_c3_predictions, pipe8_binary_masks,pipe8_binary_answers_ids,pipe8_binary_predictions, pipe9_merged_predictions + + +#: pipeline From 6cf7f216d83cf3b1b8efbb9a0511554f3edd7894 Mon Sep 17 00:00:00 2001 From: tkornut Date: Thu, 18 Apr 2019 06:44:51 -0700 Subject: [PATCH 26/39] added viewing of streams related to C2 --- ...3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml index 065a14e..56ab04b 100644 --- a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml +++ b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml @@ -536,7 +536,7 @@ pipeline: viewer: type: StreamViewer priority: 9.3 - input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe7_c3_masks,pipe7_c3_answers_ids,pipe7_c3_predictions, pipe8_binary_masks,pipe8_binary_answers_ids,pipe8_binary_predictions, pipe9_merged_predictions + input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_c2_masks,pipe6_c2_answers_ids,pipe6_c2_predictions, pipe7_c3_masks,pipe7_c3_answers_ids,pipe7_c3_predictions, pipe8_binary_masks,pipe8_binary_answers_ids,pipe8_binary_predictions, pipe9_merged_predictions #: pipeline From aa829fbf3825f5318b7e27d1363d7ed068840008 Mon Sep 17 00:00:00 2001 From: tkornut Date: Thu, 18 Apr 2019 06:54:44 -0700 Subject: 
[PATCH 27/39] one ffn --- ...t_rnn_shared_all_encoders_one_ffn_loss.yml | 298 ++++++++++++++++++ 1 file changed, 298 insertions(+) create mode 100644 configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml new file mode 100644 index 0000000..3c47597 --- /dev/null +++ b/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml @@ -0,0 +1,298 @@ +# Load config defining problems for training, validation and testing. +default_configs: vqa_med_2019/default_vqa_med_2019.yml + +# Training parameters: +training: + problem: + categories: C1,C2,C3 + sampler: + name: WeightedRandomSampler + weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv + dataloader: + num_workers: 4 + +# Validation parameters: +validation: + problem: + categories: C1,C2,C3 + dataloader: + num_workers: 4 + + +pipeline: + name: c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss + + ################# PIPE 0: SHARED ################# + + # Add global variables. + global_publisher: + type: GlobalVariablePublisher + priority: 0 + # Add input_size to globals. + keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_c2_c3_binary_yn_word_to_ix] + values: [100, 2, 10, 100, {"C1": 0, "C2": 1, "C3": 2, "BINARY": 3}] + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 0.1 + + # Questions encoding. 
+ pipe1_question_tokenizer: + priority: 0.2 + type: SentenceTokenizer + streams: + inputs: questions + outputs: tokenized_questions + + ################# PIPE 0: CATEGORY ################# + + # Model 1: question embeddings + pipe0_question_embeddings: + type: SentenceEmbeddings + priority: 0.3 + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: question_embeddings + freeze: True + ################### + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: pipe0_embedded_questions + + # Model 2: question RNN + pipe0_lstm: + priority: 0.4 + type: RecurrentNeuralNetwork + cell_type: LSTM + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: lstm + freeze: True + ################### + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: pipe0_embedded_questions + predictions: pipe0_questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Model 3: FFN question category + pipe0_classifier: + priority: 0.5 + type: FeedForwardNetwork + # LOAD AND FREEZE # + load: + file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt + model: classifier + freeze: True + ################### + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: pipe0_questions_activations + predictions: pipe0_predicted_question_categories_preds + globals: + input_size: question_lstm_output_size # Set by global publisher + prediction_size: num_categories # C1,C2,C3,C4, BINARY, UNK + + pipe0_category_decoder: + priority: 0.6 + type: 
WordDecoder + # Use the same word mappings as label indexer. + import_word_mappings_from_globals: True + streams: + inputs: pipe0_predicted_question_categories_preds + outputs: pipe0_predicted_question_categories_names + globals: + vocabulary_size: num_categories + word_mappings: category_word_mappings + + pipe0_category_accuracy: + type: AccuracyStatistics + priority: 0.7 + streams: + targets: category_ids + predictions: pipe0_predicted_question_categories_preds + statistics: + accuracy: categorization_accuracy + + + + ################# PIPE 1: SHARED QUESTION ENCODER ################# + + # Model 1: question embeddings + pipe1_question_embeddings: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/vqa-med + word_mappings_file: questions.all.word.mappings.csv + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: question RNN + pipe1_lstm: + priority: 1.2 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state_trainable: True + use_logsoftmax: False + dropout_rate: 0.5 + streams: + inputs: embedded_questions + predictions: questions_activations + globals: + input_size: embeddings_size + prediction_size: question_lstm_output_size + + # Answer encoding + pipe1_all_answer_indexer: + type: LabelIndexer + priority: 1.3 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: all_answers_ids + globals: + vocabulary_size: vocabulary_size_all_c1_c2_c3_binary + word_mappings: word_mappings_all_c1_c2_c3_binary + + ################# PIPE 2: SHARED IMAGE ENCODER ################# + + # Image encoder. 
+ image_encoder: + type: TorchVisionWrapper + priority: 2.1 + streams: + inputs: images + predictions: image_activations + globals: + prediction_size: image_encoder_output_size + + ################# PIPE 3: SHARED IMAGE SIZE ENCODER ################# + + # Model - image size classifier. + image_size_encoder: + type: FeedForwardNetwork + priority: 3.1 + streams: + inputs: image_sizes + predictions: image_size_activations + globals: + input_size: image_size_encoder_input_size + prediction_size: image_size_encoder_output_size + + ################# PIPE 4: SHARED CONCAT ################# + + concat: + type: Concatenation + priority: 4.1 + input_streams: [questions_activations,image_activations,image_size_activations] + # Concatenation + dim: 1 # default + input_dims: [[-1,100],[-1,100],[-1,10]] + output_dims: [-1,210] + streams: + outputs: concatenated_activations + globals: + output_size: concatenated_activations_size + + + ################# PIPE 5: C1 + C2 + C2 + Binary Y/N question ################# + + # Answer encoding for PIPE 5. + pipe5_all_answer_indexer: + type: LabelIndexer + priority: 5.1 + data_folder: ~/data/vqa-med + word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv + # Export mappings and size to globals. + export_word_mappings_to_globals: True + streams: + inputs: answers + outputs: pipe5_all_answers_ids + globals: + vocabulary_size: vocabulary_size_c1_c2_c3_binary + word_mappings: word_mappings_all_c1_c2_c3_binary + + # Sample masking based on categories. 
+ pipe5_all_string_to_mask: + priority: 5.2 + type: StringToMask + globals: + word_mappings: category_c1_c2_c3_binary_yn_word_to_ix + streams: + strings: pipe0_predicted_question_categories_names + string_indices: predicted_c1_c2_c3_binary_by_question_categories_indices # NOT USED + masks: pipe5_all_masks + + # Model 4: FFN C1 answering + pipe5_all_ffn: + priority: 5.3 + type: FeedForwardNetwork + hidden: [50] + dropout_rate: 0.5 + streams: + inputs: concatenated_activations + predictions: pipe5_all_predictions + globals: + input_size: concatenated_activations_size + prediction_size: vocabulary_size_c1_c2_c3_binary + + pipe5_all_nllloss: + type: NLLLoss + priority: 5.4 + targets_dim: 1 + use_masking: True + streams: + predictions: pipe5_all_predictions + masks: pipe5_all_masks + targets: pipe5_all_answers_ids + loss: pipe5_all_loss + + pipe5_all_precision_recall: + type: PrecisionRecallStatistics + priority: 5.5 + use_word_mappings: True + use_masking: True + #show_class_scores: True + #show_confusion_matrix: True + streams: + masks: pipe5_all_masks + predictions: pipe5_all_predictions + targets: pipe5_all_answers_ids + globals: + word_mappings: word_mappings_all_c1_c2_c3_binary + statistics: + precision: pipe5_all_precision + recall: pipe5_all_recall + f1score: pipe5_all_f1score + + + ################# PIPE 9: MERGE ANSWERS ################# + + # Viewers. 
+ viewer: + type: StreamViewer + priority: 9.3 + input_streams: questions,answers, category_names,predicted_question_categories_names, pipe5_all_masks,pipe5_all_answers_without_yn_ids,pipe5_all_predictions + + +#: pipeline From 48c49647ae042fda70cb1b539c6c496aeebff718 Mon Sep 17 00:00:00 2001 From: tkornut Date: Thu, 18 Apr 2019 06:55:18 -0700 Subject: [PATCH 28/39] rename c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss --- ...1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename configs/vqa_med_2019/vf/{c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml => c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml} (99%) diff --git a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml b/configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml similarity index 99% rename from configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml rename to configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml index 3c47597..ef8f535 100644 --- a/configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss.yml +++ b/configs/vqa_med_2019/vf/c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss.yml @@ -20,7 +20,7 @@ validation: pipeline: - name: c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_one_ffn_loss + name: c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss ################# PIPE 0: SHARED ################# From a4527749c300e24e0283fa83e325dae89bd3a1ca Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta <43558388+tkornut@users.noreply.github.com> Date: Fri, 19 Apr 2019 09:36:44 -0700 Subject: [PATCH 29/39] Update README.md --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f13e0a7..ee9cd50 100644 --- a/README.md +++ b/README.md @@ -11,14 +11,15 @@ ## Description -PyTorchPipe 
(PTP) aims at _accelerating reproducible Machine Learning Research_ by fostering the development of computational _pipelines_ and comparison of diverse neural network-based models. - -In its core, to _accelerate the computations_ on their own, PTP relies on PyTorch and extensively uses its mechanisms for distribution of computations on CPUs/GPUs. +PyTorchPipe (PTP) fosters the development of computational _pipelines_ and comparison of diverse neural network-based models. PTP frames training and testing procedures as _pipelines_ consisting of many components communicating through data streams. Each such a stream can consist of several components, including one problem instance (providing batches of data), (zero-or-more) trainable models and (any number of) additional components providing required transformations and computations. -As a result, the training & testing mechanisms are no longer pinned to a specific model or problem, and built-in mechanisms for compatibility checking (handshaking), configuration management & statistics collection facilitate running diverse experiments. +As a result, the training & testing procedures are no longer pinned to a specific problem or model, and built-in mechanisms for compatibility checking (handshaking), configuration management & statistics collection facilitate running diverse experiments. + +In its core, to _accelerate the computations_ on their own, PTP relies on PyTorch and extensively uses its mechanisms for distribution of computations on CPUs/GPUs. 
+ ## Installation From 1d4b93ad00edb49bb66691e33c7569cfba81e099 Mon Sep 17 00:00:00 2001 From: tkornut Date: Fri, 19 Apr 2019 10:56:16 -0700 Subject: [PATCH 30/39] first version of component calculating BLEU score --- .../components/publishers/bleu_statistics.yml | 50 ++++ .../wikitext_language_modeling_rnn.yml | 23 +- ptp/components/publishers/__init__.py | 2 + ptp/components/publishers/bleu_statistics.py | 223 ++++++++++++++++++ ptp/components/text/sentence_indexer.py | 2 +- 5 files changed, 296 insertions(+), 4 deletions(-) create mode 100644 configs/default/components/publishers/bleu_statistics.yml create mode 100644 ptp/components/publishers/bleu_statistics.py diff --git a/configs/default/components/publishers/bleu_statistics.yml b/configs/default/components/publishers/bleu_statistics.yml new file mode 100644 index 0000000..2a52fa3 --- /dev/null +++ b/configs/default/components/publishers/bleu_statistics.yml @@ -0,0 +1,50 @@ +# This file defines the default values for the Accuracy statistics. + +#################################################################### +# 1. CONFIGURATION PARAMETERS that will be LOADED by the component. +#################################################################### + +# Flag indicating whether prediction are represented as distributions or indices (LOADED) +# Options: True (expects distribution for each preditions) +# False (expects indices (max args)) +use_prediction_distributions: True + +# When set to True, performs masking of selected samples from batch (LOADED) +# TODO! +#use_masking: False + +streams: + #################################################################### + # 2. Keymappings associated with INPUT and OUTPUT streams. 
+ #################################################################### + + # Stream containing targets (label ids) (INPUT) + targets: targets + + # Stream containing batch of predictions (INPUT) + predictions: predictions + + # Stream containing masks used for masking of selected samples from batch (INPUT) + #masks: masks + +globals: + #################################################################### + # 3. Keymappings of variables that will be RETRIEVED from GLOBALS. + #################################################################### + + # Word mappings used for mappings of predictions/targets into list of words (RERIEVED) + word_mappings: word_mappings + + #################################################################### + # 4. Keymappings associated with GLOBAL variables that will be SET. + #################################################################### + +statistics: + #################################################################### + # 5. Keymappings associated with statistics that will be ADDED. + #################################################################### + + # Name used for collected statistics (ADDED). + bleu: bleu + + diff --git a/configs/wikitext/wikitext_language_modeling_rnn.yml b/configs/wikitext/wikitext_language_modeling_rnn.yml index 88274b9..d2806d5 100644 --- a/configs/wikitext/wikitext_language_modeling_rnn.yml +++ b/configs/wikitext/wikitext_language_modeling_rnn.yml @@ -86,12 +86,29 @@ pipeline: num_targets_dims: 2 streams: targets: indexed_targets - loss: loss + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 100.0 + + #accuracy: + # type: AccuracyStatistics + # priority: 100.1 + # streams: + # targets: indexed_targets + + bleu: + type: BLEUStatistics + priority: 100.2 + streams: + targets: indexed_targets + + # Viewers. 
viewer: type: StreamViewer - priority: 100.1 - input_streams: sources,indexed_targets,targets,predictions + priority: 100.3 + input_streams: sources,targets,indexed_targets,predictions #: pipeline diff --git a/ptp/components/publishers/__init__.py b/ptp/components/publishers/__init__.py index a412f6f..1db7f75 100644 --- a/ptp/components/publishers/__init__.py +++ b/ptp/components/publishers/__init__.py @@ -1,11 +1,13 @@ from .accuracy_statistics import AccuracyStatistics from .batch_size_statistics import BatchSizeStatistics +from .bleu_statistics import BLEUStatistics from .global_variable_publisher import GlobalVariablePublisher from .precision_recall_statistics import PrecisionRecallStatistics __all__ = [ 'AccuracyStatistics', 'BatchSizeStatistics', + 'BLEUStatistics', 'GlobalVariablePublisher', 'PrecisionRecallStatistics', ] diff --git a/ptp/components/publishers/bleu_statistics.py b/ptp/components/publishers/bleu_statistics.py new file mode 100644 index 0000000..eafb7ea --- /dev/null +++ b/ptp/components/publishers/bleu_statistics.py @@ -0,0 +1,223 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) tkornuta, IBM Corporation 2019 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__author__ = "Tomasz Kornuta" + +import torch +import math +import numpy as np +from nltk.translate.bleu_score import sentence_bleu + +from ptp.components.component import Component +from ptp.data_types.data_definition import DataDefinition + + +class BLEUStatistics(Component): + """ + Class collecting statistics: BLEU (Bilingual Evaluation Understudy Score). + + It accepts targets and predictions represented as indices of words and uses the provided word mappings to change those into words used finally for calculation of BLEU similarity. + + """ + + def __init__(self, name, config): + """ + Initializes object. + + :param name: Loss name. + :type name: str + + :param config: Dictionary of parameters (read from the configuration ``.yaml`` file). + :type config: :py:class:`ptp.configuration.ConfigInterface` + + """ + # Call constructors of parent classes. + Component.__init__(self, name, BLEUStatistics, config) + + # Get stream key mappings. + self.key_targets = self.stream_keys["targets"] + self.key_predictions = self.stream_keys["predictions"] + self.key_masks = self.stream_keys["masks"] + + # Get prediction distributions/indices flag. + self.use_prediction_distributions = self.config["use_prediction_distributions"] + + # Get masking flag. + #self.use_masking = self.config["use_masking"] + + # Retrieve word mappings from globals. + word_to_ix = self.globals["word_mappings"] + # Construct reverse mapping for faster processing. + self.ix_to_word = dict((v,k) for k,v in word_to_ix.items()) + + + # Get statistics key mappings. + self.key_bleu = self.statistics_keys["bleu"] + + + def input_data_definitions(self): + """ + Function returns a dictionary with definitions of input data that are required by the component. + + :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + # Add targets. 
+ input_defs = { + self.key_targets: DataDefinition([-1, -1], [torch.Tensor], "Batch of sentences represented as a single tensor of indices of particular words [BATCH_SIZE x SEQ_LENGTH]"), + } + # Add predictions. + if self.use_prediction_distributions: + input_defs[self.key_predictions] = DataDefinition([-1, -1, -1], [torch.Tensor], "Batch of predictions, represented as tensor with sequences of probability distributions over classes [BATCH_SIZE x SEQ_LENGTH x NUM_CLASSES]") + else: + input_defs[self.key_predictions] = DataDefinition([-1, -1], [torch.Tensor], "Batch of predictions, represented as tensor with sequences of indices of predicted answers [BATCH_SIZE x SEQ_LENGTH]") + # Add masks. + #if self.use_masking: + # input_defs[self.key_masks] = DataDefinition([-1, -1], [torch.Tensor], "Batch of masks (separate mask for each sequence in the batch) [BATCH_SIZE x SEQ_LENGTH]") + return input_defs + + + def output_data_definitions(self): + """ + Function returns a empty dictionary with definitions of output data produced the component. + + :return: Empty dictionary. + """ + return {} + + + def __call__(self, data_dict): + """ + Call method - empty for all statistics. + """ + pass + + + def calculate_BLEU(self, data_dict): + """ + Calculates BLEU for predictions of a given batch. + + :param data_dict: DataDict containing the targets and predictions (and optionally masks). + :type data_dict: DataDict + + :return: Accuracy. + + """ + # Get targets. + targets = data_dict[self.key_targets].data.cpu().numpy().tolist() + + if self.use_prediction_distributions: + # Get indices of the max log-probability. + preds = data_dict[self.key_predictions].max(1)[1].data.cpu().numpy().tolist() + else: + preds = data_dict[self.key_predictions].data.cpu().numpy().tolist() + + #if self.use_masking: + # # Get masks from inputs. + # masks = data_dict[self.key_masks].data.cpu().numpy().tolist() + #else: + # batch_size = preds.shape[0] + + # Calculate the correct predictinos. 
+ scores = [] + + for target_indices, pred_indices in zip(targets, preds): + # Change target indices to words. + target_words = [] + for t_ind in target_indices: + if t_ind in self.ix_to_word.keys(): + target_words.append(self.ix_to_word[t_ind]) + # Change prediction indices to words. + pred_words = [] + for p_ind in pred_indices: + if p_ind in self.ix_to_word.keys(): + pred_words.append(self.ix_to_word[p_ind]) + # Calculate BLEU. + scores.append(sentence_bleu(target_words, pred_words)) + + # Get batch size. + batch_size = len(targets) + + # Normalize by batch size. + if batch_size > 0: + score = sum(scores) / batch_size + else: + score = 0 + + return score + + + def add_statistics(self, stat_col): + """ + Adds 'accuracy' statistics to ``StatisticsCollector``. + + :param stat_col: ``StatisticsCollector``. + + """ + stat_col.add_statistics(self.key_bleu, '{:6.4f}') + + def collect_statistics(self, stat_col, data_dict): + """ + Collects statistics (batch_size) for given episode. + + :param stat_col: ``StatisticsCollector``. + + """ + stat_col[self.key_bleu] = self.calculate_BLEU(data_dict) + + def add_aggregators(self, stat_agg): + """ + Adds aggregator summing samples from all collected batches. + + :param stat_agg: ``StatisticsAggregator``. + + """ + stat_agg.add_aggregator(self.key_bleu, '{:7.5f}') # represents the average accuracy + #stat_agg.add_aggregator(self.key_bleu+'_min', '{:7.5f}') + #stat_agg.add_aggregator(self.key_bleu+'_max', '{:7.5f}') + stat_agg.add_aggregator(self.key_bleu+'_std', '{:7.5f}') + + + def aggregate_statistics(self, stat_col, stat_agg): + """ + Aggregates samples from all collected batches. + + :param stat_col: ``StatisticsCollector`` + + :param stat_agg: ``StatisticsAggregator`` + + """ + scores = stat_col[self.key_bleu] + + # Check if batch size was collected. + if "batch_size" in stat_col.keys(): + batch_sizes = stat_col['batch_size'] + + # Calculate weighted precision. 
+ scores_avg = np.average(scores, weights=batch_sizes) + scores_var = np.average((scores-scores_avg)**2, weights=batch_sizes) + + stat_agg[self.key_bleu] = scores_avg + #stat_agg[self.key_bleu+'_min'] = np.min(scores) + #stat_agg[self.key_bleu+'_max'] = np.max(scores) + stat_agg[self.key_bleu+'_std'] = math.sqrt(scores_var) + else: + # Else: use simple mean. + stat_agg[self.key_bleu] = np.mean(scores) + #stat_agg[self.key_bleu+'_min'] = np.min(scores) + #stat_agg[self.key_bleu+'_max'] = np.max(scores) + stat_agg[self.key_bleu+'_std'] = np.std(scores) + # But inform user about that! + self.logger.warning("Aggregated statistics might contain errors due to the lack of information about sizes of aggregated batches") diff --git a/ptp/components/text/sentence_indexer.py b/ptp/components/text/sentence_indexer.py index 18394f9..abaf94a 100644 --- a/ptp/components/text/sentence_indexer.py +++ b/ptp/components/text/sentence_indexer.py @@ -61,7 +61,7 @@ def output_data_definitions(self): :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). 
""" return { - self.key_outputs: DataDefinition([-1, -1], [torch.Tensor], "Batch of sentences represented as a single tensor of indices [BATCH_SIZE x SEQ_LENGTH]"), + self.key_outputs: DataDefinition([-1, -1], [torch.Tensor], "Batch of sentences represented as a single tensor of indices of particular words [BATCH_SIZE x SEQ_LENGTH]"), } def __call__(self, data_dict): From 9b3f977eb928399d2ba50782a767e6b0671ac291 Mon Sep 17 00:00:00 2001 From: tkornut Date: Fri, 19 Apr 2019 10:57:27 -0700 Subject: [PATCH 31/39] first version of component calculating BLEU score --- ptp/components/publishers/bleu_statistics.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ptp/components/publishers/bleu_statistics.py b/ptp/components/publishers/bleu_statistics.py index eafb7ea..4c19564 100644 --- a/ptp/components/publishers/bleu_statistics.py +++ b/ptp/components/publishers/bleu_statistics.py @@ -146,6 +146,9 @@ def calculate_BLEU(self, data_dict): pred_words.append(self.ix_to_word[p_ind]) # Calculate BLEU. scores.append(sentence_bleu(target_words, pred_words)) + print("TARGET: {}\n".format(target_words)) + print("PREDICTION: {}\n".format(pred_words)) + print("BLEU: {}\n".format(scores[-1])) # Get batch size. batch_size = len(targets) From 806e00811b7c4e9f701616545a14250a0c1e69f2 Mon Sep 17 00:00:00 2001 From: tkornut Date: Fri, 19 Apr 2019 14:13:28 -0700 Subject: [PATCH 32/39] bleu with weights, fixed bug with max along item axis --- .../components/publishers/bleu_statistics.yml | 3 +++ ptp/components/publishers/bleu_statistics.py | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/configs/default/components/publishers/bleu_statistics.yml b/configs/default/components/publishers/bleu_statistics.yml index 2a52fa3..a79a245 100644 --- a/configs/default/components/publishers/bleu_statistics.yml +++ b/configs/default/components/publishers/bleu_statistics.yml @@ -13,6 +13,9 @@ use_prediction_distributions: True # TODO! 
#use_masking: False +# Weights of n-grams used when calculating the score. +weights: [0.25, 0.25, 0.25, 0.25] + streams: #################################################################### # 2. Keymappings associated with INPUT and OUTPUT streams. diff --git a/ptp/components/publishers/bleu_statistics.py b/ptp/components/publishers/bleu_statistics.py index 4c19564..b303ea9 100644 --- a/ptp/components/publishers/bleu_statistics.py +++ b/ptp/components/publishers/bleu_statistics.py @@ -63,6 +63,9 @@ def __init__(self, name, config): # Construct reverse mapping for faster processing. self.ix_to_word = dict((v,k) for k,v in word_to_ix.items()) + # Get masking flag. + self.weights = self.config["weights"] + # Get statistics key mappings. self.key_bleu = self.statistics_keys["bleu"] @@ -120,7 +123,7 @@ def calculate_BLEU(self, data_dict): if self.use_prediction_distributions: # Get indices of the max log-probability. - preds = data_dict[self.key_predictions].max(1)[1].data.cpu().numpy().tolist() + preds = data_dict[self.key_predictions].max(-1)[1].data.cpu().numpy().tolist() else: preds = data_dict[self.key_predictions].data.cpu().numpy().tolist() @@ -133,6 +136,9 @@ def calculate_BLEU(self, data_dict): # Calculate the correct predictinos. scores = [] + #print("targets ({}): {}\n".format(len(targets), targets[0])) + #print("preds ({}): {}\n".format(len(preds), preds[0])) + for target_indices, pred_indices in zip(targets, preds): # Change target indices to words. target_words = [] @@ -145,11 +151,12 @@ def calculate_BLEU(self, data_dict): if p_ind in self.ix_to_word.keys(): pred_words.append(self.ix_to_word[p_ind]) # Calculate BLEU. 
- scores.append(sentence_bleu(target_words, pred_words)) - print("TARGET: {}\n".format(target_words)) - print("PREDICTION: {}\n".format(pred_words)) - print("BLEU: {}\n".format(scores[-1])) + scores.append(sentence_bleu([target_words], pred_words, self.weights)) + #print("TARGET: {}\n".format(target_words)) + #print("PREDICTION: {}\n".format(pred_words)) + #print("BLEU: {}\n".format(scores[-1])) + # Get batch size. batch_size = len(targets) From 4d133f41b0ed255934dc0328abbab68f998a9222 Mon Sep 17 00:00:00 2001 From: tkornut Date: Fri, 19 Apr 2019 14:36:49 -0700 Subject: [PATCH 33/39] Fixed mutltiple inheritance issue with mixin WordEmbeddings class --- .../components/text/sentence_indexer.yml | 3 +++ .../wikitext_language_modeling_rnn.yml | 4 +-- ptp/components/mixins/word_mappings.py | 25 ++++++++----------- ptp/components/models/sentence_embeddings.py | 2 +- ptp/components/text/label_indexer.py | 8 +++--- ptp/components/text/sentence_indexer.py | 14 ++++++++--- .../text/sentence_one_hot_encoder.py | 8 +++--- ptp/components/text/word_decoder.py | 8 +++--- 8 files changed, 41 insertions(+), 31 deletions(-) diff --git a/configs/default/components/text/sentence_indexer.yml b/configs/default/components/text/sentence_indexer.yml index 65d5d03..25c2f5e 100644 --- a/configs/default/components/text/sentence_indexer.yml +++ b/configs/default/components/text/sentence_indexer.yml @@ -25,6 +25,9 @@ import_word_mappings_from_globals: False # Flag informing whether word mappings will be exported to globals (LOADED) export_word_mappings_to_globals: False +# Operation mode. If 'reverse' is True, then it will change indices into words (LOADED) +reverse: False + streams: #################################################################### # 2. Keymappings associated with INPUT and OUTPUT streams. 
diff --git a/configs/wikitext/wikitext_language_modeling_rnn.yml b/configs/wikitext/wikitext_language_modeling_rnn.yml index d2806d5..3e87643 100644 --- a/configs/wikitext/wikitext_language_modeling_rnn.yml +++ b/configs/wikitext/wikitext_language_modeling_rnn.yml @@ -5,7 +5,7 @@ training: data_folder: &data_folder ~/data/language_modeling/wikitext-2 dataset: &dataset wikitext-2 subset: train - sentence_length: 50 + sentence_length: 10 batch_size: 64 # optimizer parameters: @@ -27,7 +27,7 @@ validation: data_folder: *data_folder dataset: *dataset subset: valid - sentence_length: 50 + sentence_length: 20 batch_size: 64 # Testing parameters: diff --git a/ptp/components/mixins/word_mappings.py b/ptp/components/mixins/word_mappings.py index 53bcf0c..1920574 100644 --- a/ptp/components/mixins/word_mappings.py +++ b/ptp/components/mixins/word_mappings.py @@ -17,31 +17,26 @@ import os import ptp.components.utils.word_mappings as wm -from ptp.components.component import Component -class WordMappings(Component): +class WordMappings(object): """ Mixin class that handles the initialization of (word:index) mappings. + Assumes that it is mixed-in into class that is derived from the component. + .. warning:: + Constructor (__init__) of the Component class has to be called before component of the mixin WordMapping class. + """ - def __init__(self, name, class_type, config): + def __init__(self): #, name, class_type, config): """ Initializes the (word:index) mappings. - Loads parameters from configuration, - - :param name: Component name (read from configuration file). - :type name: str - - :param class_type: Class type of the component (derrived from this class). - - :param config: Dictionary of parameters (read from the configuration ``.yaml`` file). 
- :type config: :py:class:`ptp.configuration.ConfigInterface` + Assumes that Component was initialized in advance, which means that the self object possesses the following objects: + - self.config + - self.globals + - self.logger """ - # Call constructors of parent classes. - Component.__init__(self, name, class_type, config) - # Read the actual configuration. self.data_folder = os.path.expanduser(self.config['data_folder']) diff --git a/ptp/components/models/sentence_embeddings.py b/ptp/components/models/sentence_embeddings.py index 5d44bd4..6004e2a 100644 --- a/ptp/components/models/sentence_embeddings.py +++ b/ptp/components/models/sentence_embeddings.py @@ -50,7 +50,7 @@ def __init__(self, name, config): """ # Call base class constructors. Model.__init__(self, name, SentenceEmbeddings, config) - WordMappings.__init__(self, name, SentenceEmbeddings, config) + WordMappings.__init__(self) # Set key mappings. self.key_inputs = self.stream_keys["inputs"] diff --git a/ptp/components/text/label_indexer.py b/ptp/components/text/label_indexer.py index 410aa46..c3090cd 100644 --- a/ptp/components/text/label_indexer.py +++ b/ptp/components/text/label_indexer.py @@ -16,11 +16,12 @@ import torch +from ptp.components.component import Component from ptp.components.mixins.word_mappings import WordMappings from ptp.data_types.data_definition import DataDefinition -class LabelIndexer(WordMappings): +class LabelIndexer(Component, WordMappings): """ Class responsible for changing of samples consisting of single words/labels into indices (that e.g. can be latter used for loss calculation, PyTorch-style). """ @@ -35,8 +36,9 @@ def __init__(self, name, config): :type config: :py:class:`ptp.configuration.ConfigInterface` """ - # Call constructor(s) of parent class(es). - WordMappings.__init__(self, name, LabelIndexer, config) + # Call constructor(s) of parent class(es) - in the right order! 
+ Component.__init__(self, name, LabelIndexer, config) + WordMappings.__init__(self) # Set key mappings. self.key_inputs = self.stream_keys["inputs"] diff --git a/ptp/components/text/sentence_indexer.py b/ptp/components/text/sentence_indexer.py index abaf94a..b21e0f4 100644 --- a/ptp/components/text/sentence_indexer.py +++ b/ptp/components/text/sentence_indexer.py @@ -16,11 +16,12 @@ import torch +from ptp.components.component import Component from ptp.components.mixins.word_mappings import WordMappings from ptp.data_types.data_definition import DataDefinition -class SentenceIndexer(WordMappings): +class SentenceIndexer(Component, WordMappings): """ Class responsible for encoding of sequences of words into list of indices. Those can be letter embedded, encoded with 1-hot encoding or else. @@ -36,13 +37,18 @@ def __init__(self, name, config): :type config: :py:class:`ptp.configuration.ConfigInterface` """ - # Call constructor(s) of parent class(es). - WordMappings.__init__(self, name, SentenceIndexer, config) + # Call constructor(s) of parent class(es) - in the right order! + Component.__init__(self, name, SentenceIndexer, config) + WordMappings.__init__(self) # Set key mappings. self.key_inputs = self.stream_keys["inputs"] self.key_outputs = self.stream_keys["outputs"] - + + # Read mode from the configuration. 
+ self.mode_reverse = self.config['reverse'] + + def input_data_definitions(self): """ diff --git a/ptp/components/text/sentence_one_hot_encoder.py b/ptp/components/text/sentence_one_hot_encoder.py index c25100f..b25a3e8 100644 --- a/ptp/components/text/sentence_one_hot_encoder.py +++ b/ptp/components/text/sentence_one_hot_encoder.py @@ -16,11 +16,12 @@ import torch +from ptp.components.component import Component from ptp.components.mixins.word_mappings import WordMappings from ptp.data_types.data_definition import DataDefinition -class SentenceOneHotEncoder(WordMappings): +class SentenceOneHotEncoder(Component, WordMappings): """ Class responsible for encoding of samples being sequences of words using 1-hot encoding. """ @@ -35,8 +36,9 @@ def __init__(self, name, config): :type config: :py:class:`ptp.configuration.ConfigInterface` """ - # Call constructor(s) of parent class(es). - WordMappings.__init__(self, name, SentenceOneHotEncoder, config) + # Call constructor(s) of parent class(es) - in the right order! + Component.__init__(self, name, SentenceOneHotEncoder, config) + WordMappings.__init__(self) # Set key mappings. self.key_inputs = self.stream_keys["inputs"] diff --git a/ptp/components/text/word_decoder.py b/ptp/components/text/word_decoder.py index 0e5a052..e75dd15 100644 --- a/ptp/components/text/word_decoder.py +++ b/ptp/components/text/word_decoder.py @@ -16,11 +16,12 @@ import torch +from ptp.components.component import Component from ptp.components.mixins.word_mappings import WordMappings from ptp.data_types.data_definition import DataDefinition -class WordDecoder(WordMappings): +class WordDecoder(Component, WordMappings): """ Class responsible for decoding of samples encoded in the form of vectors ("probability distributions"). """ @@ -35,8 +36,9 @@ def __init__(self, name, config): :type config: :py:class:`ptp.configuration.ConfigInterface` """ - # Call constructor(s) of parent class(es). 
- WordMappings.__init__(self, name, WordDecoder, config) + # Call constructor(s) of parent class(es) - in the right order! + Component.__init__(self, name, WordDecoder, config) + WordMappings.__init__(self) # Construct reverse mapping for faster processing. self.ix_to_word = dict((v,k) for k,v in self.word_to_ix.items()) From 8585face33b486703e412902dd9d535d135ac668 Mon Sep 17 00:00:00 2001 From: tkornut Date: Fri, 19 Apr 2019 15:14:13 -0700 Subject: [PATCH 34/39] Deindexing mode added to sentence indexer --- .../components/text/sentence_indexer.yml | 5 + .../wikitext_language_modeling_rnn.yml | 21 ++- ptp/components/text/sentence_indexer.py | 131 ++++++++++++++++-- 3 files changed, 142 insertions(+), 15 deletions(-) diff --git a/configs/default/components/text/sentence_indexer.yml b/configs/default/components/text/sentence_indexer.yml index 25c2f5e..0921bc7 100644 --- a/configs/default/components/text/sentence_indexer.yml +++ b/configs/default/components/text/sentence_indexer.yml @@ -28,6 +28,11 @@ export_word_mappings_to_globals: False # Operation mode. If 'reverse' is True, then it will change indices into words (LOADED) reverse: False +# Flag indicating whether inputs are represented as distributions or indices (LOADED) +# Options: True (expects distribution for each input item in sequence) +# False (expects indices (max args)) +use_input_distributions: False + streams: #################################################################### # 2. Keymappings associated with INPUT and OUTPUT streams. diff --git a/configs/wikitext/wikitext_language_modeling_rnn.yml b/configs/wikitext/wikitext_language_modeling_rnn.yml index 3e87643..811dbb5 100644 --- a/configs/wikitext/wikitext_language_modeling_rnn.yml +++ b/configs/wikitext/wikitext_language_modeling_rnn.yml @@ -46,7 +46,7 @@ pipeline: # Source encoding - model 1. 
source_sentence_embedding: type: SentenceEmbeddings - priority: 1.1 + priority: 1 embeddings_size: 50 pretrained_embeddings: glove.6B.50d.txt data_folder: *data_folder @@ -61,7 +61,7 @@ pipeline: # Target encoding. target_indexer: type: SentenceIndexer - priority: 2.1 + priority: 2 data_folder: *data_folder import_word_mappings_from_globals: True streams: @@ -87,6 +87,21 @@ pipeline: streams: targets: indexed_targets + # Prediction decoding. + prediction_decoder: + type: SentenceIndexer + priority: 10 + # Reverse mode. + reverse: True + # Use distributions as inputs. + use_input_distributions: True + data_folder: *data_folder + import_word_mappings_from_globals: True + streams: + inputs: predictions + outputs: prediction_sentences + + # Statistics. batch_size: type: BatchSizeStatistics @@ -109,6 +124,6 @@ pipeline: viewer: type: StreamViewer priority: 100.3 - input_streams: sources,targets,indexed_targets,predictions + input_streams: sources,targets,indexed_targets,prediction_sentences #: pipeline diff --git a/ptp/components/text/sentence_indexer.py b/ptp/components/text/sentence_indexer.py index b21e0f4..7cb0ece 100644 --- a/ptp/components/text/sentence_indexer.py +++ b/ptp/components/text/sentence_indexer.py @@ -25,6 +25,8 @@ class SentenceIndexer(Component, WordMappings): """ Class responsible for encoding of sequences of words into list of indices. Those can be letter embedded, encoded with 1-hot encoding or else. + + Additianally, when 'reverse' mode is on, it works in the oposite direction, i.e. changing tensor witl indices into list of words. """ def __init__(self, name, config): """ @@ -48,6 +50,13 @@ def __init__(self, name, config): # Read mode from the configuration. self.mode_reverse = self.config['reverse'] + if self.mode_reverse: + # We will need reverse (index:word) mapping. + self.ix_to_word = dict((v,k) for k,v in self.word_to_ix.items()) + + # Get inputs distributions/indices flag. 
+ self.use_input_distributions = self.config["use_input_distributions"] + def input_data_definitions(self): @@ -56,9 +65,19 @@ def input_data_definitions(self): :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`). """ - return { - self.key_inputs: DataDefinition([-1, -1, 1], [list, list, str], "Batch of sentences, each represented as a list of words [BATCH_SIZE] x [SEQ_LENGTH] x [string]"), - } + if self.mode_reverse: + if self.use_input_distributions: + return { + self.key_inputs: DataDefinition([-1, -1, -1], [torch.Tensor], "Batch of sentences represented as a single tensor with batch of probability distributions [BATCH_SIZE x SEQ_LENGTH x ITEM_SIZE]"), + } + else: + return { + self.key_inputs: DataDefinition([-1, -1], [torch.Tensor], "Batch of sentences represented as a single tensor of indices of particular words [BATCH_SIZE x SEQ_LENGTH]"), + } + else: + return { + self.key_inputs: DataDefinition([-1, -1, 1], [list, list, str], "Batch of sentences, each represented as a list of words [BATCH_SIZE] x [SEQ_LENGTH] x [string]"), + } def output_data_definitions(self): """ @@ -66,25 +85,50 @@ def output_data_definitions(self): :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). 
""" - return { - self.key_outputs: DataDefinition([-1, -1], [torch.Tensor], "Batch of sentences represented as a single tensor of indices of particular words [BATCH_SIZE x SEQ_LENGTH]"), - } + if self.mode_reverse: + return { + self.key_outputs: DataDefinition([-1, -1, 1], [list, list, str], "Batch of sentences, each represented as a list of words [BATCH_SIZE] x [SEQ_LENGTH] x [string]"), + } + else: + return { + self.key_outputs: DataDefinition([-1, -1], [torch.Tensor], "Batch of sentences represented as a single tensor of indices of particular words [BATCH_SIZE x SEQ_LENGTH]"), + } + def __call__(self, data_dict): """ - Encodes "inputs" in the format of list of tokens (for a single sample) - Stores result in "encoded_inputs" field of in data_dict. + Encodes inputs into outputs. + Depending on the mode (set by 'reverse' config param) calls sentences_to_tensor() (when False) or tensor_to_sentences() (when set to True). - :param data_dict: :py:class:`ptp.utils.DataDict` object containing (among others): + :param data_dict: :py:class:`ptp.datatypes.DataDict` object. + """ + if self.mode_reverse: + if self.use_input_distributions: + # Produce list of words. + self.tensor_distributions_to_sentences(data_dict) + else: + # Produce list of words. + self.tensor_indices_to_sentences(data_dict) + else: + # Produce indices. + self.sentences_to_tensor(data_dict) + + + def sentences_to_tensor(self, data_dict): + """ + Encodes "inputs" in the format of batch of list of words into a single tensor with corresponding indices. 
- - "inputs": expected input field containing list of words [BATCH_SIZE] x [SEQ_SIZE] x [string] + :param data_dict: :py:class:`ptp.datatypes.DataDict` object containing (among others): - - "encoded_targets": added output field containing list of indices [BATCH_SIZE x SEQ_SIZE] + - "inputs": expected input field containing list of lists of words [BATCH_SIZE] x [SEQ_SIZE] x [string] + + - "outputs": added output field containing tensor with indices [BATCH_SIZE x SEQ_SIZE] """ # Get inputs to be encoded. inputs = data_dict[self.key_inputs] + outputs_list = [] - # Process samples 1 by one. + # Process sentences 1 by 1. for sample in inputs: assert isinstance(sample, (list,)), 'This encoder requires input sample to contain a list of words' # Process list. @@ -102,3 +146,66 @@ def __call__(self, data_dict): output = self.app_state.LongTensor(outputs_list) # Create the returned dict. data_dict.extend({self.key_outputs: output}) + + def tensor_indices_to_sentences(self, data_dict): + """ + Encodes "inputs" in the format of tensor with indices into a batch of list of words. + + :param data_dict: :py:class:`ptp.datatypes.DataDict` object containing (among others): + + - "inputs": added output field containing tensor with indices [BATCH_SIZE x SEQ_SIZE] + + - "outputs": expected input field containing list of lists of words [BATCH_SIZE] x [SEQ_SIZE] x [string] + + """ + # Get inputs to be changed to words. + inputs = data_dict[self.key_inputs].data.cpu().numpy().tolist() + + outputs_list = [] + # Process samples 1 by 1. + for sample in inputs: + # Process list. + output_sample = [] + # "Decode" sample (list of indices). + for token in sample: + # Get word. + output_word = self.ix_to_word[token] + # Add index to outputs. + output_sample.append( output_word ) + # Add sentence to batch. + outputs_list.append(output_sample) + + # Create the returned dict. 
+ data_dict.extend({self.key_outputs: outputs_list}) + + def tensor_distributions_to_sentences(self, data_dict): + """ + Encodes "inputs" in the format of tensor with probability distributions into a batch of list of words. + + :param data_dict: :py:class:`ptp.datatypes.DataDict` object containing (among others): + + - "inputs": added output field containing tensor with indices [BATCH_SIZE x SEQ_SIZE x ITEM_SIZE] + + - "outputs": expected input field containing list of lists of words [BATCH_SIZE] x [SEQ_SIZE] x [string] + + """ + # Get inputs to be changed to words. + inputs = data_dict[self.key_inputs].max(2)[1].data.cpu().numpy().tolist() + + outputs_list = [] + # Process samples 1 by 1. + for sample in inputs: + # Process list. + output_sample = [] + # "Decode" sample (list of indices). + for token in sample: + + # Get word. + output_word = self.ix_to_word[token] + # Add index to outputs. + output_sample.append( output_word ) + # Add sentence to batch. + outputs_list.append(output_sample) + + # Create the returned dict. 
+ data_dict.extend({self.key_outputs: outputs_list}) From 3142b7cddf4ad9042898088617a8d2a375a5b954 Mon Sep 17 00:00:00 2001 From: Alexis Asseman <33075224+aasseman@users.noreply.github.com> Date: Mon, 22 Apr 2019 17:36:21 -0700 Subject: [PATCH 35/39] Added broadcast functionality to FFN --- .../models/feed_forward_network.yml | 7 ++++++ ptp/components/models/feed_forward_network.py | 22 +++++++++++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/configs/default/components/models/feed_forward_network.yml b/configs/default/components/models/feed_forward_network.yml index 55a43b1..b9b80c0 100644 --- a/configs/default/components/models/feed_forward_network.yml +++ b/configs/default/components/models/feed_forward_network.yml @@ -15,6 +15,13 @@ dropout_rate: 0 # If true, output of the last layer will be additionally processed with Log Softmax (LOADED) use_logsoftmax: True +# Number of dimensions, where: +# - 2 means [Batch size, Input size] +# - n means [Batch size, dim 1, ..., dim n-2, Input size] +# And the FFN is broadcasted over the last (Input Size) Dimension. +# Also, all the dimensions sizes but the last are conserved, as the FFN is applied over the last dimension. +dimensions: 2 + streams: #################################################################### # 2. Keymappings associated with INPUT and OUTPUT streams. diff --git a/ptp/components/models/feed_forward_network.py b/ptp/components/models/feed_forward_network.py index adbc757..5d4dbd0 100644 --- a/ptp/components/models/feed_forward_network.py +++ b/ptp/components/models/feed_forward_network.py @@ -40,6 +40,8 @@ def __init__(self, name, config): self.key_inputs = self.stream_keys["inputs"] self.key_predictions = self.stream_keys["predictions"] + self.dimensions = self.config["dimensions"] + # Retrieve input size from global variables. 
self.input_size = self.globals["input_size"] if type(self.input_size) == list: @@ -106,7 +108,7 @@ def input_data_definitions(self): :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`). """ return { - self.key_inputs: DataDefinition([-1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x INPUT_SIZE]"), + self.key_inputs: DataDefinition(([-1] * (self.dimensions -1)) + [self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x ... x INPUT_SIZE]"), } @@ -117,7 +119,7 @@ def output_data_definitions(self): :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). """ return { - self.key_predictions: DataDefinition([-1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x PREDICTION_SIZE]") + self.key_predictions: DataDefinition(([-1] * (self.dimensions -1)) + [self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x ... x PREDICTION_SIZE]") } def forward(self, data_dict): @@ -126,13 +128,22 @@ def forward(self, data_dict): :param data_dict: DataDict({'inputs', 'predictions ...}), where: - - inputs: expected inputs [BATCH_SIZE x INPUT_SIZE], - - predictions: returned output with predictions (log_probs) [BATCH_SIZE x NUM_CLASSES] + - inputs: expected inputs [BATCH_SIZE x ... x INPUT_SIZE], + - predictions: returned output with predictions (log_probs) [BATCH_SIZE x ... x NUM_CLASSES] """ # Get inputs. x = data_dict[self.key_inputs] + # Check that the input has the number of dimensions that we expect + assert len(x.shape) == self.dimensions, \ + "Expected " + str(self.dimensions) + " dimensions for input, got " + str(len(x.shape))\ + + " instead. Check number of dimensions in the config." 
+ + # Reshape such that we do a broadcast over the last dimension + origin_shape = x.shape + x = x.contiguous().view(-1, origin_shape[-1]) + # Propagate inputs through all but last layer. for layer in self.layers[:-1]: x = layer(x) @@ -147,5 +158,8 @@ def forward(self, data_dict): if self.use_logsoftmax: x = self.log_softmax(x) + # Restore the input dimensions but the last one (as it's been resized by the FFN) + x = x.view(*origin_shape[0:self.dimensions-1], -1) + # Add predictions to datadict. data_dict.extend({self.key_predictions: x}) From 0e4f39f58a3b75320207ffb6dbefcd70aa0ebe42 Mon Sep 17 00:00:00 2001 From: Alexis Asseman <33075224+aasseman@users.noreply.github.com> Date: Tue, 23 Apr 2019 09:43:14 -0700 Subject: [PATCH 36/39] Add simple, all-in-one seq2seq RNN component --- .../default/components/models/seq2seq_rnn.yml | 81 +++++++ ...itext_language_modeling_seq2seq_simple.yml | 167 ++++++++++++++ ptp/components/models/__init__.py | 2 + ptp/components/models/seq2seq_rnn.py | 214 ++++++++++++++++++ 4 files changed, 464 insertions(+) create mode 100644 configs/default/components/models/seq2seq_rnn.yml create mode 100644 configs/wikitext/wikitext_language_modeling_seq2seq_simple.yml create mode 100644 ptp/components/models/seq2seq_rnn.py diff --git a/configs/default/components/models/seq2seq_rnn.yml b/configs/default/components/models/seq2seq_rnn.yml new file mode 100644 index 0000000..9d9350e --- /dev/null +++ b/configs/default/components/models/seq2seq_rnn.yml @@ -0,0 +1,81 @@ +# This file defines the default values for the RNN model. + +#################################################################### +# 1. CONFIGURATION PARAMETERS that will be LOADED by the component. +#################################################################### + +# Size of the hidden state (LOADED) +hidden_size: 100 + +# Flag informing the model to learn the intial state (h0/c0) (LOADED) +# When false, (c0/c0) will be initialized as zeros. 
+ +# Initial state type: +# * Zero (null vector) +# * Trainable (xavier initialization, trainable) +# * Input (the initial hidden state comes from an input stream) +initial_state: Trainable + +# Whether to include the last hidden state in the outputs +output_last_state: False + +# Type of recurrent cell (LOADED) +# Options: LSTM | GRU | RNN_TANH | RNN_RELU +cell_type: LSTM + +# Number of "stacked" layers (LOADED) +num_layers: 1 + +# Dropout rate (LOADED) +# Default: 0 (means that it is turned off) +dropout_rate: 0 + +# Prediction mode (LOADED) +# Options: +# * Dense (passes every activation through output layer) | +# * Last (passes only the last activation through output layer) | +# * None (all outputs are discarded) +prediction_mode: Dense + +# Input mode +# Options: +# * Dense (every iteration expects an input) +# * Autoregression_First (Autoregression, expects an input for the first iteration) +# * Autoregression_None (Autoregression, first input will be a null vector) +input_mode: Dense + +autoregression_length: 50 + +# If true, output of the last layer will be additionally processed with Log Softmax (LOADED) +use_logsoftmax: True + +streams: + #################################################################### + # 2. Keymappings associated with INPUT and OUTPUT streams. + #################################################################### + + # Stream containing batch of images (INPUT) + inputs: inputs + + # Stream containing predictions (OUTPUT) + predictions: predictions + +globals: + #################################################################### + # 3. Keymappings of variables that will be RETRIEVED from GLOBALS. + #################################################################### + + # Size of the input (RETRIEVED) + input_size: input_size + + # Size of the prediction (RETRIEVED) + prediction_size: prediction_size + + #################################################################### + # 4.
Keymappings associated with GLOBAL variables that will be SET. + #################################################################### + + #################################################################### + # 5. Keymappings associated with statistics that will be ADDED. + #################################################################### + diff --git a/configs/wikitext/wikitext_language_modeling_seq2seq_simple.yml b/configs/wikitext/wikitext_language_modeling_seq2seq_simple.yml new file mode 100644 index 0000000..731d590 --- /dev/null +++ b/configs/wikitext/wikitext_language_modeling_seq2seq_simple.yml @@ -0,0 +1,167 @@ +# This pipeline applies seq2seq on wikitext-2 to make word-level prediction. +# It's been made for test purposes only, as it is doing: +# [word 0 , ... , word 49] -> [word 1 , ... , word 50] (basically copying most of the input) +# +# The seq2seq here is implemented through the use of a simplified seq2seq component `Seq2Seq_RNN` + +# Training parameters: +training: + problem: + type: &p_type WikiTextLanguageModeling + data_folder: &data_folder ~/data/language_modeling/wikitext-2 + dataset: &dataset wikitext-2 + subset: train + sentence_length: 50 + batch_size: 64 + + # optimizer parameters: + optimizer: + name: Adam + lr: 1.0e-3 + + # settings parameters + terminal_conditions: + loss_stop: 1.0e-2 + episode_limit: 1000000 + epoch_limit: 100 + +# Validation parameters: +validation: + partial_validation_interval: 100 + problem: + type: *p_type + data_folder: *data_folder + dataset: *dataset + subset: valid + sentence_length: 50 + batch_size: 64 + +# Testing parameters: +testing: + problem: + type: *p_type + data_folder: *data_folder + dataset: *dataset + subset: test + sentence_length: 50 + batch_size: 64 + +pipeline: + name: wikitext_language_modeling_rnn + + # Source encoding - model 1.
+ source_sentence_embedding: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings: glove.6B.50d.txt + data_folder: *data_folder + source_vocabulary_files: wiki.train.tokens,wiki.valid.tokens,wiki.test.tokens + vocabulary_mappings_file: wiki.all.tokenized_words + additional_tokens: + export_word_mappings_to_globals: True + streams: + inputs: sources + outputs: embedded_sources + + # Target encoding. + target_indexer: + type: SentenceIndexer + priority: 2.1 + data_folder: *data_folder + import_word_mappings_from_globals: True + streams: + inputs: targets + outputs: indexed_targets + + # Publish the hidden size of the seq2seq + global_publisher: + type: GlobalVariablePublisher + priority: 1 + # Add s2s_hidden_size to globals, so the models below can use it. + keys: s2s_hidden_size + values: 300 + + # FF, to resize the embeddings to whatever the hidden size of the seq2seq is. + ff_resize_s2s_input: + type: FeedForwardNetwork + priority: 2.5 + s2s_hidden_size: 300 + use_logsoftmax: False + dimensions: 3 + streams: + inputs: embedded_sources + predictions: embedded_sources_resized + globals: + input_size: embeddings_size + prediction_size: s2s_hidden_size + + # LSTM seq2seq + lstm_encoder: + type: Seq2Seq_RNN + priority: 3 + initial_state: Trainable + hidden_size: 300 + num_layers: 3 + use_logsoftmax: False + streams: + inputs: embedded_sources_resized + predictions: s2s_output + globals: + input_size: s2s_hidden_size + prediction_size: s2s_hidden_size + + # FF, to resize from the hidden size of the seq2seq to the size of the target vector + ff_resize_s2s_output: + type: FeedForwardNetwork + use_logsoftmax: True + dimensions: 3 + priority: 5 + streams: + inputs: s2s_output + globals: + input_size: s2s_hidden_size + prediction_size: vocabulary_size + + # Loss + nllloss: + type: NLLLoss + priority: 6 + num_targets_dims: 2 + streams: + targets: indexed_targets + loss: loss + + # Prediction decoding.
+ prediction_decoder: + type: SentenceIndexer + priority: 10 + # Reverse mode. + reverse: True + # Use distributions as inputs. + use_input_distributions: True + data_folder: *data_folder + import_word_mappings_from_globals: True + streams: + inputs: predictions + outputs: prediction_sentences + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 100.0 + + bleu: + type: BLEUStatistics + priority: 100.2 + streams: + targets: indexed_targets + + + # Viewers. + viewer: + type: StreamViewer + priority: 100.3 + input_streams: sources,targets,indexed_targets,prediction_sentences + +#: pipeline diff --git a/ptp/components/models/__init__.py b/ptp/components/models/__init__.py index 32e95b0..b1d8d64 100644 --- a/ptp/components/models/__init__.py +++ b/ptp/components/models/__init__.py @@ -6,6 +6,7 @@ from .model import Model from .recurrent_neural_network import RecurrentNeuralNetwork from .sentence_embeddings import SentenceEmbeddings +from .seq2seq_rnn import Seq2Seq_RNN __all__ = [ 'ConvNetEncoder', @@ -16,4 +17,5 @@ 'Model', 'RecurrentNeuralNetwork', 'SentenceEmbeddings', + 'Seq2Seq_RNN' ] diff --git a/ptp/components/models/seq2seq_rnn.py b/ptp/components/models/seq2seq_rnn.py new file mode 100644 index 0000000..c8d1bdb --- /dev/null +++ b/ptp/components/models/seq2seq_rnn.py @@ -0,0 +1,214 @@ +# Copyright (C) tkornuta, IBM Corporation 2019 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__author__ = "Alexis Asseman" + +import torch + +from ptp.configuration.configuration_error import ConfigurationError +from ptp.components.models.model import Model +from ptp.data_types.data_definition import DataDefinition + + +class Seq2Seq_RNN(Model): + """ + Simple sequence-to-sequence model consisting of an RNN encoder, an autoregressive RNN decoder and a linear output layer (optionally followed by log softmax). + """ + def __init__(self, name, config): + """ + Initializes the model. + + :param config: Dictionary of parameters (read from configuration ``.yaml`` file). + :type config: ``ptp.configuration.ConfigInterface`` + """ + # Call constructors of parent classes. + Model.__init__(self, name, Seq2Seq_RNN, config) + + # Get input/output mode + self.input_mode = self.config["input_mode"] + + self.autoregression_length = self.config["autoregression_length"] + + # Check if initial state (h0/c0) is zero, trainable, or coming from input stream. + self.initial_state = self.config["initial_state"] + + # Get number of layers from config. + self.num_layers = self.config["num_layers"] + + # Retrieve input size from global variables. + self.key_input_size = self.global_keys["input_size"] + self.input_size = self.globals["input_size"] + if type(self.input_size) == list: + if len(self.input_size) == 1: + self.input_size = self.input_size[0] + else: + raise ConfigurationError("RNN input size '{}' must be a single dimension (current {})".format(self.key_input_size, self.input_size)) + + # Retrieve output (prediction) size from global params. + self.prediction_size = self.globals["prediction_size"] + if type(self.prediction_size) == list: + if len(self.prediction_size) == 1: + self.prediction_size = self.prediction_size[0] + else: + raise ConfigurationError("RNN prediction size '{}' must be a single dimension (current {})".format(self.key_prediction_size, self.prediction_size)) + + # Retrieve hidden size from configuration. 
+ self.hidden_size = self.config["hidden_size"] + if type(self.hidden_size) == list: + if len(self.hidden_size) == 1: + self.hidden_size = self.hidden_size[0] + else: + raise ConfigurationError("RNN hidden_size must be a single dimension (current {})".format(self.hidden_size)) + + # Create RNN depending on the configuration + self.cell_type = self.config["cell_type"] + if self.cell_type in ['LSTM', 'GRU']: + # Create rnn cell. + self.rnn_cell_enc = getattr(torch.nn, self.cell_type)(self.input_size, self.hidden_size, self.num_layers, batch_first=True) + self.rnn_cell_dec = getattr(torch.nn, self.cell_type)(self.input_size, self.hidden_size, self.num_layers, batch_first=True) + else: + try: + # Retrieve the non-linearity. + nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[self.cell_type] + # Create rnn cell. + self.rnn_cell_enc = torch.nn.RNN(self.input_size, self.hidden_size, self.num_layers, nonlinearity=nonlinearity, batch_first=True) + self.rnn_cell_dec = torch.nn.RNN(self.input_size, self.hidden_size, self.num_layers, nonlinearity=nonlinearity, batch_first=True) + except KeyError: + raise ConfigurationError( "Invalid RNN type, available options for 'cell_type' are ['LSTM', 'GRU', 'RNN_TANH', 'RNN_RELU'] (currently '{}')".format(self.cell_type)) + + # Parameters - for a single sample. + h0 = torch.zeros(self.num_layers, 1, self.hidden_size) + c0 = torch.zeros(self.num_layers, 1, self.hidden_size) + + self.init_hidden = None + + if self.initial_state == "Trainable": + self.logger.info("Using trainable initial (h0/c0) state") + # Initialize a single vector used as hidden state. + # Initialize it using xavier initialization. + torch.nn.init.xavier_uniform(h0) + # It will be trainable, i.e. the system will learn what should be the right initialization state. + self.init_hidden = torch.nn.Parameter(h0, requires_grad=True) + # Initilize memory cell in a similar way. 
+ if self.cell_type == 'LSTM': + torch.nn.init.xavier_uniform(c0) + self.init_memory = torch.nn.Parameter(c0, requires_grad=True) + elif self.initial_state == "Zero": + self.logger.info("Using zero initial (h0/c0) state") + # We will still embed it into a parameter to enable storing/loading of both types of models by each other. + self.init_hidden = torch.nn.Parameter(h0, requires_grad=False) + if self.cell_type == 'LSTM': + self.init_memory = torch.nn.Parameter(c0, requires_grad=False) + + # Get key mappings. + self.key_inputs = self.stream_keys["inputs"] + self.key_predictions = self.stream_keys["predictions"] + + self.logger.info("Initializing RNN with input size = {}, hidden size = {} and prediction size = {}".format(self.input_size, self.hidden_size, self.prediction_size)) + + # Create the output layer. + self.activation2output = torch.nn.Linear(self.hidden_size, self.prediction_size) + + # Create the final non-linearity. + self.use_logsoftmax = self.config["use_logsoftmax"] + if self.use_logsoftmax: + # Used when returning dense prediction, i.e. every output of unfolded model. + self.log_softmax = torch.nn.LogSoftmax(dim=2) + + def initialize_hiddens_state(self, batch_size): + + if self.cell_type == 'LSTM': + # Return tuple (hidden_state, memory_cell). + return (self.init_hidden.expand(self.num_layers, batch_size, self.hidden_size).contiguous(), + self.init_memory.expand(self.num_layers, batch_size, self.hidden_size).contiguous() ) + + else: + # Return hidden_state. + return self.init_hidden.expand(self.num_layers, batch_size, self.hidden_size).contiguous() + + + def input_data_definitions(self): + """ + Function returns a dictionary with definitions of input data that are required by the component. + + :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`). 
+ """ + d = {} + + d[self.key_inputs] = DataDefinition([-1, -1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]") + + return d + + def output_data_definitions(self): + """ + Function returns a dictionary with definitions of output data produced by the component. + + :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + d = {} + + d[self.key_predictions] = DataDefinition([-1, -1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]") + + return d + + def forward(self, data_dict): + """ + Forward pass of the model. + + :param data_dict: DataDict({'inputs', 'predictions', ...}), where: + + - inputs: expected inputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE], + - predictions: returned output with predictions (log_probs) [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE] + """ + + # Get inputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE] + inputs = data_dict[self.key_inputs] + if inputs.dim() == 2: + inputs = inputs.unsqueeze(1) + batch_size = inputs.shape[0] + + + # Initialize hidden state. + hidden = self.initialize_hiddens_state(batch_size) + + + # Encoder + activations, hidden = self.rnn_cell_enc(inputs, hidden) + + # Propagate inputs through rnn cell. + activations_partial, hidden = self.rnn_cell_dec(activations[:, -1, :].unsqueeze(1), hidden) + activations = [] + activations += [activations_partial] + for i in range(self.autoregression_length - 1): + activations_partial, hidden = self.rnn_cell_dec(activations_partial, hidden) + activations += [activations_partial] + activations = torch.stack(activations, 1) + + # Pass every activation through the output layer. 
+ # Reshape to 2D tensor [BATCH_SIZE * SEQ_LEN x HIDDEN_SIZE] + outputs = activations.contiguous().view(-1, self.hidden_size) + + # Propagate data through the output layer [BATCH_SIZE * SEQ_LEN x PREDICTION_SIZE] + outputs = self.activation2output(outputs) + + # Reshape back to 3D tensor [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE] + outputs = outputs.view(activations.size(0), activations.size(1), outputs.size(1)) + + # Log softmax - along PREDICTION dim. + if self.use_logsoftmax: + outputs = self.log_softmax(outputs) + + # Add predictions to datadict. + data_dict.extend({self.key_predictions: outputs}) + From 2794e2b8dd27d6dcb9dc40caca5c376b1e5ee640 Mon Sep 17 00:00:00 2001 From: Alexis Asseman <33075224+aasseman@users.noreply.github.com> Date: Tue, 23 Apr 2019 09:46:01 -0700 Subject: [PATCH 37/39] Modified RecurrentNeuralNetwork, such that it can input/output hidden state stream, and do autoregression. --- .../models/recurrent_neural_network.yml | 8 + .../wikitext_language_modeling_seq2seq.yml | 196 ++++++++++++++++++ .../models/recurrent_neural_network.py | 42 +++- 3 files changed, 235 insertions(+), 11 deletions(-) create mode 100644 configs/wikitext/wikitext_language_modeling_seq2seq.yml diff --git a/configs/default/components/models/recurrent_neural_network.yml b/configs/default/components/models/recurrent_neural_network.yml index f43a5bf..a0e6f5e 100644 --- a/configs/default/components/models/recurrent_neural_network.yml +++ b/configs/default/components/models/recurrent_neural_network.yml @@ -57,9 +57,17 @@ streams: # Stream containing batch of images (INPUT) inputs: inputs + # Stream containing the initial state of the RNN (INPUT) + # The stream will be actually created only if `initial_state: Input` + input_state: input_state + # Stream containing predictions (OUTPUT) predictions: predictions + # Stream containing the final output state of the RNN (output) + # The stream will be actually created only if `output_last_state: True` + output_state: output_state + 
globals: #################################################################### # 3. Keymappings of variables that will be RETRIEVED from GLOBALS. diff --git a/configs/wikitext/wikitext_language_modeling_seq2seq.yml b/configs/wikitext/wikitext_language_modeling_seq2seq.yml new file mode 100644 index 0000000..84bbeaf --- /dev/null +++ b/configs/wikitext/wikitext_language_modeling_seq2seq.yml @@ -0,0 +1,196 @@ +# This pipeline applies seq2seq on wikitext-2 to make word-level prediction. +# It's been made for test purposes only, as it is doing: +# [word 0 , ... , word 49] -> [word 1 , ... , word 50] (basically copying most of the input) +# +# The seq2seq here is implemented through the use of 2 `RecurrentNeuralNetwork` + +# Training parameters: +training: + problem: + type: &p_type WikiTextLanguageModeling + data_folder: &data_folder ~/data/language_modeling/wikitext-2 + dataset: &dataset wikitext-2 + subset: train + sentence_length: 50 + batch_size: 64 + + # optimizer parameters: + optimizer: + name: Adam + lr: 1.0e-3 + + # settings parameters + terminal_conditions: + loss_stop: 1.0e-2 + episode_limit: 1000000 + epoch_limit: 100 + +# Validation parameters: +validation: + partial_validation_interval: 100 + problem: + type: *p_type + data_folder: *data_folder + dataset: *dataset + subset: valid + sentence_length: 50 + batch_size: 64 + +# Testing parameters: +testing: + problem: + type: *p_type + data_folder: *data_folder + dataset: *dataset + subset: test + sentence_length: 50 + batch_size: 64 + +pipeline: + name: wikitext_language_modeling_seq2seq + + # Source encoding - model 1. 
+ source_sentence_embedding: + type: SentenceEmbeddings + priority: 1.1 + embeddings_size: 50 + pretrained_embeddings: glove.6B.50d.txt + data_folder: *data_folder + source_vocabulary_files: wiki.train.tokens,wiki.valid.tokens,wiki.test.tokens + vocabulary_mappings_file: wiki.all.tokenized_words + additional_tokens: + export_word_mappings_to_globals: True + streams: + inputs: sources + outputs: embedded_sources + + # Target encoding. + target_indexer: + type: SentenceIndexer + priority: 2.1 + data_folder: *data_folder + import_word_mappings_from_globals: True + streams: + inputs: targets + outputs: indexed_targets + + # Publish the hidden size of the seq2seq + global_publisher: + type: GlobalVariablePublisher + priority: 1 + # Add s2s_hidden_size to globals, so the components below can use it. + keys: s2s_hidden_size + values: 300 + + # FF, to resize the embeddings to whatever the hidden size of the seq2seq is. + ff_resize_s2s_input: + type: FeedForwardNetwork + priority: 2.5 + s2s_hidden_size: 300 + use_logsoftmax: False + dimensions: 3 + streams: + inputs: embedded_sources + predictions: embedded_sources_resized + globals: + input_size: embeddings_size + prediction_size: s2s_hidden_size + + # LSTM Encoder + lstm_encoder: + type: RecurrentNeuralNetwork + priority: 3 + initial_state: Trainable + hidden_size: 300 + num_layers: 3 + use_logsoftmax: False + output_last_state: True + prediction_mode: Last + streams: + inputs: embedded_sources_resized + predictions: s2s_encoder_output + output_state: s2s_state_output + globals: + input_size: s2s_hidden_size + prediction_size: s2s_hidden_size + + # LSTM Decoder + lstm_decoder: + type: RecurrentNeuralNetwork + priority: 4 + initial_state: Input + hidden_size: 300 + num_layers: 3 + use_logsoftmax: False + input_mode: Autoregression_First + autoregression_length: 50 + prediction_mode: Dense + streams: + inputs: s2s_encoder_output + predictions: s2s_decoder_output + input_state: s2s_state_output + globals: + input_size: s2s_hidden_size + 
prediction_size: s2s_hidden_size + + # FF, to resize from the hidden size of the seq2seq to the size of the target vector + ff_resize_s2s_output: + type: FeedForwardNetwork + use_logsoftmax: True + dimensions: 3 + priority: 5 + streams: + inputs: s2s_decoder_output + globals: + input_size: s2s_hidden_size + prediction_size: vocabulary_size + + # Loss + nllloss: + type: NLLLoss + priority: 6 + num_targets_dims: 2 + streams: + targets: indexed_targets + loss: loss + + # Prediction decoding. + prediction_decoder: + type: SentenceIndexer + priority: 10 + # Reverse mode. + reverse: True + # Use distributions as inputs. + use_input_distributions: True + data_folder: *data_folder + import_word_mappings_from_globals: True + streams: + inputs: predictions + outputs: prediction_sentences + + + # Statistics. + batch_size: + type: BatchSizeStatistics + priority: 100.0 + + #accuracy: + # type: AccuracyStatistics + # priority: 100.1 + # streams: + # targets: indexed_targets + + bleu: + type: BLEUStatistics + priority: 100.2 + streams: + targets: indexed_targets + + + # Viewers. + viewer: + type: StreamViewer + priority: 100.3 + input_streams: sources,targets,indexed_targets,prediction_sentences + +#: pipeline diff --git a/ptp/components/models/recurrent_neural_network.py b/ptp/components/models/recurrent_neural_network.py index d06a48a..75a7bd4 100644 --- a/ptp/components/models/recurrent_neural_network.py +++ b/ptp/components/models/recurrent_neural_network.py @@ -101,6 +101,8 @@ def __init__(self, name, config): h0 = torch.zeros(self.num_layers, 1, self.hidden_size) c0 = torch.zeros(self.num_layers, 1, self.hidden_size) + self.init_hidden = None + if self.initial_state == "Trainable": self.logger.info("Using trainable initial (h0/c0) state") # Initialize a single vector used as hidden state. 
@@ -112,7 +114,7 @@ def __init__(self, name, config): if self.cell_type == 'LSTM': torch.nn.init.xavier_uniform(c0) self.init_memory = torch.nn.Parameter(c0, requires_grad=True) - elif self.initial_state == "Zero": + elif self.initial_state in ["Zero", "Input"]: self.logger.info("Using zero initial (h0/c0) state") # We will still embedd it into parameter to enable storing/loading of both types of models by each other. self.init_hidden = torch.nn.Parameter(h0, requires_grad=False) @@ -167,10 +169,11 @@ def input_data_definitions(self): if self.input_mode == "Dense": d[self.key_inputs] = DataDefinition([-1, -1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]") elif self.input_mode == "Autoregression_First": - d[self.key_inputs] = DataDefinition([-1, 1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]") + d[self.key_inputs] = DataDefinition([-1, self.input_size], [torch.Tensor], "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]") - if "Autoregression" in self.input_mode: - d[self.key_input_state] = DataDefinition([-1, 2 if self.cell_type == 'LSTM' else 1, self.input_size, 1, self.hidden_size]) + # Input hidden state + if self.initial_state == "Input": + d[self.key_input_state] = DataDefinition([-1, 2 if self.cell_type == 'LSTM' else 1, self.input_size, 1, self.hidden_size], [torch.tensor], "Batch of RNN last states") return d @@ -188,8 +191,9 @@ def output_data_definitions(self): # Only last prediction. 
d[self.key_predictions] = DataDefinition([-1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]") + # Output hidden state stream if self.output_last_state: - d[self.key_output_state] = DataDefinition([-1, 2 if self.cell_type == 'LSTM' else 1, self.input_size, 1, self.hidden_size]) + d[self.key_output_state] = DataDefinition([-1, 2 if self.cell_type == 'LSTM' else 1, self.input_size, 1, self.hidden_size], [torch.tensor], "Batch of RNN last states") return d @@ -203,31 +207,47 @@ def forward(self, data_dict): - predictions: returned output with predictions (log_probs) [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE] """ + inputs = None + batch_size = None + # Get inputs [BATCH_SIZE x SEQ_LEN x INPUT_SIZE] if "None" in self.input_mode: - batch_size = data_dict[self.key_input_state].shape[0] - inputs = torch.zeros(1, self.input_size, self.hidden_size) + batch_size = data_dict[self.key_input_state][0].shape[1] + inputs = torch.zeros(batch_size, 1, self.hidden_size) + if next(self.parameters()).is_cuda: + inputs = inputs.cuda() + else: - batch_size = inputs.shape[0] inputs = data_dict[self.key_inputs] - + if inputs.dim() == 2: + inputs = inputs.unsqueeze(1) + batch_size = inputs.shape[0] # Initialize hidden state. - hidden = self.initialize_hiddens_state(batch_size) + if self.initial_state == "Input": + hidden = data_dict[self.key_input_state] + else: + hidden = self.initialize_hiddens_state(batch_size) + + activations = [] - # Propagate inputs through rnn cell. 
+ # Autoregressive mode - feed back outputs in the input if "Autoregression" in self.input_mode: activations_partial, hidden = self.rnn_cell(inputs, hidden) activations += [activations_partial] + # Feed back the outputs iteratively for i in range(self.autoregression_length - 1): activations_partial, hidden = self.rnn_cell(activations_partial, hidden) + # Add the single step output into list if self.prediction_mode == "Dense": activations += [activations_partial] + # Reassemble all the outputs from list into an output sequence if self.prediction_mode == "Dense": activations = torch.stack(activations, 1) else: activations = activations_partial + # Normal mode - feed the entire input sequence at once else: activations, hidden = self.rnn_cell(inputs, hidden) From 1beb8449eb2eb1c1c248b749b43705919850159c Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta <43558388+tkornut@users.noreply.github.com> Date: Tue, 23 Apr 2019 11:15:22 -0700 Subject: [PATCH 38/39] Update seq2seq_rnn.py author in copyright;) --- ptp/components/models/seq2seq_rnn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ptp/components/models/seq2seq_rnn.py b/ptp/components/models/seq2seq_rnn.py index c8d1bdb..813ab92 100644 --- a/ptp/components/models/seq2seq_rnn.py +++ b/ptp/components/models/seq2seq_rnn.py @@ -1,4 +1,4 @@ -# Copyright (C) tkornuta, IBM Corporation 2019 +# Copyright (C) aasseman, IBM Corporation 2019 # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
From df075d4f664f3bdbd214898fecbfe4af15f5620e Mon Sep 17 00:00:00 2001 From: Tomasz Kornuta <43558388+tkornut@users.noreply.github.com> Date: Tue, 23 Apr 2019 11:33:29 -0700 Subject: [PATCH 39/39] Update __init__.py Missing comma after 'Seq2Seq_RNN', in __init__ --- ptp/components/models/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ptp/components/models/__init__.py b/ptp/components/models/__init__.py index b6bad0f..3451d2f 100644 --- a/ptp/components/models/__init__.py +++ b/ptp/components/models/__init__.py @@ -20,7 +20,7 @@ 'Model', 'RecurrentNeuralNetwork', 'SentenceEmbeddings', - 'Seq2Seq_RNN' + 'Seq2Seq_RNN', 'ElementWiseMultiplication', 'MultimodalCompactBilinearPooling', ]