From 7e8fb02bbb0ff98ab316a66d172b34cb6d46caf8 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 15:40:17 -0700 Subject: [PATCH 01/10] minor fixes to processor (added default terminal condition etc.) --- configs/default/workers/processor.yml | 5 +++++ .../problems/image_text_to_class/__init__.py | 2 ++ ptp/workers/processor.py | 20 +++++++++---------- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/configs/default/workers/processor.yml b/configs/default/workers/processor.yml index 7737233..b8989cc 100644 --- a/configs/default/workers/processor.yml +++ b/configs/default/workers/processor.yml @@ -37,6 +37,11 @@ test: # type: RandomSampler # The rest of the content of that section is optimizer-specific... + # Terminal condition that will be used during processing. + terminal_conditions: + # Terminal condition : maximal number of episodes (Optional, -1 means that processor will perform one pass over the whole dataset/split) + episode_limit: -1 + #################################################################### # Section defining all the default values of parameters used during training. diff --git a/ptp/components/problems/image_text_to_class/__init__.py b/ptp/components/problems/image_text_to_class/__init__.py index a520d4c..eb52c35 100644 --- a/ptp/components/problems/image_text_to_class/__init__.py +++ b/ptp/components/problems/image_text_to_class/__init__.py @@ -1,5 +1,7 @@ +from .clevr import CLEVR from .vqa_med_2019 import VQAMED2019 __all__ = [ + 'CLEVR', 'VQAMED2019', ] diff --git a/ptp/workers/processor.py b/ptp/workers/processor.py index 9edad60..671db33 100644 --- a/ptp/workers/processor.py +++ b/ptp/workers/processor.py @@ -223,20 +223,20 @@ def setup_individual_experiment(self): # check if the maximum number of episodes is specified, if not put a # default equal to the size of the dataset (divided by the batch size) # So that by default, we loop over the test set once. - max_test_episodes = len(self.pm) + problem_size_in_episodes = len(self.pm) - self.config_test['problem'].add_default_params({'max_test_episodes': max_test_episodes}) - if self.config_test["problem"]["max_test_episodes"] == -1: + if self.config_test["terminal_conditions"]["episode_limit"] == -1: # Overwrite the config value! - self.config_test['problem'].add_config_params({'max_test_episodes': max_test_episodes}) + self.config_test['terminal_conditions'].add_config_params({'episode_limit': problem_size_in_episodes}) # Warn if indicated number of episodes is larger than an epoch size: - if self.config_test["problem"]["max_test_episodes"] > max_test_episodes: - self.logger.warning('Indicated maximum number of episodes is larger than one epoch, reducing it.') - self.config_test['problem'].add_config_params({'max_test_episodes': max_test_episodes}) + if self.config_test["terminal_conditions"]["episode_limit"] > problem_size_in_episodes: + self.logger.warning('Indicated limit of number of episodes is larger than one epoch, reducing it.') + # Overwrite the config value! + self.config_test['terminal_conditions'].add_config_params({'episode_limit': problem_size_in_episodes}) - self.logger.info("Setting the max number of episodes to: {}".format( - self.config_test["problem"]["max_test_episodes"])) + self.logger.info("Limiting the number of episodes to: {}".format( + self.config_test["terminal_conditions"]["episode_limit"])) ###################### PIPELINE ###################### @@ -384,7 +384,7 @@ def run_experiment(self): # Increment counter. 
self.app_state.episode += 1 # Terminal condition 0: max test episodes reached. - if self.app_state.episode == self.config_test["problem"]["max_test_episodes"]: + if self.app_state.episode == self.config_test["terminal_conditions"]["episode_limit"]: break # Forward pass. From 20af6c50c98e922db21ed34c4909ee113adeda62 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 15:40:38 -0700 Subject: [PATCH 02/10] CLEVR - loading questions --- configs/clevr/default_clevr.yml | 62 ++++ .../problems/image_text_to_class/clevr.yml | 72 +++++ .../problems/image_text_to_class/clevr.py | 291 ++++++++++++++++++ 3 files changed, 425 insertions(+) create mode 100644 configs/clevr/default_clevr.yml create mode 100644 configs/default/components/problems/image_text_to_class/clevr.yml create mode 100644 ptp/components/problems/image_text_to_class/clevr.py diff --git a/configs/clevr/default_clevr.yml b/configs/clevr/default_clevr.yml new file mode 100644 index 0000000..e5303e0 --- /dev/null +++ b/configs/clevr/default_clevr.yml @@ -0,0 +1,62 @@ +# Training parameters: +training: + problem: + type: CLEVR + batch_size: &b 64 + split: training + #resize: [32, 32] + # optimizer parameters: + optimizer: + type: Adam + lr: 0.0001 + # settings parameters + terminal_conditions: + loss_stop_threshold: 0.05 + early_stop_validations: -1 + episode_limit: 10000 + epoch_limit: 10 + +# Validation parameters: +validation: + #partial_validation_interval: 100 + problem: + type: CLEVR + batch_size: *b + split: validation + #resize: [32, 32] + +# Testing parameters: +test: + problem: + type: CLEVR + batch_size: *b + split: test + #resize: [32, 32] + +pipeline: + name: default + disable: image_viewer + + # Loss + nllloss: + type: NLLLoss + priority: 10.0 + + # Statistics. + batch_size: + priority: 100.0 + type: BatchSizeStatistics + + stream_viewer: + priority: 100.4 + type: StreamViewer + input_streams: index, question + + #image_viewer: + # priority: 100.5 + # type: ImageToClassViewer + # streams: + # images: inputs + # labels: labels + # answers: answers + diff --git a/configs/default/components/problems/image_text_to_class/clevr.yml b/configs/default/components/problems/image_text_to_class/clevr.yml new file mode 100644 index 0000000..7e62f09 --- /dev/null +++ b/configs/default/components/problems/image_text_to_class/clevr.yml @@ -0,0 +1,72 @@ +# This file defines the default values for the CLEVR problem. + +#################################################################### +# 1. CONFIGURATION PARAMETERS that will be LOADED by the component. +#################################################################### + +# Folder where problem will store data (LOADED) +data_folder: '~/data/CLEVR_v1.0' + +# Defines the set (split) that will be used (LOADED) +# Options: training | validation | test | cogent_a_training | cogent_a_validation | cogent_b_validation +split: training + +# Defines the categoriees that will be used (LOADED) +# Options: all | c1 | c2 | c3 | c4 (or any combination of the latter 4) +#categories: all + +# Flag indicating whether the problem will load and return images (LOADED) +stream_images: True + +streams: + #################################################################### + # 2. Keymappings associated with INPUT and OUTPUT streams. + #################################################################### + + # Stream containing batch of indices (OUTPUT) + # Every problem MUST return that stream. 
+ indices: indices + + # Stream containing batch of images (OUTPUT) + images: images + + # Stream containing batch of image names (OUTPUT) + image_ids: image_ids + + # Stream containing batch of questions (OUTPUT) + questions: questions + + # Stream containing targets - answers (OUTPUT) + answers: answers + + # Stream containing scene descriptions (OUTPUT) + answers: scene_graphs + + # Stream containing batch with question type - indices (OUTPUT) + category_ids: question_type_id + + # Stream containing batch with question type - names (OUTPUT) + category_names: question_type_name + +globals: + #################################################################### + # 3. Keymappings of variables that will be RETRIEVED from GLOBALS. + #################################################################### + + #################################################################### + # 4. Keymappings associated with GLOBAL variables that will be SET. + #################################################################### + + # Width of the image (SET) + input_width: image_width + # Height of the image (SET) + input_height: image_height + # Depth of the image (SET) + input_depth: image_depth + + # Question type (word-idx) mappings (SET) + question_type_word_mappings: question_type_word_mappings + + #################################################################### + # 5. Keymappings associated with statistics that will be ADDED. + #################################################################### diff --git a/ptp/components/problems/image_text_to_class/clevr.py b/ptp/components/problems/image_text_to_class/clevr.py new file mode 100644 index 0000000..0d710cf --- /dev/null +++ b/ptp/components/problems/image_text_to_class/clevr.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright (C) IBM Corporation 2019 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+__author__ = "Tomasz Kornuta, Vincent Marois"
+
+import os
+import string
+import tqdm
+
+#import pandas as pd
+#from PIL import Image
+#import numpy as np
+#import nltk
+import json
+
+import torch
+#from torchvision import transforms
+
+from ptp.components.problems.problem import Problem
+from ptp.data_types.data_definition import DataDefinition
+
+#from ptp.components.utils.io import save_nparray_to_csv_file
+from ptp.configuration.config_parsing import get_value_from_dictionary
+from ptp.configuration.configuration_error import ConfigurationError
+
+class CLEVR(Problem):
+    """
+    Problem providing data associated with the CLEVR (Compositional Language and Elementary Visual Reasoning) diagnostic dataset.
+
+    The dataset consists of three splits, plus annotations:
+        - A training set of 70,000 images and 699,989 questions
+        - A validation set of 15,000 images and 149,991 questions
+        - A test set of 15,000 images and 149,988 questions
+        - Answers for all train and val questions
+        - Scene graph annotations for train and val images giving ground-truth locations, attributes, and relationships for objects
+        - Functional program representations for all training and validation images
+
+    CLEVR contains a total of 90 question families, each with a single program template and an average of four text templates.
+    Those are further aggregated into 13 Question Types:
+        - Querying attributes (Size, Color, Material, Shape)
+        - Comparing attributes (Size, Color, Material, Shape)
+        - Existence
+        - Counting
+        - Integer comparison (Equal, Less, More)
+
+    For more details please refer to the associated _website or _paper.
+    Test set with answers can be downloaded from a separate repository _repo.
+
+    .. _website: https://cs.stanford.edu/people/jcjohns/clevr/
+
+    .. _paper: https://arxiv.org/pdf/1612.06890
+
+    """
+    def __init__(self, name, config):
+        """
+        Initializes the problem object. Calls the base constructor and loads the question file adequate for the given split.
+
+        :param name: Name of the component.
+
+        :param config: Dictionary of parameters (read from configuration ``.yaml`` file).
+        """
+        # Call constructors of parent classes.
+        Problem.__init__(self, name, CLEVR, config)
+
+        # (Eventually) download required packages.
+        #nltk.download('punkt')
+        #nltk.download('stopwords')
+
+        # Get key mappings of all output streams.
+        self.key_images = self.stream_keys["images"]
+        self.key_image_ids = self.stream_keys["image_ids"]
+        self.key_questions = self.stream_keys["questions"]
+        self.key_answers = self.stream_keys["answers"]
+        self.key_question_type_ids = self.stream_keys["question_type_ids"]
+        self.key_question_type_names = self.stream_keys["question_type_names"]
+
+        # Get flag informing whether we want to stream images or not.
+        self.stream_images = self.config['stream_images']
+
+        # Output image dimensions.
+        self.height = 480 # self.config['resize_image'][0]
+        self.width = 320 #self.config['resize_image'][1]
+        self.depth = 3
+
+        # Set global variables - all dimensions ASIDE OF BATCH.
+        self.globals["image_height"] = self.height
+        self.globals["image_width"] = self.width
+        self.globals["image_depth"] = self.depth
+
+        # Mapping of question subtypes to types.
+ self.question_type_subtype_mapping = { + 'query_size': 'query_attribute', + 'equal_size': 'compare_attribute', + 'query_shape': 'query_attribute', + 'query_color': 'query_attribute', + 'greater_than': 'compare_integer', + 'equal_material': 'compare_attribute', + 'equal_color': 'compare_attribute', + 'equal_shape': 'compare_attribute', + 'less_than': 'compare_integer', + 'count': 'count', + 'exist': 'exist', + 'equal_integer': 'compare_integer', + 'query_material': 'query_attribute'} + + + + # Get the absolute path. + self.data_folder = os.path.expanduser(self.config['data_folder']) + + # Get split. + split = get_value_from_dictionary('split', self.config, "training | validation | test | cogent_a_training | cogent_a_validation | cogent_b_validation".split(" | ")) + + # Set split-dependent data. + if split == 'training': + # Training split folder and file with data question. + split_image_folder = os.path.join(self.data_folder, "images", "train") + data_file = os.path.join(self.data_folder, "questions", 'CLEVR_train_questions.json') + + elif split == 'validation': + # Validation split folder and file with data question. + split_image_folder = os.path.join(self.data_folder, "images", "val") + data_file = os.path.join(self.data_folder, "questions", 'CLEVR_val_questions.json') + + elif split == 'test': + # Test split folder and file with data question. + split_image_folder = os.path.join(self.data_folder, "images", "test") + data_file = os.path.join(self.data_folder, "questions", 'CLEVR_test_questions.json') + + else: # cogent + raise ConfigurationError("Split {} not supported yet".format(split)) + + # Load dataset. + self.dataset = self.load_dataset(data_file, split_image_folder) + + # Display exemplary sample. + #self.logger.info("Exemplary sample 0 ({}):\n [ category: {}\t image_ids: {}\t question: {}\t answer: {} ]".format( + # self.ix[0], + # self.category_idx_to_word[self.dataset[self.ix[0]][self.key_question_type_ids]], + # self.dataset[self.ix[0]][self.key_image_ids], + # self.dataset[self.ix[0]][self.key_questions], + # self.dataset[self.ix[0]][self.key_answers] + # )) + + + + def output_data_definitions(self): + """ + Function returns a dictionary with definitions of output data produced the component. + + :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`). + """ + # Add all "standard" streams. + d = { + self.key_indices: DataDefinition([-1, 1], [list, int], "Batch of sample indices [BATCH_SIZE] x [1]"), + self.key_image_ids: DataDefinition([-1, 1], [list, str], "Batch of image names, each being a single word [BATCH_SIZE] x [STRING]"), + self.key_question_type_ids: DataDefinition([-1], [torch.Tensor], "Batch of target question type indices, each being a single index [BATCH_SIZE]"), + self.key_question_type_names: DataDefinition([-1, 1], [list, str], "Batch of target question type names, each being a single word [BATCH_SIZE] x [STRING]"), + } + + # Return images only when required. + if self.stream_images: + d[self.key_images] = DataDefinition([-1, self.depth, self.height, self.width], [torch.Tensor], "Batch of images [BATCH_SIZE x IMAGE_DEPTH x IMAGE_HEIGHT x IMAGE_WIDTH]") + + # Add stream with questions. + d[self.key_questions] = DataDefinition([-1, 1], [list, str], "Batch of questions, each being a string consisting of many words [BATCH_SIZE] x [STRING]") + + # Add stream with answers. 
+ d[self.key_answers]= DataDefinition([-1, 1], [list, str], "Batch of target answers, each being a string consisting of many words [BATCH_SIZE] x [STRING]") + + return d + + + def __len__(self): + """ + Returns the "size" of the "problem" (total number of samples). + + :return: The size of the problem. + """ + return len(self.dataset) + + + def load_dataset(self, source_data_file, source_image_folder): + """ + Loads the dataset from source file + + :param source_data_file: jSON file with image ids, questions, answers, scene graphs, etc. + + :param source_image_folder: Folder containing image files. + + """ + self.logger.info("Loading dataset from:\n {}".format(source_data_file)) + # Set containing list of tuples. + dataset = [] + + with open(source_data_file) as f: + self.logger.info('Loading samples from {} ...'.format(source_data_file)) + dataset = json.load(f) + self.logger.info('Loaded {} samples'.format(len(dataset['questions']))) + print(dataset["questions"][0]) + exit(1) + + + self.logger.info("Loaded dataset consisting of {} samples".format(len(dataset))) + # Return the created list. + return dataset + + + def __getitem__(self, index): + """ + Getter method to access the dataset and return a single sample. + + :param index: index of the sample to return. + :type index: int + + :return: DataDict({'indices', 'images', 'images_ids','questions', 'answers', 'category_ids', 'image_sizes'}) + """ + # Get item. + item = self.dataset[self.ix[index]] + + # Create the resulting sample (data dict). + data_dict = self.create_data_dict(index) + + # Load and stream the image ids. + img_id = item[self.key_image_ids] + data_dict[self.key_image_ids] = img_id + + # Load the adequate image - only when required. + if self.stream_images: + + # Image related variables. + data_dict[self.key_images] = item[self.key_images] + + # Return question. + data_dict[self.key_questions] = item[self.key_questions] + + # Return answer. + data_dict[self.key_answers] = item[self.key_answers] + + # Question type related variables. + data_dict[self.key_question_type_ids] = item[self.key_question_type_ids] + data_dict[self.key_question_type_names] = self.category_idx_to_word[item[self.key_question_type_ids]] + + # Return sample. + return data_dict + + + def collate_fn(self, batch): + """ + Combines a list of DataDict (retrieved with :py:func:`__getitem__`) into a batch. + + :param batch: list of individual samples to combine + :type batch: list + + :return: DataDict({'indices', 'images', 'images_ids','questions', 'answers', 'category_ids', 'image_sizes'}) + + """ + # Collate indices. + data_dict = self.create_data_dict([sample[self.key_indices] for sample in batch]) + + # Stack images. + data_dict[self.key_image_ids] = [item[self.key_image_ids] for item in batch] + if self.stream_images: + data_dict[self.key_images] = torch.stack([item[self.key_images] for item in batch]).type(torch.FloatTensor) + + # Collate lists/lists of lists. + data_dict[self.key_questions] = [item[self.key_questions] for item in batch] + data_dict[self.key_answers] = [item[self.key_answers] for item in batch] + + # Stack categories. + data_dict[self.key_question_type_ids] = torch.tensor([item[self.key_question_type_ids] for item in batch]) + data_dict[self.key_question_type_names] = [item[self.key_question_type_names] for item in batch] + + # Return collated dict. 
+ return data_dict From 3b1f87053e0d1a3cd24f2526014c4ea70e11cb5b Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 20:06:43 -0700 Subject: [PATCH 03/10] simple convnet classifier for CLEVR --- ...r_image_classification_convnet_softmax.yml | 37 ++++ configs/clevr/default_clevr.yml | 23 +- .../problems/image_text_to_class/clevr.yml | 21 +- .../default/components/text/label_indexer.yml | 2 +- .../problems/image_text_to_class/clevr.py | 204 +++++++++++++----- .../image_text_to_class/vqa_med_2019.py | 2 +- 6 files changed, 217 insertions(+), 72 deletions(-) create mode 100644 configs/clevr/clevr_image_classification_convnet_softmax.yml diff --git a/configs/clevr/clevr_image_classification_convnet_softmax.yml b/configs/clevr/clevr_image_classification_convnet_softmax.yml new file mode 100644 index 0000000..469008d --- /dev/null +++ b/configs/clevr/clevr_image_classification_convnet_softmax.yml @@ -0,0 +1,37 @@ +# Load config defining CLEVR problems for training, validation and testing. +default_configs: clevr/default_clevr.yml + +# Definition of the pipeline. +pipeline: + + # Model consisting of two components. + image_encoder: + priority: 1.1 + type: ConvNetEncoder + streams: + inputs: images + + # Reshape inputs + reshaper: + priority: 1.2 + type: ReshapeTensor + input_dims: [-1, 16, 58, 38] + output_dims: [-1, 35264] + streams: + inputs: feature_maps + outputs: reshaped_maps + globals: + output_size: reshaped_maps_size + + # Image classifier. + classifier: + priority: 1.3 + type: FeedForwardNetwork + hidden_sizes: [1000] + streams: + inputs: reshaped_maps + globals: + input_size: reshaped_maps_size + prediction_size: num_answers + +#: pipeline diff --git a/configs/clevr/default_clevr.yml b/configs/clevr/default_clevr.yml index e5303e0..97f7fb3 100644 --- a/configs/clevr/default_clevr.yml +++ b/configs/clevr/default_clevr.yml @@ -4,7 +4,7 @@ training: type: CLEVR batch_size: &b 64 split: training - #resize: [32, 32] + #resize: [224, 224] # optimizer parameters: optimizer: type: Adam @@ -18,12 +18,11 @@ training: # Validation parameters: validation: - #partial_validation_interval: 100 problem: type: CLEVR batch_size: *b split: validation - #resize: [32, 32] + #resize: [224, 224] # Testing parameters: test: @@ -31,16 +30,28 @@ test: type: CLEVR batch_size: *b split: test - #resize: [32, 32] + #resize: [224, 224] pipeline: - name: default disable: image_viewer + label_to_target: + type: LabelIndexer + priority: 0.1 + # Load word mappings for answers. + data_folder: '~/data/CLEVR_v1.0' + word_mappings_file: 'answers.all.csv' + export_word_mappings_to_globals: True + globals: + vocabulary_size: num_answers + streams: + inputs: answers + outputs: targets + # Loss nllloss: type: NLLLoss - priority: 10.0 + priority: 10.1 # Statistics. 
batch_size: diff --git a/configs/default/components/problems/image_text_to_class/clevr.yml b/configs/default/components/problems/image_text_to_class/clevr.yml index 7e62f09..3914bea 100644 --- a/configs/default/components/problems/image_text_to_class/clevr.yml +++ b/configs/default/components/problems/image_text_to_class/clevr.yml @@ -11,13 +11,20 @@ data_folder: '~/data/CLEVR_v1.0' # Options: training | validation | test | cogent_a_training | cogent_a_validation | cogent_b_validation split: training -# Defines the categoriees that will be used (LOADED) -# Options: all | c1 | c2 | c3 | c4 (or any combination of the latter 4) -#categories: all - # Flag indicating whether the problem will load and return images (LOADED) stream_images: True +# Resize parameter (LOADED) +# When present, resizes the images from original size to [height, width] +# Depth remains set to 3. +#resize_image: [height, width] + +# Select applied image preprocessing/augmentations (LOADED) +# Use one (or more) of the affine transformations: +# none | normalize | all +# Accepted formats: a,b,c or [a,b,c] +image_preprocessing: normalize + streams: #################################################################### # 2. Keymappings associated with INPUT and OUTPUT streams. @@ -40,13 +47,13 @@ streams: answers: answers # Stream containing scene descriptions (OUTPUT) - answers: scene_graphs + #answers: scene_graphs # Stream containing batch with question type - indices (OUTPUT) - category_ids: question_type_id + category_ids: question_type_ids # Stream containing batch with question type - names (OUTPUT) - category_names: question_type_name + category_names: question_type_names globals: #################################################################### diff --git a/configs/default/components/text/label_indexer.yml b/configs/default/components/text/label_indexer.yml index d402f43..f2c0a48 100644 --- a/configs/default/components/text/label_indexer.yml +++ b/configs/default/components/text/label_indexer.yml @@ -1,4 +1,4 @@ -# This file defines the default values for Label Indexer. +# This file defines the default values for LabelIndexer. #################################################################### # 1. CONFIGURATION PARAMETERS that will be LOADED by the component. diff --git a/ptp/components/problems/image_text_to_class/clevr.py b/ptp/components/problems/image_text_to_class/clevr.py index 0d710cf..e40e411 100644 --- a/ptp/components/problems/image_text_to_class/clevr.py +++ b/ptp/components/problems/image_text_to_class/clevr.py @@ -15,26 +15,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__author__ = "Tomasz Kornuta, Vincent Marois" +__author__ = "Tomasz Kornuta" import os -import string +#import string +import json import tqdm -#import pandas as pd -#from PIL import Image +from PIL import Image #import numpy as np #import nltk -import json import torch -#from torchvision import transforms +from torchvision import transforms from ptp.components.problems.problem import Problem from ptp.data_types.data_definition import DataDefinition #from ptp.components.utils.io import save_nparray_to_csv_file -from ptp.configuration.config_parsing import get_value_from_dictionary +from ptp.configuration.config_parsing import get_value_from_dictionary, get_value_list_from_dictionary from ptp.configuration.configuration_error import ConfigurationError class CLEVR(Problem): @@ -93,32 +92,93 @@ def __init__(self, name, config): # Get flag informing whether we want to stream images or not. self.stream_images = self.config['stream_images'] - # Output image dimensions. - self.height = 480 # self.config['resize_image'][0] - self.width = 320 #self.config['resize_image'][1] - self.depth = 3 + # Check the resize image option. + if "resize_image" in self.config: + if len(self.config['resize_image']) != 2: + self.logger.error("'resize_image' field must contain 2 values: the desired height and width") + exit(-1) + + # Output image dimensions. + self.height = self.config['resize_image'][0] + self.width = self.config['resize_image'][1] + self.depth = 3 + resize = True + else: + # Use original image dimensions. + self.height = 480 + self.width = 320 + self.depth = 3 + resize = False # Set global variables - all dimensions ASIDE OF BATCH. self.globals["image_height"] = self.height self.globals["image_width"] = self.width self.globals["image_depth"] = self.depth - # Mapping of question subtypes to types. - self.question_type_subtype_mapping = { - 'query_size': 'query_attribute', - 'equal_size': 'compare_attribute', - 'query_shape': 'query_attribute', - 'query_color': 'query_attribute', - 'greater_than': 'compare_integer', - 'equal_material': 'compare_attribute', - 'equal_color': 'compare_attribute', - 'equal_shape': 'compare_attribute', - 'less_than': 'compare_integer', - 'count': 'count', - 'exist': 'exist', - 'equal_integer': 'compare_integer', - 'query_material': 'query_attribute'} + # Get image preprocessing. + self.image_preprocessing = get_value_list_from_dictionary( + "image_preprocessing", self.config, + 'none | normalize | all'.split(" | ") + ) + if resize: + # Add resize as transformation. + if 'none' in self.image_preprocessing: + self.image_preprocessing = ["resize"] + if 'all' in self.image_preprocessing: + self.image_preprocessing = ["resize", 'normalize'] + else: + if 'none' in self.image_preprocessing: + self.image_preprocessing = [] + if 'all' in self.image_preprocessing: + self.image_preprocessing = ['normalize'] + self.logger.info("Applied image preprocessing: {}".format(self.image_preprocessing)) + + # Mapping of question subtypes to types (not used, but keeping it just in case). 
+ #self.question_subtype_to_type_mapping = { + # 'query_size': 'query_attribute', + # 'equal_size': 'compare_attribute', + # 'query_shape': 'query_attribute', + # 'query_color': 'query_attribute', + # 'greater_than': 'compare_integer', + # 'equal_material': 'compare_attribute', + # 'equal_color': 'compare_attribute', + # 'equal_shape': 'compare_attribute', + # 'less_than': 'compare_integer', + # 'count': 'count', + # 'exist': 'exist', + # 'equal_integer': 'compare_integer', + # 'query_material': 'query_attribute'} + # Mapping of question subtypes to types. + self.question_subtype_to_id_mapping = { + 'query_size': 0, + 'equal_size': 1, + 'query_shape': 2, + 'query_color': 3, + 'greater_than': 4, + 'equal_material': 5, + 'equal_color': 6, + 'equal_shape': 7, + 'less_than': 8, + 'count': 9, + 'exist': 10, + 'equal_integer': 11, + 'query_material': 12} + + # Mapping of question families to subtypes. + self.question_family_id_to_subtype_mapping = { + 0: "equal_integer", 1: "less_than", 2: "greater_than", 3: "equal_integer", 4: "less_than", 5: "greater_than", 6: "equal_integer", 7: "less_than", 8: "greater_than", 9: "equal_size", + 10: "equal_color", 11: "equal_material", 12: "equal_shape", 13: "equal_size", 14: "equal_size", 15: "equal_size", 16: "equal_color", 17: "equal_color", 18: "equal_color", 19: "equal_material", + 20: "equal_material", 21: "equal_material", 22: "equal_shape", 23: "equal_shape", 24: "equal_shape", 25: "count", 26: "exist", 27: "query_size", 28: "query_shape", 29: "query_color", + 30: "query_material", 31: "count", 32: "query_size", 33: "query_color", 34: "query_material", 35: "query_shape", 36: "exist", 37: "exist", 38: "exist", 39: "exist", + 40: "count", 41: "count", 42: "count", 43: "count", 44: "exist", 45: "exist", 46: "exist", 47: "exist", 48: "count", 49: "count", + 50: "count", 51: "count", 52: "query_color", 53: "query_material", 54: "query_shape", 55: "query_size", 56: "query_material", 57: "query_shape", 58: "query_size", 59: "query_color", + 60: "query_shape", 61: "query_size", 62: "query_color", 63: "query_material", 64: "count", 65: "count", 66: "count", 67: "count", 68: "count", 69: "count", + 70: "count", 71: "count", 72: "count", 73: "exist", 74: "query_size", 75: "query_color", 76: "query_material", 77: "query_shape", 78: "count", 79: "exist", + 80: "query_size", 81: "query_color", 82: "query_material", 83: "query_shape", 84: "count", 85: "exist", 86: "query_shape", 87: "query_material", 88: "query_color", 89: "query_size"} + + # Finally, "merge" those two. + self.question_family_id_to_subtype_id_mapping = { key: self.question_subtype_to_id_mapping[value] for key, value in self.question_family_id_to_subtype_mapping.items() } # Get the absolute path. @@ -130,33 +190,35 @@ def __init__(self, name, config): # Set split-dependent data. if split == 'training': # Training split folder and file with data question. - split_image_folder = os.path.join(self.data_folder, "images", "train") data_file = os.path.join(self.data_folder, "questions", 'CLEVR_train_questions.json') + self.split_image_folder = os.path.join(self.data_folder, "images", "train") elif split == 'validation': # Validation split folder and file with data question. - split_image_folder = os.path.join(self.data_folder, "images", "val") data_file = os.path.join(self.data_folder, "questions", 'CLEVR_val_questions.json') + self.split_image_folder = os.path.join(self.data_folder, "images", "val") elif split == 'test': # Test split folder and file with data question. 
- split_image_folder = os.path.join(self.data_folder, "images", "test") data_file = os.path.join(self.data_folder, "questions", 'CLEVR_test_questions.json') + self.split_image_folder = os.path.join(self.data_folder, "images", "test") else: # cogent raise ConfigurationError("Split {} not supported yet".format(split)) # Load dataset. - self.dataset = self.load_dataset(data_file, split_image_folder) - + self.dataset = self.load_dataset(data_file) + # Display exemplary sample. - #self.logger.info("Exemplary sample 0 ({}):\n [ category: {}\t image_ids: {}\t question: {}\t answer: {} ]".format( - # self.ix[0], - # self.category_idx_to_word[self.dataset[self.ix[0]][self.key_question_type_ids]], - # self.dataset[self.ix[0]][self.key_image_ids], - # self.dataset[self.ix[0]][self.key_questions], - # self.dataset[self.ix[0]][self.key_answers] - # )) + i = 0 + self.logger.info("Exemplary sample {} ({}):\n question_type: {} ({})\n image_ids: {}\n question: {}\n answer: {}".format( + i, self.dataset[i]["question_index"], + self.question_family_id_to_subtype_mapping[self.dataset[i]["question_family_index"]], + self.question_family_id_to_subtype_id_mapping[self.dataset[i]["question_family_index"]], + self.dataset[i]["image_filename"], + self.dataset[i]["question"], + self.dataset[i]["answer"] + )) @@ -196,32 +258,60 @@ def __len__(self): return len(self.dataset) - def load_dataset(self, source_data_file, source_image_folder): + def load_dataset(self, source_data_file): """ Loads the dataset from source file :param source_data_file: jSON file with image ids, questions, answers, scene graphs, etc. - :param source_image_folder: Folder containing image files. - """ self.logger.info("Loading dataset from:\n {}".format(source_data_file)) - # Set containing list of tuples. dataset = [] with open(source_data_file) as f: self.logger.info('Loading samples from {} ...'.format(source_data_file)) - dataset = json.load(f) - self.logger.info('Loaded {} samples'.format(len(dataset['questions']))) - print(dataset["questions"][0]) - exit(1) - + dataset = json.load(f)['questions'] self.logger.info("Loaded dataset consisting of {} samples".format(len(dataset))) - # Return the created list. return dataset + def get_image(self, img_id): + """ + Function loads and returns image along with its size. + Additionally, it performs all the required transformations. + + :param img_id: Identifier of the images. + :param img_folder: Path to the image. + + :return: image (Tensor) + """ + + # Load the image. + img = Image.open(os.path.join(self.split_image_folder, img_id)) + + image_transformations_list = [] + + # Optional: resize. + if 'resize' in self.image_preprocessing: + image_transformations_list.append(transforms.Resize([self.height,self.width])) + + # Add obligatory transformation. + image_transformations_list.append(transforms.ToTensor()) + + # Optional: normalization. + if 'normalize' in self.image_preprocessing: + # Use normalization that the pretrained models from TorchVision require. + image_transformations_list.append(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])) + + # Resize the image and transform to Torch Tensor. + transforms_com = transforms.Compose(image_transformations_list) + # Apply transformations. + img = transforms_com(img) + + # Return image. + return img + def __getitem__(self, index): """ Getter method to access the dataset and return a single sample. @@ -229,33 +319,33 @@ def __getitem__(self, index): :param index: index of the sample to return. 
:type index: int - :return: DataDict({'indices', 'images', 'images_ids','questions', 'answers', 'category_ids', 'image_sizes'}) + :return: DataDict({'indices', 'images', 'images_ids','questions', 'answers', 'question_type_ids', 'question_type_names'}) """ # Get item. - item = self.dataset[self.ix[index]] + item = self.dataset[index] # Create the resulting sample (data dict). data_dict = self.create_data_dict(index) # Load and stream the image ids. - img_id = item[self.key_image_ids] + img_id = item["image_filename"] data_dict[self.key_image_ids] = img_id # Load the adequate image - only when required. if self.stream_images: - + img = self.get_image(img_id) # Image related variables. - data_dict[self.key_images] = item[self.key_images] + data_dict[self.key_images] = img # Return question. - data_dict[self.key_questions] = item[self.key_questions] + data_dict[self.key_questions] = item["question"] # Return answer. - data_dict[self.key_answers] = item[self.key_answers] + data_dict[self.key_answers] = item["answer"] # Question type related variables. - data_dict[self.key_question_type_ids] = item[self.key_question_type_ids] - data_dict[self.key_question_type_names] = self.category_idx_to_word[item[self.key_question_type_ids]] + data_dict[self.key_question_type_ids] = self.question_family_id_to_subtype_id_mapping[item["question_family_index"]] + data_dict[self.key_question_type_names] = self.question_family_id_to_subtype_mapping[item["question_family_index"]] # Return sample. return data_dict diff --git a/ptp/components/problems/image_text_to_class/vqa_med_2019.py b/ptp/components/problems/image_text_to_class/vqa_med_2019.py index f5430b7..a739e4a 100644 --- a/ptp/components/problems/image_text_to_class/vqa_med_2019.py +++ b/ptp/components/problems/image_text_to_class/vqa_med_2019.py @@ -287,7 +287,7 @@ def __init__(self, name, config): self.logger.info("Exported indices to '{}'".format(os.path.join(self.app_state.log_dir, self.config["export_indices"]))) # Display exemplary sample. - self.logger.info("Exemplary sample 0 ({}):\n [ category: {}\t image_ids: {}\t question: {}\t answer: {} ]".format( + self.logger.info("Exemplary sample 0 ({}):\n category: {}\n image_ids: {}\n question: {}\n answer: {}".format( self.ix[0], self.category_idx_to_word[self.dataset[self.ix[0]][self.key_category_ids]], self.dataset[self.ix[0]][self.key_image_ids], From 36bc65bf14c871e20cdfcdf394252b3bc07028a5 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 20:20:06 -0700 Subject: [PATCH 04/10] fixed issue with image, collate/pipeline_manager indices.shape[0] to be fixed --- ptp/components/problems/image_text_to_class/clevr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ptp/components/problems/image_text_to_class/clevr.py b/ptp/components/problems/image_text_to_class/clevr.py index e40e411..ce660f2 100644 --- a/ptp/components/problems/image_text_to_class/clevr.py +++ b/ptp/components/problems/image_text_to_class/clevr.py @@ -288,7 +288,7 @@ def get_image(self, img_id): """ # Load the image. 
- img = Image.open(os.path.join(self.split_image_folder, img_id)) + img = Image.open(os.path.join(self.split_image_folder, img_id)).convert('RGB') image_transformations_list = [] From 7df05037827ac175690b553485705910fa19d4a0 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 22:25:00 -0700 Subject: [PATCH 05/10] total_loss_support bugfix --- ptp/application/pipeline_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ptp/application/pipeline_manager.py b/ptp/application/pipeline_manager.py index c5ee329..3f48cc2 100644 --- a/ptp/application/pipeline_manager.py +++ b/ptp/application/pipeline_manager.py @@ -720,7 +720,7 @@ def collect_statistics(self, stat_col, data_dict): for key in loss.loss_keys(): loss_sum += data_dict[key].cpu().item() stat_col["total_loss"] = loss_sum - stat_col["total_loss_support"] = data_dict["indices"].shape[0] # batch size + stat_col["total_loss_support"] = len(data_dict["indices"]) # batch size def add_aggregators(self, stat_agg): From e50868a0bcbf388af150e788c60c65309aed05e2 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 22:32:51 -0700 Subject: [PATCH 06/10] Added statistics and answer decoder to default clevr pipeline --- configs/clevr/default_clevr.yml | 34 +++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/configs/clevr/default_clevr.yml b/configs/clevr/default_clevr.yml index 97f7fb3..8e657c3 100644 --- a/configs/clevr/default_clevr.yml +++ b/configs/clevr/default_clevr.yml @@ -43,25 +43,55 @@ pipeline: word_mappings_file: 'answers.all.csv' export_word_mappings_to_globals: True globals: + word_mappings: answer_word_mappings vocabulary_size: num_answers streams: inputs: answers - outputs: targets + outputs: target_answers + # Loss nllloss: type: NLLLoss priority: 10.1 + streams: + targets: target_answers # Statistics. batch_size: priority: 100.0 type: BatchSizeStatistics + accuracy: + priority: 100.1 + type: AccuracyStatistics + streams: + targets: target_answers + + precision_recall: + priority: 100.2 + type: PrecisionRecallStatistics + use_word_mappings: True + show_class_scores: True + globals: + word_mappings: answer_word_mappings + streams: + targets: target_answers + + answer_decoder: + priority: 100.3 + type: WordDecoder + import_word_mappings_from_globals: True + globals: + word_mappings: answer_word_mappings + streams: + inputs: predictions + outputs: predicted_answers + stream_viewer: priority: 100.4 type: StreamViewer - input_streams: index, question + input_streams: indices, questions, target_answers, predicted_answers #image_viewer: # priority: 100.5 From 8e8ae1e73935c377fe2f3d3d946665aae4347927 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 23:04:52 -0700 Subject: [PATCH 07/10] second unimodal baseline for CLEVR --- .../clevr_question_classification_lstm.yml | 61 +++++++++++++++++++ configs/clevr/default_clevr.yml | 4 +- 2 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 configs/clevr/clevr_question_classification_lstm.yml diff --git a/configs/clevr/clevr_question_classification_lstm.yml b/configs/clevr/clevr_question_classification_lstm.yml new file mode 100644 index 0000000..5dbd0d4 --- /dev/null +++ b/configs/clevr/clevr_question_classification_lstm.yml @@ -0,0 +1,61 @@ +# Load config defining CLEVR problems for training, validation and testing. +default_configs: clevr/default_clevr.yml + +# Stop streaming images - in all sets. 
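+# (The problem's default configuration sets stream_images: True, hence the explicit override in each split below.)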
+training: + problem: + stream_images: False + +validation: + problem: + stream_images: False + +test: + problem: + stream_images: False + +# Definition of the pipeline. +pipeline: + + # Questions encoding. + question_tokenizer: + priority: 1.1 + type: SentenceTokenizer + # Lowercase all letters + remove punctuation (reduced vocabulary of 80 words instead of 87) + preprocessing: all + streams: + inputs: questions + outputs: tokenized_questions + + # Model 1: Embeddings + question_embeddings: + priority: 1.2 + type: SentenceEmbeddings + embeddings_size: 50 + pretrained_embeddings_file: glove.6B.50d.txt + data_folder: ~/data/CLEVR_v1.0 + word_mappings_file: questions.all.word.mappings.lowercase.csv + export_word_mappings_to_globals: True + globals: + word_mappings: question_word_mappings + vocabulary_size: num_question_words + streams: + inputs: tokenized_questions + outputs: embedded_questions + + # Model 2: RNN + lstm: + priority: 1.3 + type: RecurrentNeuralNetwork + cell_type: LSTM + prediction_mode: Last + initial_state: Zero + hidden_size: 50 + streams: + inputs: embedded_questions + globals: + input_size: embeddings_size + prediction_size: num_answers + + +#: pipeline diff --git a/configs/clevr/default_clevr.yml b/configs/clevr/default_clevr.yml index 8e657c3..477cdda 100644 --- a/configs/clevr/default_clevr.yml +++ b/configs/clevr/default_clevr.yml @@ -39,8 +39,8 @@ pipeline: type: LabelIndexer priority: 0.1 # Load word mappings for answers. - data_folder: '~/data/CLEVR_v1.0' - word_mappings_file: 'answers.all.csv' + data_folder: ~/data/CLEVR_v1.0 + word_mappings_file: answers.all.word.mappings.csv export_word_mappings_to_globals: True globals: word_mappings: answer_word_mappings From d695b13b0a26c1a77e182875181471828c2db490 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 23:41:45 -0700 Subject: [PATCH 08/10] simple multimodal pipeline - concatenation --- .../clevr_all_vgg_glove_lstm_concat_ffn.yml | 111 ++++++++++++++++++ ...ax.yml => clevr_image_convnet_softmax.yml} | 0 ...lstm.yml => clevr_question_glove_lstm.yml} | 2 +- configs/clevr/default_clevr.yml | 6 +- .../problems/image_text_to_class/clevr.yml | 2 +- .../problems/image_text_to_class/clevr.py | 16 ++- 6 files changed, 123 insertions(+), 14 deletions(-) create mode 100644 configs/clevr/clevr_all_vgg_glove_lstm_concat_ffn.yml rename configs/clevr/{clevr_image_classification_convnet_softmax.yml => clevr_image_convnet_softmax.yml} (100%) rename configs/clevr/{clevr_question_classification_lstm.yml => clevr_question_glove_lstm.yml} (94%) diff --git a/configs/clevr/clevr_all_vgg_glove_lstm_concat_ffn.yml b/configs/clevr/clevr_all_vgg_glove_lstm_concat_ffn.yml new file mode 100644 index 0000000..da57752 --- /dev/null +++ b/configs/clevr/clevr_all_vgg_glove_lstm_concat_ffn.yml @@ -0,0 +1,111 @@ +# Load config defining CLEVR problems for training, validation and testing. +default_configs: clevr/default_clevr.yml + +# Resize and normalize images - in all sets. +training: + problem: + resize_image: [224, 224] + image_preprocessing: normalize + +validation: + problem: + resize_image: [224, 224] + image_preprocessing: normalize + +test: + problem: + resize_image: [224, 224] + image_preprocessing: normalize + +# Definition of the pipeline. +pipeline: + + global_publisher: + priority: 0 + type: GlobalVariablePublisher + keys: [question_encoder_output_size, image_encoder_output_size] + values: [100, 100] + + ################################################################## + # 1st pipeline: question. + # Questions encoding. 
+  question_tokenizer:
+    priority: 1.1
+    type: SentenceTokenizer
+    # Lowercase all letters + remove punctuation (reduced vocabulary of 80 words instead of 87)
+    preprocessing: all
+    streams:
+      inputs: questions
+      outputs: tokenized_questions
+
+  # Model 1: Embeddings
+  question_embeddings:
+    priority: 1.2
+    type: SentenceEmbeddings
+    embeddings_size: 50
+    pretrained_embeddings_file: glove.6B.50d.txt
+    data_folder: ~/data/CLEVR_v1.0
+    word_mappings_file: questions.all.word.mappings.lowercase.csv
+    export_word_mappings_to_globals: True
+    globals:
+      word_mappings: question_word_mappings
+      vocabulary_size: num_question_words
+    streams:
+      inputs: tokenized_questions
+      outputs: embedded_questions
+
+  # Model 2: RNN
+  lstm:
+    priority: 1.3
+    type: RecurrentNeuralNetwork
+    cell_type: LSTM
+    prediction_mode: Last
+    initial_state: Zero
+    hidden_size: 50
+    # Turn off softmax.
+    use_logsoftmax: False
+    streams:
+      inputs: embedded_questions
+      predictions: question_activations
+    globals:
+      input_size: embeddings_size
+      prediction_size: question_encoder_output_size
+
+  ##################################################################
+  # 2nd subpipeline: image.
+  # Image encoder.
+  image_encoder:
+    priority: 2.1
+    type: TorchVisionWrapper
+    model_type: vgg16
+    streams:
+      inputs: images
+      outputs: image_activations
+    globals:
+      output_size: image_encoder_output_size
+
+  ##################################################################
+  # 3rd subpipeline: concatenation + FF.
+  concat:
+    type: Concatenation
+    priority: 3.1
+    input_streams: [question_activations,image_activations]
+    dim: 1 # default
+    input_dims: [[-1,100],[-1,100]]
+    output_dims: [-1,200]
+    streams:
+      outputs: concatenated_activations
+    globals:
+      output_size: concatenated_size
+
+  classifier:
+    type: FeedForwardNetwork
+    hidden_sizes: [100]
+    priority: 3.2
+    streams:
+      inputs: concatenated_activations
+    globals:
+      input_size: concatenated_size
+      prediction_size: num_answers
+
+#: pipeline
diff --git a/configs/clevr/clevr_image_classification_convnet_softmax.yml b/configs/clevr/clevr_image_convnet_softmax.yml
similarity index 100%
rename from configs/clevr/clevr_image_classification_convnet_softmax.yml
rename to configs/clevr/clevr_image_convnet_softmax.yml
diff --git a/configs/clevr/clevr_question_classification_lstm.yml b/configs/clevr/clevr_question_glove_lstm.yml
similarity index 94%
rename from configs/clevr/clevr_question_classification_lstm.yml
rename to configs/clevr/clevr_question_glove_lstm.yml
index 5dbd0d4..02a5399 100644
--- a/configs/clevr/clevr_question_classification_lstm.yml
+++ b/configs/clevr/clevr_question_glove_lstm.yml
@@ -1,7 +1,7 @@
 # Load config defining CLEVR problems for training, validation and testing.
 default_configs: clevr/default_clevr.yml
 
-# Stop streaming images - in all sets.
+# This is a unimodal (question-based) baseline, thus stop streaming images - in all sets.
training: problem: stream_images: False diff --git a/configs/clevr/default_clevr.yml b/configs/clevr/default_clevr.yml index 477cdda..87d5d92 100644 --- a/configs/clevr/default_clevr.yml +++ b/configs/clevr/default_clevr.yml @@ -4,7 +4,7 @@ training: type: CLEVR batch_size: &b 64 split: training - #resize: [224, 224] + #resize_image: [224, 224] # optimizer parameters: optimizer: type: Adam @@ -22,7 +22,7 @@ validation: type: CLEVR batch_size: *b split: validation - #resize: [224, 224] + #resize_image: [224, 224] # Testing parameters: test: @@ -30,7 +30,7 @@ test: type: CLEVR batch_size: *b split: test - #resize: [224, 224] + #resize_image: [224, 224] pipeline: disable: image_viewer diff --git a/configs/default/components/problems/image_text_to_class/clevr.yml b/configs/default/components/problems/image_text_to_class/clevr.yml index 3914bea..0d5b571 100644 --- a/configs/default/components/problems/image_text_to_class/clevr.yml +++ b/configs/default/components/problems/image_text_to_class/clevr.yml @@ -23,7 +23,7 @@ stream_images: True # Use one (or more) of the affine transformations: # none | normalize | all # Accepted formats: a,b,c or [a,b,c] -image_preprocessing: normalize +image_preprocessing: none streams: #################################################################### diff --git a/ptp/components/problems/image_text_to_class/clevr.py b/ptp/components/problems/image_text_to_class/clevr.py index ce660f2..c3ff3d0 100644 --- a/ptp/components/problems/image_text_to_class/clevr.py +++ b/ptp/components/problems/image_text_to_class/clevr.py @@ -109,6 +109,7 @@ def __init__(self, name, config): self.width = 320 self.depth = 3 resize = False + self.logger.info("Setting image size to [D x H x W]: {} x {} x {}".format(self.depth, self.height, self.width)) # Set global variables - all dimensions ASIDE OF BATCH. self.globals["image_height"] = self.height @@ -120,17 +121,14 @@ def __init__(self, name, config): "image_preprocessing", self.config, 'none | normalize | all'.split(" | ") ) + if 'none' in self.image_preprocessing: + self.image_preprocessing = [] + if 'all' in self.image_preprocessing: + self.image_preprocessing = ['normalize'] + if resize: # Add resize as transformation. - if 'none' in self.image_preprocessing: - self.image_preprocessing = ["resize"] - if 'all' in self.image_preprocessing: - self.image_preprocessing = ["resize", 'normalize'] - else: - if 'none' in self.image_preprocessing: - self.image_preprocessing = [] - if 'all' in self.image_preprocessing: - self.image_preprocessing = ['normalize'] + self.image_preprocessing = ["resize"] + self.image_preprocessing self.logger.info("Applied image preprocessing: {}".format(self.image_preprocessing)) # Mapping of question subtypes to types (not used, but keeping it just in case). 
From 4d39c038b90b5db6bb25d4ddcda7467c940c8d60 Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 23:44:04 -0700 Subject: [PATCH 09/10] rename --- ...levr_image_convnet_softmax.yml => clevr_image_convnet_ffn.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename configs/clevr/{clevr_image_convnet_softmax.yml => clevr_image_convnet_ffn.yml} (100%) diff --git a/configs/clevr/clevr_image_convnet_softmax.yml b/configs/clevr/clevr_image_convnet_ffn.yml similarity index 100% rename from configs/clevr/clevr_image_convnet_softmax.yml rename to configs/clevr/clevr_image_convnet_ffn.yml From 8d8c588000a4af23d54e6d7ca6455bc9e07ad12e Mon Sep 17 00:00:00 2001 From: tkornut Date: Tue, 4 Jun 2019 23:47:21 -0700 Subject: [PATCH 10/10] lgtm unused import warning fix --- ptp/components/problems/image_text_to_class/clevr.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ptp/components/problems/image_text_to_class/clevr.py b/ptp/components/problems/image_text_to_class/clevr.py index c3ff3d0..9c57794 100644 --- a/ptp/components/problems/image_text_to_class/clevr.py +++ b/ptp/components/problems/image_text_to_class/clevr.py @@ -18,13 +18,8 @@ __author__ = "Tomasz Kornuta" import os -#import string import json -import tqdm - from PIL import Image -#import numpy as np -#import nltk import torch from torchvision import transforms