From 45624d4d6a92d5d30281df2d04e028cffa1c8179 Mon Sep 17 00:00:00 2001
From: Tomasz Kornuta
Date: Sat, 4 May 2019 11:03:02 -0700
Subject: [PATCH 1/2] Added stream_images flag to VQAMED problem, enabling streaming of images and image_sizes to be turned on/off (default: on)

---
 .../image_text_to_class/vqa_med_2019.yml     |  3 +
 .../c4_word_answer_onehot_bow.yml            | 23 ++++-
 .../image_text_to_class/vqa_med_2019.py      | 93 +++++++++++--------
 3 files changed, 76 insertions(+), 43 deletions(-)

diff --git a/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml b/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml
index 3846b45..b29f344 100644
--- a/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml
+++ b/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml
@@ -15,6 +15,9 @@ split: training
 # Options: all | c1 | c2 | c3 | c4 (or any combination of the latter 4)
 categories: all
 
+# Flag indicating whether the problem will load and return images (LOADED)
+stream_images: True
+
 # Resize parameter (LOADED)
 # When present, resizes the images from original size to [height, width]
 # Depth remains set to 3.
diff --git a/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml b/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml
index 5ee97e7..17168c2 100644
--- a/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml
+++ b/configs/vqa_med_2019/c4_classification/c4_word_answer_onehot_bow.yml
@@ -1,9 +1,28 @@
 # Load config defining problems for training, validation and testing.
 default_configs: vqa_med_2019/c4_classification/default_c4_classification.yml
 
-pipeline:
-  name: c4_word_answer_onehot_bow
+# Training parameters:
+training:
+  problem:
+    categories: C4
+    batch_size: 512
+    # We won't use images here at all.
+    stream_images: False
+  dataloader:
+    num_workers: 0
 
+# Validation parameters:
+validation:
+  problem:
+    categories: C4
+    batch_size: 512
+    # We won't use images here at all.
+    stream_images: False
+  dataloader:
+    num_workers: 0
+
+pipeline:
   # Answer encoding.
   answer_tokenizer:
     type: SentenceTokenizer
diff --git a/ptp/components/problems/image_text_to_class/vqa_med_2019.py b/ptp/components/problems/image_text_to_class/vqa_med_2019.py
index e156eba..5d4d5a5 100644
--- a/ptp/components/problems/image_text_to_class/vqa_med_2019.py
+++ b/ptp/components/problems/image_text_to_class/vqa_med_2019.py
@@ -85,6 +85,9 @@ def __init__(self, name, config):
         self.key_category_names = self.stream_keys["category_names"]
         self.key_image_sizes = self.stream_keys["image_sizes"]
 
+        # Get flag informing whether we want to stream images or not.
+        self.stream_images = self.config['stream_images']
+
         # Check the desired image size.
         if len(self.config['resize_image']) != 2:
             self.logger.error("'resize_image' field must contain 2 values: the desired height and width")
@@ -275,12 +278,15 @@ def output_data_definitions(self):
 
         # Add all "standard" streams.
         d = {
             self.key_indices: DataDefinition([-1, 1], [list, int], "Batch of sample indices [BATCH_SIZE] x [1]"),
-            self.key_images: DataDefinition([-1, self.depth, self.height, self.width], [torch.Tensor], "Batch of images [BATCH_SIZE x IMAGE_DEPTH x IMAGE_HEIGHT x IMAGE_WIDTH]"),
             self.key_image_ids: DataDefinition([-1, 1], [list, str], "Batch of image names, each being a single word [BATCH_SIZE] x [STRING]"),
-            self.key_image_sizes: DataDefinition([-1, 2], [torch.Tensor], "Batch of original sizes (height, width) of images [BATCH_SIZE x 2]"),
             self.key_category_ids: DataDefinition([-1], [torch.Tensor], "Batch of target category indices, each being a single index [BATCH_SIZE]"),
             self.key_category_names: DataDefinition([-1, 1], [list, str], "Batch of category target names, each being a single word [BATCH_SIZE] x [STRING]"),
             }
+
+        # Return images only when required.
+        if self.stream_images:
+            d[self.key_images] = DataDefinition([-1, self.depth, self.height, self.width], [torch.Tensor], "Batch of images [BATCH_SIZE x IMAGE_DEPTH x IMAGE_HEIGHT x IMAGE_WIDTH]")
+            d[self.key_image_sizes] = DataDefinition([-1, 2], [torch.Tensor], "Batch of original sizes (height, width) of images [BATCH_SIZE x 2]")
 
         # Add stream with questions.
         if 'tokenize' in self.question_preprocessing:
@@ -632,47 +638,51 @@ def __getitem__(self, index):
         # Get item.
         item = self.dataset[index]
 
-        # Load the adequate image.
-        img_id = item[self.key_image_ids]
-        img_folder = item["image_folder"]
-        extension = '.jpg'
-        # Load the image.
-        img = Image.open(os.path.join(img_folder, img_id + extension))
-        # Get its width and height.
-        width, height = img.size
-
-        image_transformations_list = []
-        # Optional.
-        if 'random_affine' in self.image_preprocessing:
-            rotate = (-45, 80)
-            translate = (0.05, 0.25)
-            scale = (0.5, 2)
-            image_transformations_list.append(transforms.RandomAffine(rotate, translate, scale))
-        if 'random_horizontal_flip' in self.image_preprocessing:
-            image_transformations_list.append(transforms.RandomHorizontalFlip())
-
-        # Add two obligatory transformations.
-        image_transformations_list.append(transforms.Resize([self.height,self.width]))
-        image_transformations_list.append(transforms.ToTensor())
-
-        # Optional normalization.
-        if 'normalize' in self.image_preprocessing:
-            # Use normalization that the pretrained models from TorchVision require.
-            image_transformations_list.append(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))
-
-        # Resize the image and transform to Torch Tensor.
-        transforms_com = transforms.Compose(image_transformations_list)
-        # Apply transformations.
-        img = transforms_com(img)
-
         # Create the resulting sample (data dict).
         data_dict = self.create_data_dict(index)
 
-        # Image related variables.
-        data_dict[self.key_images] = img
+        # Load and stream the image id.
+        img_id = item[self.key_image_ids]
         data_dict[self.key_image_ids] = img_id
-        # Scale width and height to range (0,1).
-        data_dict[self.key_image_sizes] = torch.FloatTensor([float(height/self.scale_image_height), float(width/self.scale_image_width)])
+
+        # Load the adequate image - only when required.
+        if self.stream_images:
+            img_folder = item["image_folder"]
+            extension = '.jpg'
+            # Load the image.
+            img = Image.open(os.path.join(img_folder, img_id + extension))
+            # Get its width and height.
+            width, height = img.size
+
+            image_transformations_list = []
+            # Optional.
+            if 'random_affine' in self.image_preprocessing:
+                rotate = (-45, 80)
+                translate = (0.05, 0.25)
+                scale = (0.5, 2)
+                image_transformations_list.append(transforms.RandomAffine(rotate, translate, scale))
+            if 'random_horizontal_flip' in self.image_preprocessing:
+                image_transformations_list.append(transforms.RandomHorizontalFlip())
+
+            # Add two obligatory transformations.
+            image_transformations_list.append(transforms.Resize([self.height,self.width]))
+            image_transformations_list.append(transforms.ToTensor())
+
+            # Optional normalization.
+            if 'normalize' in self.image_preprocessing:
+                # Use normalization that the pretrained models from TorchVision require.
+                image_transformations_list.append(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))
+
+            # Resize the image and transform to Torch Tensor.
+            transforms_com = transforms.Compose(image_transformations_list)
+            # Apply transformations.
+            img = transforms_com(img)
+
+            # Image related variables.
+            data_dict[self.key_images] = img
+
+            # Scale width and height to range (0,1).
+            data_dict[self.key_image_sizes] = torch.FloatTensor([float(height/self.scale_image_height), float(width/self.scale_image_width)])
 
         # Apply question transformations.
         preprocessed_question = item[self.key_questions]
@@ -728,9 +738,10 @@ def collate_fn(self, batch):
         data_dict = self.create_data_dict([sample[self.key_indices] for sample in batch])
 
         # Stack images.
-        data_dict[self.key_images] = torch.stack([item[self.key_images] for item in batch]).type(torch.FloatTensor)
         data_dict[self.key_image_ids] = [item[self.key_image_ids] for item in batch]
-        data_dict[self.key_image_sizes] = torch.stack([item[self.key_image_sizes] for item in batch]).type(torch.FloatTensor)
+        if self.stream_images:
+            data_dict[self.key_images] = torch.stack([item[self.key_images] for item in batch]).type(torch.FloatTensor)
+            data_dict[self.key_image_sizes] = torch.stack([item[self.key_image_sizes] for item in batch]).type(torch.FloatTensor)
 
         # Collate lists/lists of lists.
         data_dict[self.key_questions] = [item[self.key_questions] for item in batch]

From 9f9196fca87c8bf660a0876d0d0c6a06143610a5 Mon Sep 17 00:00:00 2001
From: Tomasz Kornuta
Date: Sat, 4 May 2019 12:24:22 -0700
Subject: [PATCH 2/2] Added option to (pre)load the whole dataset (mostly images) into memory during problem initialization

---
 .../image_text_to_class/vqa_med_2019.yml      |   5 +
 ...ic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml |  14 +-
 .../image_text_to_class/vqa_med_2019.py       | 121 ++++++++++++------
 3 files changed, 101 insertions(+), 39 deletions(-)

diff --git a/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml b/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml
index b29f344..715cb17 100644
--- a/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml
+++ b/configs/default/components/problems/image_text_to_class/vqa_med_2019.yml
@@ -18,6 +18,11 @@ categories: all
 # Flag indicating whether the problem will load and return images (LOADED)
 stream_images: True
 
+# Flag indicating whether images will be preloaded (i.e. loaded once at start) (LOADED)
+# WARNING: if this option is active, the images will also be "preprocessed" at start.
+# This means that preloading should not be used when one needs to use random augmentations!
+preload_images: False
+
 # Resize parameter (LOADED)
 # When present, resizes the images from original size to [height, width]
 # Depth remains set to 3.
diff --git a/configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml b/configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml
index b3b7576..958161a 100644
--- a/configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml
+++ b/configs/vqa_med_2019/evaluation/mimic_lstm_vgg16_ewm_is_cat_ffn_c123_loss.yml
@@ -38,7 +38,9 @@ hyperparameters:
   # Final classifier: FFN.
   answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [83]
 
-  batch_size: &batch_size 64
+  batch_size: &batch_size 200
+  preload_images: &preload_images True
+  num_workers: &num_workers 0
 
 # Training parameters:
 training:
@@ -49,10 +51,15 @@ training:
     # Apply all preprocessing/data augmentations.
     question_preprocessing: *question_preprocessing
     image_preprocessing: *image_preprocessing
+    # Preload images.
+    preload_images: *preload_images
     streams:
       questions: tokenized_questions
   sampler:
     weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
+  # Use the indicated number of workers for loading images.
+  dataloader:
+    num_workers: *num_workers
 
   # Optimizer parameters:
   optimizer:
@@ -67,14 +74,19 @@ training:
 
 # Validation parameters:
 validation:
+  partial_validation_interval: 100
   problem:
     batch_size: *batch_size
     categories: C1,C2,C3
     # Apply all preprocessing/data augmentations.
     question_preprocessing: *question_preprocessing
     image_preprocessing: *image_preprocessing
+    # Preload images.
+    preload_images: *preload_images
     streams:
       questions: tokenized_questions
+  dataloader:
+    num_workers: *num_workers
 
 pipeline:
 
diff --git a/ptp/components/problems/image_text_to_class/vqa_med_2019.py b/ptp/components/problems/image_text_to_class/vqa_med_2019.py
index 5d4d5a5..8e94418 100644
--- a/ptp/components/problems/image_text_to_class/vqa_med_2019.py
+++ b/ptp/components/problems/image_text_to_class/vqa_med_2019.py
@@ -87,7 +87,10 @@ def __init__(self, name, config):
 
         # Get flag informing whether we want to stream images or not.
         self.stream_images = self.config['stream_images']
-
+
+        # Get flag indicating whether we want to (pre)load all images at the start.
+        self.preload_images = self.config['preload_images']
+
         # Check the desired image size.
         if len(self.config['resize_image']) != 2:
             self.logger.error("'resize_image' field must contain 2 values: the desired height and width")
@@ -547,8 +550,8 @@ def load_dataset(self, source_files, source_image_folders, source_categories):
                         False )
 
-                # Add record to dataset.
-                dataset.append({
+                # Create item "dictionary".
+                item = {
                     # Image name and path leading to it.
                     self.key_image_ids: row[self.key_image_ids],
                     "image_folder": image_folder,
                     ...
                     self.key_answers: preprocessed_answer,
                     # Add category.
                     self.key_category_ids: category
-                    })
+                    }
+
+                # Preload image.
+                if self.preload_images and self.stream_images:
+                    img, img_size = self.get_image(row[self.key_image_ids], image_folder)
+                    item[self.key_images] = img
+                    item[self.key_image_sizes] = img_size
+
+                # Add item to dataset.
+                dataset.append(item)
 
                 t.update()
             t.close()
@@ -607,8 +619,8 @@ def load_testset(self, data_file, image_folder):
                 else:
                     preprocessed_answer = answer
 
-                # Add record to dataset.
-                dataset.append({
+                # Create item "dictionary".
+                item = {
                     # Image name and path leading to it.
                     self.key_image_ids: row[self.key_image_ids],
                     "image_folder": image_folder,
                     ...
                     self.key_answers: preprocessed_answer,
                     # Add category.
                     self.key_category_ids: category_id
-                    })
+                    }
+
+                # Preload image.
+                if self.preload_images and self.stream_images:
+                    img, img_size = self.get_image(row[self.key_image_ids], image_folder)
+                    item[self.key_images] = img
+                    item[self.key_image_sizes] = img_size
+
+                # Add item to dataset.
+                dataset.append(item)
 
                 t.update()
             t.close()
@@ -625,6 +646,52 @@ def load_testset(self, data_file, image_folder):
 
         # Return the created list.
         return dataset
 
+    def get_image(self, img_id, img_folder):
+        """
+        Function loads and returns the image along with its size.
+        Additionally, it performs all the required transformations.
+
+        :param img_id: Identifier of the image.
+        :param img_folder: Path to the folder containing the image.
+
+        :return: image (Tensor), image size (Tensor, (height, width), both scaled to (0,1])
+        """
+
+        extension = '.jpg'
+        # Load the image.
+        img = Image.open(os.path.join(img_folder, img_id + extension))
+        # Get its width and height.
+        width, height = img.size
+
+        image_transformations_list = []
+        # Optional.
+        if 'random_affine' in self.image_preprocessing:
+            rotate = (-45, 80)
+            translate = (0.05, 0.25)
+            scale = (0.5, 2)
+            image_transformations_list.append(transforms.RandomAffine(rotate, translate, scale))
+        if 'random_horizontal_flip' in self.image_preprocessing:
+            image_transformations_list.append(transforms.RandomHorizontalFlip())
+
+        # Add two obligatory transformations.
+        image_transformations_list.append(transforms.Resize([self.height,self.width]))
+        image_transformations_list.append(transforms.ToTensor())
+
+        # Optional normalization.
+        if 'normalize' in self.image_preprocessing:
+            # Use normalization that the pretrained models from TorchVision require.
+            image_transformations_list.append(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))
+
+        # Resize the image and transform to Torch Tensor.
+        transforms_com = transforms.Compose(image_transformations_list)
+        # Apply transformations.
+        img = transforms_com(img)
+
+        # Get scaled image size.
+        img_size = torch.FloatTensor([float(height/self.scale_image_height), float(width/self.scale_image_width)])
+
+        # Return image and size.
+        return img, img_size
 
     def __getitem__(self, index):
         """
@@ -647,42 +714,20 @@ def __getitem__(self, index):
 
         # Load the adequate image - only when required.
         if self.stream_images:
-            img_folder = item["image_folder"]
-            extension = '.jpg'
-            # Load the image.
-            img = Image.open(os.path.join(img_folder, img_id + extension))
-            # Get its width and height.
-            width, height = img.size
-
-            image_transformations_list = []
-            # Optional.
-            if 'random_affine' in self.image_preprocessing:
-                rotate = (-45, 80)
-                translate = (0.05, 0.25)
-                scale = (0.5, 2)
-                image_transformations_list.append(transforms.RandomAffine(rotate, translate, scale))
-            if 'random_horizontal_flip' in self.image_preprocessing:
-                image_transformations_list.append(transforms.RandomHorizontalFlip())
-
-            # Add two obligatory transformations.
-            image_transformations_list.append(transforms.Resize([self.height,self.width]))
-            image_transformations_list.append(transforms.ToTensor())
-
-            # Optional normalization.
-            if 'normalize' in self.image_preprocessing:
-                # Use normalization that the pretrained models from TorchVision require.
-                image_transformations_list.append(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))
-
-            # Resize the image and transform to Torch Tensor.
-            transforms_com = transforms.Compose(image_transformations_list)
-            # Apply transformations.
-            img = transforms_com(img)
+
+            if self.preload_images:
+                # Use preloaded values.
+                img = item[self.key_images]
+                img_size = item[self.key_image_sizes]
+            else:
+                # Load the image on the fly.
+                img, img_size = self.get_image(img_id, item["image_folder"])
 
             # Image related variables.
             data_dict[self.key_images] = img
 
             # Scale width and height to range (0,1).
-            data_dict[self.key_image_sizes] = torch.FloatTensor([float(height/self.scale_image_height), float(width/self.scale_image_width)])
+            data_dict[self.key_image_sizes] = img_size
 
         # Apply question transformations.
         preprocessed_question = item[self.key_questions]
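Taken together, the two flags cover three problem configurations: images streamed and loaded lazily per sample (the default), images streamed but preloaded and preprocessed once during problem initialization, and images switched off entirely for text-only pipelines. Note that preload_images only takes effect when stream_images is enabled, since the preloading branch checks both flags. A minimal sketch of a problem section combining them, following the config layout used above (the batch size is illustrative only):

# Training parameters:
training:
  problem:
    categories: C1,C2,C3
    batch_size: 64          # illustrative value only
    # Keep streaming images, but load and preprocess them once at initialization.
    # Do not combine preloading with random augmentations
    # (random_affine, random_horizontal_flip), as they would be applied only once.
    stream_images: True
    preload_images: True
  dataloader:
    num_workers: 0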