This repository was archived by the owner on Jul 18, 2024. It is now read-only.
Merged
@@ -15,6 +15,14 @@ split: training
# Options: all | c1 | c2 | c3 | c4 (or any combination of the latter 4)
categories: all

# Flag indicating whether the problem will load and return images (LOADED)
stream_images: True

# Flag indicating whether images will be preloaded (i.e. loaded once, at the start) (LOADED)
# WARNING: if this option is active, the images will also be "preprocessed" at the start.
# This means that preloading should not be used when one relies on random augmentations!
preload_images: False

# Resize parameter (LOADED)
# When present, resizes the images from original size to [height, width]
# Depth remains set to 3.
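To make the preloading warning concrete, here is a minimal sketch (using torchvision, which is not part of this diff) of why preloading freezes random augmentations: a preloaded tensor is the result of a single transform pass, while streaming re-runs the transform on every access.

import torch
from PIL import Image
from torchvision import transforms

# Tiny non-uniform image so that a horizontal flip visibly changes it.
image = Image.new('RGB', (2, 1))
image.putpixel((0, 0), (255, 0, 0))   # left pixel red, right pixel black

augment = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),   # a random augmentation
    transforms.ToTensor(),
])

# Preloading: the transform runs once at start; every epoch then reuses
# the very same tensor, so the random flip is effectively frozen.
preloaded = augment(image)

# Streaming: the transform re-runs on every access, so different epochs
# can see differently augmented versions of the same sample.
streamed = [augment(image) for _ in range(8)]
print(any(not torch.equal(streamed[0], s) for s in streamed[1:]))  # likely True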
@@ -1,9 +1,28 @@
# Load config defining problems for training, validation and testing.
default_configs: vqa_med_2019/c4_classification/default_c4_classification.yml

# Training parameters:
training:
  problem:
    categories: C4
    batch_size: 512
    # We won't use images at all here.
    stream_images: False
  dataloader:
    num_workers: 0

# Validation parameters:
validation:
  problem:
    categories: C4
    batch_size: 512
    # We won't use images at all here.
    stream_images: False
  dataloader:
    num_workers: 0


pipeline:
  name: c4_word_answer_onehot_bow

  # Answer encoding.
  answer_tokenizer:
    type: SentenceTokenizer
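As a quick sanity check (not part of the PR; ptp merges this file with its default_configs before use), such a file can be inspected with plain PyYAML, which also confirms that stream_images parses as a boolean:

import yaml

# Inline excerpt of the config above, kept self-contained for illustration.
cfg = yaml.safe_load("""
training:
  problem:
    categories: C4
    batch_size: 512
    stream_images: False
  dataloader:
    num_workers: 0
""")

assert cfg['training']['problem']['stream_images'] is False
print(cfg['training']['problem'])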
@@ -38,7 +38,9 @@ hyperparameters:
  # Final classifier: FFN.
  answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [83]

  batch_size: &batch_size 200
  preload_images: &preload_images True
  num_workers: &num_workers 0
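The &name/*name pairs are standard YAML anchors and aliases: each alias resolves to the anchored value, so changing e.g. batch_size in one place updates every section that references it. A minimal demonstration with PyYAML:

import yaml

cfg = yaml.safe_load("""
batch_size: &batch_size 200
preload_images: &preload_images True
training:
  problem:
    batch_size: *batch_size
    preload_images: *preload_images
""")

# The aliases resolve to the anchored values.
assert cfg['training']['problem']['batch_size'] == cfg['batch_size'] == 200
assert cfg['training']['problem']['preload_images'] is True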

# Training parameters:
training:
@@ -49,10 +51,15 @@ training:
    # Apply all preprocessing/data augmentations.
    question_preprocessing: *question_preprocessing
    image_preprocessing: *image_preprocessing
    # Preload images.
    preload_images: *preload_images
    streams:
      questions: tokenized_questions
  sampler:
    weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
  # Set the number of workers used for loading images.
  dataloader:
    num_workers: *num_workers

  # Optimizer parameters:
  optimizer:
@@ -67,14 +74,19 @@ training:

# Validation parameters:
validation:
  partial_validation_interval: 100
  problem:
    batch_size: *batch_size
    categories: C1,C2,C3
    # Apply all preprocessing/data augmentations.
    question_preprocessing: *question_preprocessing
    image_preprocessing: *image_preprocessing
    # Preload images.
    preload_images: *preload_images
    streams:
      questions: tokenized_questions
  dataloader:
    num_workers: *num_workers


pipeline:
106 changes: 81 additions & 25 deletions ptp/components/problems/image_text_to_class/vqa_med_2019.py

@@ -85,6 +85,12 @@ def __init__(self, name, config):
        self.key_category_names = self.stream_keys["category_names"]
        self.key_image_sizes = self.stream_keys["image_sizes"]

        # Get flag informing whether we want to stream images or not.
        self.stream_images = self.config['stream_images']

        # Get flag indicating whether we want to (pre)load all images at the start.
        self.preload_images = self.config['preload_images']

        # Check the desired image size.
        if len(self.config['resize_image']) != 2:
            self.logger.error("'resize_image' field must contain 2 values: the desired height and width")
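Note that preload_images only has an effect when stream_images is also set; the branches added below check both flags. A hypothetical guard, not present in this diff, could warn about the inconsistent combination:

def check_image_flags(config, logger):
    """Hypothetical helper: warn when 'preload_images' is set without 'stream_images'."""
    stream_images = config['stream_images']
    preload_images = config['preload_images']
    if preload_images and not stream_images:
        logger.warning(
            "'preload_images' is True but 'stream_images' is False - "
            "images will be neither preloaded nor returned.")
    return stream_images, preload_images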
@@ -275,12 +281,15 @@ def output_data_definitions(self):
        # Add all "standard" streams.
        d = {
            self.key_indices: DataDefinition([-1, 1], [list, int], "Batch of sample indices [BATCH_SIZE] x [1]"),
            self.key_image_ids: DataDefinition([-1, 1], [list, str], "Batch of image names, each being a single word [BATCH_SIZE] x [STRING]"),
            self.key_category_ids: DataDefinition([-1], [torch.Tensor], "Batch of target category indices, each being a single index [BATCH_SIZE]"),
            self.key_category_names: DataDefinition([-1, 1], [list, str], "Batch of category target names, each being a single word [BATCH_SIZE] x [STRING]"),
        }

        # Return images only when required.
        if self.stream_images:
            d[self.key_images] = DataDefinition([-1, self.depth, self.height, self.width], [torch.Tensor], "Batch of images [BATCH_SIZE x IMAGE_DEPTH x IMAGE_HEIGHT x IMAGE_WIDTH]")
            d[self.key_image_sizes] = DataDefinition([-1, 2], [torch.Tensor], "Batch of original sizes (height, width) of images [BATCH_SIZE x 2]")

        # Add stream with questions.
        if 'tokenize' in self.question_preprocessing:
@@ -541,16 +550,25 @@ def load_dataset(self, source_files, source_image_folders, source_categories):
                False
            )

            # Create item "dictionary".
            item = {
                # Image name and path leading to it.
                self.key_image_ids: row[self.key_image_ids],
                "image_folder": image_folder,
                self.key_questions: preprocessed_question,
                self.key_answers: preprocessed_answer,
                # Add category.
                self.key_category_ids: category
            }

            # Preload image.
            if self.preload_images and self.stream_images:
                img, img_size = self.get_image(row[self.key_image_ids], image_folder)
                item[self.key_images] = img
                item[self.key_image_sizes] = img_size

            # Add item to dataset.
            dataset.append(item)

            t.update()
        t.close()
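Preloading trades startup time and RAM for faster epochs. A back-of-the-envelope estimate (all numbers are assumptions, not taken from this PR) for a few thousand preprocessed float32 images:

# Assumed figures: ~4000 training images, resized to 3 x 224 x 224 float32.
num_images = 4000
depth, height, width = 3, 224, 224
bytes_per_value = 4  # float32

total_bytes = num_images * depth * height * width * bytes_per_value
print(f"~{total_bytes / 1024**3:.1f} GiB of RAM for preloaded images")  # ~2.2 GiB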
@@ -601,16 +619,25 @@ def load_testset(self, data_file, image_folder):
            else:
                preprocessed_answer = answer

            # Create item "dictionary".
            item = {
                # Image name and path leading to it.
                self.key_image_ids: row[self.key_image_ids],
                "image_folder": image_folder,
                self.key_questions: preprocessed_question,
                self.key_answers: preprocessed_answer,
                # Add category.
                self.key_category_ids: category_id
            }

            # Preload image.
            if self.preload_images and self.stream_images:
                img, img_size = self.get_image(row[self.key_image_ids], image_folder)
                item[self.key_images] = img
                item[self.key_image_sizes] = img_size

            # Add item to dataset.
            dataset.append(item)

            t.update()
        t.close()
@@ -619,22 +646,17 @@

        # Return the created list.
        return dataset


    def get_image(self, img_id, img_folder):
        """
        Loads and returns an image along with its size.
        Additionally, it performs all the required transformations.

        :param img_id: Identifier of the image.
        :param img_folder: Path to the folder containing the image.

        :return: image (Tensor), image size (Tensor with (height, width), both scaled to range (0,1])
        """
        extension = '.jpg'
        # Load the image.
        img = Image.open(os.path.join(img_folder, img_id + extension))
@@ -665,14 +687,47 @@ def __getitem__(self, index):
        # Apply transformations.
        img = transforms_com(img)

        # Get scaled image size.
        img_size = torch.FloatTensor([float(height/self.scale_image_height), float(width/self.scale_image_width)])

        # Return image and size.
        return img, img_size
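A hedged usage sketch of the new helper; the image id and folder below are made-up placeholders, and any (id, folder) pair from the loaded dataset works the same way:

def load_one(problem, img_id='synpic52143', img_folder='/data/vqa-med/train/images'):
    # Both defaults are illustrative placeholders, not real paths from this PR.
    img, img_size = problem.get_image(img_id, img_folder)
    print(tuple(img.shape))  # (3, height, width) after transforms
    print(img_size)          # tensor([h, w]), both scaled to (0, 1]
    return img, img_size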

    def __getitem__(self, index):
        """
        Getter method to access the dataset and return a single sample.

        :param index: index of the sample to return.
        :type index: int

        :return: DataDict({'indices', 'images', 'image_ids', 'questions', 'answers', 'category_ids', 'image_sizes'})
        """
        # Get item.
        item = self.dataset[index]

        # Create the resulting sample (data dict).
        data_dict = self.create_data_dict(index)

        # Load and stream the image ids.
        img_id = item[self.key_image_ids]
        data_dict[self.key_image_ids] = img_id

        # Load the adequate image - only when required.
        if self.stream_images:

            if self.preload_images:
                # Use preloaded values.
                img = item[self.key_images]
                img_size = item[self.key_image_sizes]
            else:
                # Load the image on the fly.
                img, img_size = self.get_image(img_id, item["image_folder"])

            # Image related variables.
            data_dict[self.key_images] = img

            # Image sizes, with height and width already scaled to range (0,1].
            data_dict[self.key_image_sizes] = img_size
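With preloading active, __getitem__ reduces to dictionary lookups, which is presumably why the configs above pair preload_images: True with num_workers: 0 (an inference, not stated in the PR): each extra DataLoader worker is a separate process and, depending on the start method and copy-on-write behavior, can end up holding its own copy of the preloaded tensors. A sketch of the matching loader setup, assuming an already-constructed problem instance:

from torch.utils.data import DataLoader

def make_loader(problem, batch_size=200):
    # With preloaded images, keep num_workers at 0 so the preloaded
    # tensors live in a single process instead of being duplicated.
    return DataLoader(
        dataset=problem,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0,
        collate_fn=problem.collate_fn)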

        # Apply question transformations.
        preprocessed_question = item[self.key_questions]
@@ -728,9 +783,10 @@ def collate_fn(self, batch):
        data_dict = self.create_data_dict([sample[self.key_indices] for sample in batch])

        # Collate image ids.
        data_dict[self.key_image_ids] = [item[self.key_image_ids] for item in batch]

        # Stack images and image sizes - only when streaming images.
        if self.stream_images:
            data_dict[self.key_images] = torch.stack([item[self.key_images] for item in batch]).type(torch.FloatTensor)
            data_dict[self.key_image_sizes] = torch.stack([item[self.key_image_sizes] for item in batch]).type(torch.FloatTensor)

        # Collate lists/lists of lists.
        data_dict[self.key_questions] = [item[self.key_questions] for item in batch]
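For reference, a self-contained illustration of the stacking performed above, with toy 3x8x8 "images":

import torch

# A batch of per-sample dicts, as produced by __getitem__.
batch = [{'images': torch.rand(3, 8, 8)} for _ in range(4)]
images = torch.stack([item['images'] for item in batch]).type(torch.FloatTensor)
print(images.shape)  # torch.Size([4, 3, 8, 8])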