This repository was archived by the owner on Jul 18, 2024. It is now read-only.
Merged
@@ -15,6 +15,14 @@ split: training
# Options: all | c1 | c2 | c3 | c4 (or any combination of the latter 4)
categories: all

# Flag indicating whether the problem will load and return images (LOADED)
stream_images: True

# Flag indicating whether images will be preloaded (i.e. loaded once, at the start) (LOADED)
# WARNING: if this option is active, the images will also be "preprocessed" at the start.
# This means that preloading should not be used when one relies on random augmentations!
preload_images: False

# Resize parameter (LOADED)
# When present, resizes the images from original size to [height, width]
# Depth remains set to 3.
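To make the preloading warning concrete, here is a minimal sketch (using torchvision, which is not part of this diff) of why preloading freezes random augmentations: a preloaded tensor is the result of a single transform pass, while streaming re-runs the transform on every access.

import torch
from PIL import Image
from torchvision import transforms

# Tiny non-uniform image so that a horizontal flip visibly changes it.
image = Image.new('RGB', (2, 1))
image.putpixel((0, 0), (255, 0, 0))   # left pixel red, right pixel black

augment = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),   # a random augmentation
    transforms.ToTensor(),
])

# Preloading: the transform runs once at start; every epoch then reuses
# the very same tensor, so the random flip is effectively frozen.
preloaded = augment(image)

# Streaming: the transform re-runs on every access, so different epochs
# can see differently augmented versions of the same sample.
streamed = [augment(image) for _ in range(8)]
print(any(not torch.equal(streamed[0], s) for s in streamed[1:]))  # likely True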
@@ -1,9 +1,28 @@
# Load config defining problems for training, validation and testing.
default_configs: vqa_med_2019/c4_classification/default_c4_classification.yml

# Training parameters:
training:
  problem:
    categories: C4
    batch_size: 512
    # We won't use images at all here.
    stream_images: False
  dataloader:
    num_workers: 0

# Validation parameters:
validation:
  problem:
    categories: C4
    batch_size: 512
    # We won't use images at all here.
    stream_images: False
  dataloader:
    num_workers: 0


pipeline:
  name: c4_word_answer_onehot_bow

  # Answer encoding.
  answer_tokenizer:
    type: SentenceTokenizer
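As a quick sanity check (not part of the PR; ptp merges this file with its default_configs before use), such a file can be inspected with plain PyYAML, which also confirms that stream_images parses as a boolean:

import yaml

# Inline excerpt of the config above, kept self-contained for illustration.
cfg = yaml.safe_load("""
training:
  problem:
    categories: C4
    batch_size: 512
    stream_images: False
  dataloader:
    num_workers: 0
""")

assert cfg['training']['problem']['stream_images'] is False
print(cfg['training']['problem'])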
@@ -38,7 +38,9 @@ hyperparameters:
  # Final classifier: FFN.
  answer_classifier_hidden_sizes_val: &answer_classifier_hidden_sizes_val [83]

  batch_size: &batch_size 200
  preload_images: &preload_images True
  num_workers: &num_workers 0
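The &name/*name pairs are standard YAML anchors and aliases: each alias resolves to the anchored value, so changing e.g. batch_size in one place updates every section that references it. A minimal demonstration with PyYAML:

import yaml

cfg = yaml.safe_load("""
batch_size: &batch_size 200
preload_images: &preload_images True
training:
  problem:
    batch_size: *batch_size
    preload_images: *preload_images
""")

# The aliases resolve to the anchored values.
assert cfg['training']['problem']['batch_size'] == cfg['batch_size'] == 200
assert cfg['training']['problem']['preload_images'] is True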

# Training parameters:
training:
@@ -49,10 +51,15 @@ training:
    # Apply all preprocessing/data augmentations.
    question_preprocessing: *question_preprocessing
    image_preprocessing: *image_preprocessing
    # Preload images.
    preload_images: *preload_images
    streams:
      questions: tokenized_questions
  sampler:
    weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
  # Set the number of workers used for loading images.
  dataloader:
    num_workers: *num_workers

  # Optimizer parameters:
  optimizer:
@@ -67,14 +74,19 @@ training:

# Validation parameters:
validation:
  partial_validation_interval: 100
  problem:
    batch_size: *batch_size
    categories: C1,C2,C3
    # Apply all preprocessing/data augmentations.
    question_preprocessing: *question_preprocessing
    image_preprocessing: *image_preprocessing
    # Preload images.
    preload_images: *preload_images
    streams:
      questions: tokenized_questions
  dataloader:
    num_workers: *num_workers


pipeline:
106 changes: 81 additions & 25 deletions ptp/components/problems/image_text_to_class/vqa_med_2019.py

@@ -85,6 +85,12 @@ def __init__(self, name, config):
        self.key_category_names = self.stream_keys["category_names"]
        self.key_image_sizes = self.stream_keys["image_sizes"]

        # Get flag informing whether we want to stream images or not.
        self.stream_images = self.config['stream_images']

        # Get flag indicating whether we want to (pre)load all images at the start.
        self.preload_images = self.config['preload_images']

        # Check the desired image size.
        if len(self.config['resize_image']) != 2:
            self.logger.error("'resize_image' field must contain 2 values: the desired height and width")
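Note that preload_images only has an effect when stream_images is also set; the branches added below check both flags. A hypothetical guard, not present in this diff, could warn about the inconsistent combination:

def check_image_flags(config, logger):
    """Hypothetical helper: warn when 'preload_images' is set without 'stream_images'."""
    stream_images = config['stream_images']
    preload_images = config['preload_images']
    if preload_images and not stream_images:
        logger.warning(
            "'preload_images' is True but 'stream_images' is False - "
            "images will be neither preloaded nor returned.")
    return stream_images, preload_images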
@@ -275,12 +281,15 @@ def output_data_definitions(self):
        # Add all "standard" streams.
        d = {
            self.key_indices: DataDefinition([-1, 1], [list, int], "Batch of sample indices [BATCH_SIZE] x [1]"),
            self.key_image_ids: DataDefinition([-1, 1], [list, str], "Batch of image names, each being a single word [BATCH_SIZE] x [STRING]"),
            self.key_category_ids: DataDefinition([-1], [torch.Tensor], "Batch of target category indices, each being a single index [BATCH_SIZE]"),
            self.key_category_names: DataDefinition([-1, 1], [list, str], "Batch of category target names, each being a single word [BATCH_SIZE] x [STRING]"),
        }

        # Return images only when required.
        if self.stream_images:
            d[self.key_images] = DataDefinition([-1, self.depth, self.height, self.width], [torch.Tensor], "Batch of images [BATCH_SIZE x IMAGE_DEPTH x IMAGE_HEIGHT x IMAGE_WIDTH]")
            d[self.key_image_sizes] = DataDefinition([-1, 2], [torch.Tensor], "Batch of original sizes (height, width) of images [BATCH_SIZE x 2]")

        # Add stream with questions.
        if 'tokenize' in self.question_preprocessing:
@@ -541,16 +550,25 @@ def load_dataset(self, source_files, source_image_folders, source_categories):
                False
            )

            # Create item "dictionary".
            item = {
                # Image name and path leading to it.
                self.key_image_ids: row[self.key_image_ids],
                "image_folder": image_folder,
                self.key_questions: preprocessed_question,
                self.key_answers: preprocessed_answer,
                # Add category.
                self.key_category_ids: category
            }

            # Preload image.
            if self.preload_images and self.stream_images:
                img, img_size = self.get_image(row[self.key_image_ids], image_folder)
                item[self.key_images] = img
                item[self.key_image_sizes] = img_size

            # Add item to dataset.
            dataset.append(item)

            t.update()
        t.close()
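Preloading trades startup time and RAM for faster epochs. A back-of-the-envelope estimate (all numbers are assumptions, not taken from this PR) for a few thousand preprocessed float32 images:

# Assumed figures: ~4000 training images, resized to 3 x 224 x 224 float32.
num_images = 4000
depth, height, width = 3, 224, 224
bytes_per_value = 4  # float32

total_bytes = num_images * depth * height * width * bytes_per_value
print(f"~{total_bytes / 1024**3:.1f} GiB of RAM for preloaded images")  # ~2.2 GiB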
@@ -601,16 +619,25 @@ def load_testset(self, data_file, image_folder):
            else:
                preprocessed_answer = answer

            # Create item "dictionary".
            item = {
                # Image name and path leading to it.
                self.key_image_ids: row[self.key_image_ids],
                "image_folder": image_folder,
                self.key_questions: preprocessed_question,
                self.key_answers: preprocessed_answer,
                # Add category.
                self.key_category_ids: category_id
            }

            # Preload image.
            if self.preload_images and self.stream_images:
                img, img_size = self.get_image(row[self.key_image_ids], image_folder)
                item[self.key_images] = img
                item[self.key_image_sizes] = img_size

            # Add item to dataset.
            dataset.append(item)

            t.update()
        t.close()
@@ -619,22 +646,17 @@

        # Return the created list.
        return dataset


    def get_image(self, img_id, img_folder):
        """
        Loads and returns an image along with its size.
        Additionally, it performs all the required transformations.

        :param img_id: Identifier of the image.
        :param img_folder: Path to the folder containing the image.

        :return: image (Tensor), image size (Tensor with (height, width), both scaled to range (0,1])
        """
        extension = '.jpg'
        # Load the image.
        img = Image.open(os.path.join(img_folder, img_id + extension))
@@ -665,14 +687,47 @@ def __getitem__(self, index):
        # Apply transformations.
        img = transforms_com(img)

        # Get scaled image size.
        img_size = torch.FloatTensor([float(height/self.scale_image_height), float(width/self.scale_image_width)])

        # Return image and size.
        return img, img_size
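A hedged usage sketch of the new helper; the image id and folder below are made-up placeholders, and any (id, folder) pair from the loaded dataset works the same way:

def load_one(problem, img_id='synpic52143', img_folder='/data/vqa-med/train/images'):
    # Both defaults are illustrative placeholders, not real paths from this PR.
    img, img_size = problem.get_image(img_id, img_folder)
    print(tuple(img.shape))  # (3, height, width) after transforms
    print(img_size)          # tensor([h, w]), both scaled to (0, 1]
    return img, img_size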

    def __getitem__(self, index):
        """
        Getter method to access the dataset and return a single sample.

        :param index: index of the sample to return.
        :type index: int

        :return: DataDict({'indices', 'images', 'image_ids', 'questions', 'answers', 'category_ids', 'image_sizes'})
        """
        # Get item.
        item = self.dataset[index]

        # Create the resulting sample (data dict).
        data_dict = self.create_data_dict(index)

        # Load and stream the image ids.
        img_id = item[self.key_image_ids]
        data_dict[self.key_image_ids] = img_id

        # Load the adequate image - only when required.
        if self.stream_images:

            if self.preload_images:
                # Use preloaded values.
                img = item[self.key_images]
                img_size = item[self.key_image_sizes]
            else:
                # Load the image on the fly.
                img, img_size = self.get_image(img_id, item["image_folder"])

            # Image related variables.
            data_dict[self.key_images] = img

            # Image sizes, with height and width already scaled to range (0,1].
            data_dict[self.key_image_sizes] = img_size
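With preloading active, __getitem__ reduces to dictionary lookups, which is presumably why the configs above pair preload_images: True with num_workers: 0 (an inference, not stated in the PR): each extra DataLoader worker is a separate process and, depending on the start method and copy-on-write behavior, can end up holding its own copy of the preloaded tensors. A sketch of the matching loader setup, assuming an already-constructed problem instance:

from torch.utils.data import DataLoader

def make_loader(problem, batch_size=200):
    # With preloaded images, keep num_workers at 0 so the preloaded
    # tensors live in a single process instead of being duplicated.
    return DataLoader(
        dataset=problem,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0,
        collate_fn=problem.collate_fn)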

        # Apply question transformations.
        preprocessed_question = item[self.key_questions]
@@ -728,9 +783,10 @@ def collate_fn(self, batch):
        data_dict = self.create_data_dict([sample[self.key_indices] for sample in batch])

        # Collate image ids.
        data_dict[self.key_image_ids] = [item[self.key_image_ids] for item in batch]

        # Stack images and image sizes - only when streaming images.
        if self.stream_images:
            data_dict[self.key_images] = torch.stack([item[self.key_images] for item in batch]).type(torch.FloatTensor)
            data_dict[self.key_image_sizes] = torch.stack([item[self.key_image_sizes] for item in batch]).type(torch.FloatTensor)

        # Collate lists/lists of lists.
        data_dict[self.key_questions] = [item[self.key_questions] for item in batch]
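For reference, a self-contained illustration of the stacking performed above, with toy 3x8x8 "images":

import torch

# A batch of per-sample dicts, as produced by __getitem__.
batch = [{'images': torch.rand(3, 8, 8)} for _ in range(4)]
images = torch.stack([item['images'] for item in batch]).type(torch.FloatTensor)
print(images.shape)  # torch.Size([4, 3, 8, 8])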