111 changes: 111 additions & 0 deletions configs/clevr/clevr_all_vgg_glove_lstm_concat_ffn.yml
@@ -0,0 +1,111 @@
# Load config defining CLEVR problems for training, validation and testing.
default_configs: clevr/default_clevr.yml

# Resize and normalize images - in all sets.
training:
problem:
resize_image: [224, 224]
image_preprocessing: normalize

validation:
problem:
resize_image: [224, 224]
image_preprocessing: normalize

test:
problem:
resize_image: [224, 224]
image_preprocessing: normalize

# Definition of the pipeline.
pipeline:

global_publisher:
priority: 0
type: GlobalVariablePublisher
keys: [question_encoder_output_size, image_encoder_output_size]
values: [100, 100]

##################################################################
# 1st subpipeline: question.
# Question encoding.
question_tokenizer:
priority: 1.1
type: SentenceTokenizer
# Lowercase all letters + remove punctuation (reduces the vocabulary from 87 to 80 words).
preprocessing: all
streams:
inputs: questions
outputs: tokenized_questions

# Model 1: Embeddings
question_embeddings:
priority: 1.2
type: SentenceEmbeddings
embeddings_size: 50
pretrained_embeddings_file: glove.6B.50d.txt
data_folder: ~/data/CLEVR_v1.0
word_mappings_file: questions.all.word.mappings.lowercase.csv
export_word_mappings_to_globals: True
globals:
word_mappings: question_word_mappings
vocabulary_size: num_question_words
streams:
inputs: tokenized_questions
outputs: embedded_questions

# Model 2: RNN
lstm:
priority: 1.3
type: RecurrentNeuralNetwork
cell_type: LSTM
prediction_mode: Last
initial_state: Zero
hidden_size: 50
# Turn off softmax.
use_logsoftmax: False
streams:
inputs: embedded_questions
predictions: question_activations
globals:
input_size: embeddings_size
prediction_size: question_encoder_output_size

##################################################################
# 2nd subpipeline: image.
# Image encoder.
image_encoder:
priority: 2.1
type: TorchVisionWrapper
model_type: vgg16
streams:
inputs: images
outputs: image_activations
globals:
output_size: image_encoder_output_size

##################################################################
# 3rd subpipeline: concatenation + FF.
concat:
type: Concatenation
priority: 3.1
input_streams: [question_activations, image_activations]
dim: 1 # default
input_dims: [[-1, 100], [-1, 100]]
output_dims: [-1, 200]
streams:
outputs: concatenated_activations
globals:
output_size: concatenated_size

classifier:
type: FeedForwardNetwork
hidden_sizes: [100]
priority: 3.2
streams:
inputs: concatenated_activations
globals:
input_size: concatenated_size
prediction_size: num_answers

#: pipeline
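For orientation, here is a minimal plain-PyTorch sketch of what the config above wires together: the two 100-dimensional activation vectors (question LSTM and VGG-16 image encoder, sized by the global_publisher values) are concatenated along dimension 1 and fed to a one-hidden-layer feed-forward classifier. The ReLU activation and the 28-class answer vocabulary are assumptions for illustration; this is not the framework's component API.

```python
import torch
import torch.nn as nn

batch_size = 64
# Outputs of the two subpipelines (question_encoder_output_size = image_encoder_output_size = 100).
question_activations = torch.randn(batch_size, 100)  # LSTM last-step prediction
image_activations = torch.randn(batch_size, 100)     # VGG-16 wrapper output

# Concatenation along dim 1: [-1, 100] + [-1, 100] -> [-1, 200].
concatenated_activations = torch.cat([question_activations, image_activations], dim=1)

# FeedForwardNetwork with hidden_sizes: [100]; num_answers = 28 is an assumed CLEVR answer count.
num_answers = 28
classifier = nn.Sequential(
    nn.Linear(200, 100),
    nn.ReLU(),             # activation choice is an assumption
    nn.Linear(100, num_answers),
    nn.LogSoftmax(dim=1),  # log-probabilities, as expected by NLLLoss in default_clevr.yml
)
print(classifier(concatenated_activations).shape)  # torch.Size([64, 28])
```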
37 changes: 37 additions & 0 deletions configs/clevr/clevr_image_convnet_ffn.yml
@@ -0,0 +1,37 @@
# Load config defining CLEVR problems for training, validation and testing.
default_configs: clevr/default_clevr.yml

# Definition of the pipeline.
pipeline:

# Model consisting of two trainable components (a convolutional encoder and a feed-forward classifier) with a reshaper in between.
image_encoder:
priority: 1.1
type: ConvNetEncoder
streams:
inputs: images

# Reshape inputs
reshaper:
priority: 1.2
type: ReshapeTensor
input_dims: [-1, 16, 58, 38]
output_dims: [-1, 35264]
streams:
inputs: feature_maps
outputs: reshaped_maps
globals:
output_size: reshaped_maps_size

# Image classifier.
classifier:
priority: 1.3
type: FeedForwardNetwork
hidden_sizes: [1000]
streams:
inputs: reshaped_maps
globals:
input_size: reshaped_maps_size
prediction_size: num_answers

#: pipeline
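A sketch of the shape arithmetic behind the reshaper, in plain PyTorch (the batch size and the random feature maps are placeholders): the ConvNetEncoder output is assumed to match the input_dims above, i.e. 16 feature maps of 58x38, which flatten to 16 * 58 * 38 = 35264 features per sample.

```python
import torch

batch_size = 64
# Assumed ConvNetEncoder output matching input_dims: [-1, 16, 58, 38].
feature_maps = torch.randn(batch_size, 16, 58, 38)

# ReshapeTensor equivalent: flatten everything but the batch dimension.
reshaped_maps = feature_maps.reshape(batch_size, -1)
assert reshaped_maps.shape[1] == 16 * 58 * 38 == 35264
print(reshaped_maps.shape)  # torch.Size([64, 35264])
```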
61 changes: 61 additions & 0 deletions configs/clevr/clevr_question_glove_lstm.yml
@@ -0,0 +1,61 @@
# Load config defining CLEVR problems for training, validation and testing.
default_configs: clevr/default_clevr.yml

# This is a unimodal (question-based) baseline, so stop streaming images - in all sets.
training:
problem:
stream_images: False

validation:
problem:
stream_images: False

test:
problem:
stream_images: False

# Definition of the pipeline.
pipeline:

# Question encoding.
question_tokenizer:
priority: 1.1
type: SentenceTokenizer
# Lowercase all letters + remove punctuation (reduces the vocabulary from 87 to 80 words).
preprocessing: all
streams:
inputs: questions
outputs: tokenized_questions

# Model 1: Embeddings
question_embeddings:
priority: 1.2
type: SentenceEmbeddings
embeddings_size: 50
pretrained_embeddings_file: glove.6B.50d.txt
data_folder: ~/data/CLEVR_v1.0
word_mappings_file: questions.all.word.mappings.lowercase.csv
export_word_mappings_to_globals: True
globals:
word_mappings: question_word_mappings
vocabulary_size: num_question_words
streams:
inputs: tokenized_questions
outputs: embedded_questions

# Model 2: RNN
lstm:
priority: 1.3
type: RecurrentNeuralNetwork
cell_type: LSTM
prediction_mode: Last
initial_state: Zero
hidden_size: 50
streams:
inputs: embedded_questions
globals:
input_size: embeddings_size
prediction_size: num_answers


#: pipeline
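A rough plain-PyTorch equivalent of this question-only baseline: 50-dimensional GloVe embeddings feed an LSTM whose last time step is projected to the answer classes. The sequence length, the 28-class answer count, and the final projection/log-softmax details are assumptions; the framework's RecurrentNeuralNetwork component handles them internally.

```python
import torch
import torch.nn as nn

batch_size, seq_len = 64, 20          # sequence length is a placeholder
embeddings_size, hidden_size = 50, 50
num_answers = 28                      # assumed CLEVR answer vocabulary size

# Output of SentenceEmbeddings (GloVe 50d lookups for the tokenized questions).
embedded_questions = torch.randn(batch_size, seq_len, embeddings_size)

lstm = nn.LSTM(embeddings_size, hidden_size, batch_first=True)
projection = nn.Linear(hidden_size, num_answers)

outputs, _ = lstm(embedded_questions)
# prediction_mode: Last -> classify from the final time step only.
predictions = torch.log_softmax(projection(outputs[:, -1, :]), dim=1)
print(predictions.shape)  # torch.Size([64, 28])
```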
103 changes: 103 additions & 0 deletions configs/clevr/default_clevr.yml
@@ -0,0 +1,103 @@
# Training parameters:
training:
problem:
type: CLEVR
batch_size: &b 64
split: training
#resize_image: [224, 224]
# optimizer parameters:
optimizer:
type: Adam
lr: 0.0001
# terminal conditions parameters:
terminal_conditions:
loss_stop_threshold: 0.05
early_stop_validations: -1
episode_limit: 10000
epoch_limit: 10

# Validation parameters:
validation:
problem:
type: CLEVR
batch_size: *b
split: validation
#resize_image: [224, 224]

# Testing parameters:
test:
problem:
type: CLEVR
batch_size: *b
split: test
#resize_image: [224, 224]

pipeline:
disable: image_viewer

label_to_target:
type: LabelIndexer
priority: 0.1
# Load word mappings for answers.
data_folder: ~/data/CLEVR_v1.0
word_mappings_file: answers.all.word.mappings.csv
export_word_mappings_to_globals: True
globals:
word_mappings: answer_word_mappings
vocabulary_size: num_answers
streams:
inputs: answers
outputs: target_answers


# Loss
nllloss:
type: NLLLoss
priority: 10.1
streams:
targets: target_answers

# Statistics.
batch_size:
priority: 100.0
type: BatchSizeStatistics

accuracy:
priority: 100.1
type: AccuracyStatistics
streams:
targets: target_answers

precision_recall:
priority: 100.2
type: PrecisionRecallStatistics
use_word_mappings: True
show_class_scores: True
globals:
word_mappings: answer_word_mappings
streams:
targets: target_answers

answer_decoder:
priority: 100.3
type: WordDecoder
import_word_mappings_from_globals: True
globals:
word_mappings: answer_word_mappings
streams:
inputs: predictions
outputs: predicted_answers

stream_viewer:
priority: 100.4
type: StreamViewer
input_streams: indices, questions, target_answers, predicted_answers

#image_viewer:
# priority: 100.5
# type: ImageToClassViewer
# streams:
# images: inputs
# labels: labels
# answers: answers
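The pieces that every derived config inherits from this file fit together roughly as follows: LabelIndexer maps answer strings to indices, NLLLoss consumes log-probability predictions against those indices, and WordDecoder turns predicted indices back into words. A plain-PyTorch sketch, with a made-up subset of the answer word mappings rather than the contents of the real CSV:

```python
import torch
import torch.nn as nn

# Illustrative subset of what LabelIndexer loads from answers.all.word.mappings.csv.
answer_word_mappings = {"yes": 0, "no": 1, "cube": 2, "sphere": 3}

answers = ["yes", "cube", "no"]
target_answers = torch.tensor([answer_word_mappings[a] for a in answers])

# Upstream models output log-probabilities, hence NLLLoss rather than CrossEntropyLoss.
predictions = torch.log_softmax(torch.randn(len(answers), len(answer_word_mappings)), dim=1)
loss = nn.NLLLoss()(predictions, target_answers)

# WordDecoder inverts the mapping to expose human-readable predicted_answers.
index_to_word = {v: k for k, v in answer_word_mappings.items()}
predicted_answers = [index_to_word[i] for i in predictions.argmax(dim=1).tolist()]
print(loss.item(), predicted_answers)
```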

79 changes: 79 additions & 0 deletions configs/default/components/problems/image_text_to_class/clevr.yml
@@ -0,0 +1,79 @@
# This file defines the default values for the CLEVR problem.

####################################################################
# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
####################################################################

# Folder where problem will store data (LOADED)
data_folder: '~/data/CLEVR_v1.0'

# Defines the set (split) that will be used (LOADED)
# Options: training | validation | test | cogent_a_training | cogent_a_validation | cogent_b_validation
split: training

# Flag indicating whether the problem will load and return images (LOADED)
stream_images: True

# Resize parameter (LOADED)
# When present, resizes the images from original size to [height, width]
# Depth remains set to 3.
#resize_image: [height, width]

# Select applied image preprocessing/augmentations (LOADED)
# Use one (or more) of the following transformations:
# none | normalize | all
# Accepted formats: a,b,c or [a,b,c]
image_preprocessing: none

streams:
####################################################################
# 2. Keymappings associated with INPUT and OUTPUT streams.
####################################################################

# Stream containing batch of indices (OUTPUT)
# Every problem MUST return this stream.
indices: indices

# Stream containing batch of images (OUTPUT)
images: images

# Stream containing batch of image names (OUTPUT)
image_ids: image_ids

# Stream containing batch of questions (OUTPUT)
questions: questions

# Stream containing targets - answers (OUTPUT)
answers: answers

# Stream containing scene descriptions (OUTPUT)
#answers: scene_graphs

# Stream containing batch with question type - indices (OUTPUT)
category_ids: question_type_ids

# Stream containing batch with question type - names (OUTPUT)
category_names: question_type_names

globals:
####################################################################
# 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
####################################################################

####################################################################
# 4. Keymappings associated with GLOBAL variables that will be SET.
####################################################################

# Width of the image (SET)
input_width: image_width
# Height of the image (SET)
input_height: image_height
# Depth of the image (SET)
input_depth: image_depth

# Question type (word-idx) mappings (SET)
question_type_word_mappings: question_type_word_mappings

####################################################################
# 5. Keymappings associated with statistics that will be ADDED.
####################################################################
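As a point of reference, the resize_image: [224, 224] plus image_preprocessing: normalize combination used by the VGG-based config earlier in this diff corresponds roughly to the torchvision transform below. The normalization constants actually applied by the problem class are an assumption; standard ImageNet statistics are shown, which is what torchvision's pretrained VGG-16 expects.

```python
from PIL import Image
import torchvision.transforms as transforms

# Rough equivalent of resize_image: [224, 224] + image_preprocessing: normalize.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # Normalization constants are assumed (standard ImageNet statistics).
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

image = Image.new("RGB", (480, 320))  # placeholder for a 480x320 CLEVR frame
tensor = preprocess(image)
print(tensor.shape)                   # torch.Size([3, 224, 224])
```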