111 changes: 111 additions & 0 deletions configs/clevr/clevr_all_vgg_glove_lstm_concat_ffn.yml
@@ -0,0 +1,111 @@
# Load config defining CLEVR problems for training, validation and testing.
default_configs: clevr/default_clevr.yml

# Resize and normalize images - in all sets.
training:
problem:
resize_image: [224, 224]
image_preprocessing: normalize

validation:
problem:
resize_image: [224, 224]
image_preprocessing: normalize

test:
problem:
resize_image: [224, 224]
image_preprocessing: normalize

# Definition of the pipeline.
pipeline:

global_publisher:
priority: 0
type: GlobalVariablePublisher
keys: [question_encoder_output_size, image_encoder_output_size]
values: [100, 100]

##################################################################
# 1st subpipeline: question.
# Question encoding.
question_tokenizer:
priority: 1.1
type: SentenceTokenizer
# Lowercase all letters + remove punctuation (reduces the vocabulary from 87 to 80 words).
preprocessing: all
streams:
inputs: questions
outputs: tokenized_questions

# Model 1: Embeddings
question_embeddings:
priority: 1.2
type: SentenceEmbeddings
embeddings_size: 50
pretrained_embeddings_file: glove.6B.50d.txt
data_folder: ~/data/CLEVR_v1.0
word_mappings_file: questions.all.word.mappings.lowercase.csv
export_word_mappings_to_globals: True
globals:
word_mappings: question_word_mappings
vocabulary_size: num_question_words
streams:
inputs: tokenized_questions
outputs: embedded_questions

# Model 2: RNN
lstm:
priority: 1.3
type: RecurrentNeuralNetwork
cell_type: LSTM
prediction_mode: Last
initial_state: Zero
hidden_size: 50
# Turn off softmax.
use_logsoftmax: False
streams:
inputs: embedded_questions
predictions: question_activations
globals:
input_size: embeddings_size
prediction_size: question_encoder_output_size

##################################################################
# 2nd subpipeline: image.
# Image encoder.
image_encoder:
priority: 2.1
type: TorchVisionWrapper
model_type: vgg16
streams:
inputs: images
outputs: image_activations
globals:
output_size: image_encoder_output_size

##################################################################
# 3rd subpipeline: concatenation + FF.
concat:
type: Concatenation
priority: 3.1
input_streams: [question_activations, image_activations]
dim: 1 # default
input_dims: [[-1, 100], [-1, 100]]
output_dims: [-1, 200]
streams:
outputs: concatenated_activations
globals:
output_size: concatenated_size

classifier:
type: FeedForwardNetwork
hidden_sizes: [100]
priority: 3.2
streams:
inputs: concatenated_activations
globals:
input_size: concatenated_size
prediction_size: num_answers

#: pipeline
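For orientation, here is a minimal plain-PyTorch sketch of what the config above wires together: the two 100-dimensional activation vectors (question LSTM and VGG-16 image encoder, sized by the global_publisher values) are concatenated along dimension 1 and fed to a one-hidden-layer feed-forward classifier. The ReLU activation and the 28-class answer vocabulary are assumptions for illustration; this is not the framework's component API.

```python
import torch
import torch.nn as nn

batch_size = 64
# Outputs of the two subpipelines (question_encoder_output_size = image_encoder_output_size = 100).
question_activations = torch.randn(batch_size, 100)  # LSTM last-step prediction
image_activations = torch.randn(batch_size, 100)     # VGG-16 wrapper output

# Concatenation along dim 1: [-1, 100] + [-1, 100] -> [-1, 200].
concatenated_activations = torch.cat([question_activations, image_activations], dim=1)

# FeedForwardNetwork with hidden_sizes: [100]; num_answers = 28 is an assumed CLEVR answer count.
num_answers = 28
classifier = nn.Sequential(
    nn.Linear(200, 100),
    nn.ReLU(),             # activation choice is an assumption
    nn.Linear(100, num_answers),
    nn.LogSoftmax(dim=1),  # log-probabilities, as expected by NLLLoss in default_clevr.yml
)
print(classifier(concatenated_activations).shape)  # torch.Size([64, 28])
```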
37 changes: 37 additions & 0 deletions configs/clevr/clevr_image_convnet_ffn.yml
@@ -0,0 +1,37 @@
# Load config defining CLEVR problems for training, validation and testing.
default_configs: clevr/default_clevr.yml

# Definition of the pipeline.
pipeline:

# Model consisting of two trainable components (a convolutional encoder and a feed-forward classifier) with a reshaper in between.
image_encoder:
priority: 1.1
type: ConvNetEncoder
streams:
inputs: images

# Reshape inputs
reshaper:
priority: 1.2
type: ReshapeTensor
input_dims: [-1, 16, 58, 38]
output_dims: [-1, 35264]
streams:
inputs: feature_maps
outputs: reshaped_maps
globals:
output_size: reshaped_maps_size

# Image classifier.
classifier:
priority: 1.3
type: FeedForwardNetwork
hidden_sizes: [1000]
streams:
inputs: reshaped_maps
globals:
input_size: reshaped_maps_size
prediction_size: num_answers

#: pipeline
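A sketch of the shape arithmetic behind the reshaper, in plain PyTorch (the batch size and the random feature maps are placeholders): the ConvNetEncoder output is assumed to match the input_dims above, i.e. 16 feature maps of 58x38, which flatten to 16 * 58 * 38 = 35264 features per sample.

```python
import torch

batch_size = 64
# Assumed ConvNetEncoder output matching input_dims: [-1, 16, 58, 38].
feature_maps = torch.randn(batch_size, 16, 58, 38)

# ReshapeTensor equivalent: flatten everything but the batch dimension.
reshaped_maps = feature_maps.reshape(batch_size, -1)
assert reshaped_maps.shape[1] == 16 * 58 * 38 == 35264
print(reshaped_maps.shape)  # torch.Size([64, 35264])
```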
61 changes: 61 additions & 0 deletions configs/clevr/clevr_question_glove_lstm.yml
@@ -0,0 +1,61 @@
# Load config defining CLEVR problems for training, validation and testing.
default_configs: clevr/default_clevr.yml

# This is a unimodal (question-based) baseline, so stop streaming images - in all sets.
training:
problem:
stream_images: False

validation:
problem:
stream_images: False

test:
problem:
stream_images: False

# Definition of the pipeline.
pipeline:

# Question encoding.
question_tokenizer:
priority: 1.1
type: SentenceTokenizer
# Lowercase all letters + remove punctuation (reduces the vocabulary from 87 to 80 words).
preprocessing: all
streams:
inputs: questions
outputs: tokenized_questions

# Model 1: Embeddings
question_embeddings:
priority: 1.2
type: SentenceEmbeddings
embeddings_size: 50
pretrained_embeddings_file: glove.6B.50d.txt
data_folder: ~/data/CLEVR_v1.0
word_mappings_file: questions.all.word.mappings.lowercase.csv
export_word_mappings_to_globals: True
globals:
word_mappings: question_word_mappings
vocabulary_size: num_question_words
streams:
inputs: tokenized_questions
outputs: embedded_questions

# Model 2: RNN
lstm:
priority: 1.3
type: RecurrentNeuralNetwork
cell_type: LSTM
prediction_mode: Last
initial_state: Zero
hidden_size: 50
streams:
inputs: embedded_questions
globals:
input_size: embeddings_size
prediction_size: num_answers


#: pipeline
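A rough plain-PyTorch equivalent of this question-only baseline: 50-dimensional GloVe embeddings feed an LSTM whose last time step is projected to the answer classes. The sequence length, the 28-class answer count, and the final projection/log-softmax details are assumptions; the framework's RecurrentNeuralNetwork component handles them internally.

```python
import torch
import torch.nn as nn

batch_size, seq_len = 64, 20          # sequence length is a placeholder
embeddings_size, hidden_size = 50, 50
num_answers = 28                      # assumed CLEVR answer vocabulary size

# Output of SentenceEmbeddings (GloVe 50d lookups for the tokenized questions).
embedded_questions = torch.randn(batch_size, seq_len, embeddings_size)

lstm = nn.LSTM(embeddings_size, hidden_size, batch_first=True)
projection = nn.Linear(hidden_size, num_answers)

outputs, _ = lstm(embedded_questions)
# prediction_mode: Last -> classify from the final time step only.
predictions = torch.log_softmax(projection(outputs[:, -1, :]), dim=1)
print(predictions.shape)  # torch.Size([64, 28])
```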
103 changes: 103 additions & 0 deletions configs/clevr/default_clevr.yml
@@ -0,0 +1,103 @@
# Training parameters:
training:
problem:
type: CLEVR
batch_size: &b 64
split: training
#resize_image: [224, 224]
# optimizer parameters:
optimizer:
type: Adam
lr: 0.0001
# terminal conditions parameters:
terminal_conditions:
loss_stop_threshold: 0.05
early_stop_validations: -1
episode_limit: 10000
epoch_limit: 10

# Validation parameters:
validation:
problem:
type: CLEVR
batch_size: *b
split: validation
#resize_image: [224, 224]

# Testing parameters:
test:
problem:
type: CLEVR
batch_size: *b
split: test
#resize_image: [224, 224]

pipeline:
disable: image_viewer

label_to_target:
type: LabelIndexer
priority: 0.1
# Load word mappings for answers.
data_folder: ~/data/CLEVR_v1.0
word_mappings_file: answers.all.word.mappings.csv
export_word_mappings_to_globals: True
globals:
word_mappings: answer_word_mappings
vocabulary_size: num_answers
streams:
inputs: answers
outputs: target_answers


# Loss
nllloss:
type: NLLLoss
priority: 10.1
streams:
targets: target_answers

# Statistics.
batch_size:
priority: 100.0
type: BatchSizeStatistics

accuracy:
priority: 100.1
type: AccuracyStatistics
streams:
targets: target_answers

precision_recall:
priority: 100.2
type: PrecisionRecallStatistics
use_word_mappings: True
show_class_scores: True
globals:
word_mappings: answer_word_mappings
streams:
targets: target_answers

answer_decoder:
priority: 100.3
type: WordDecoder
import_word_mappings_from_globals: True
globals:
word_mappings: answer_word_mappings
streams:
inputs: predictions
outputs: predicted_answers

stream_viewer:
priority: 100.4
type: StreamViewer
input_streams: indices, questions, target_answers, predicted_answers

#image_viewer:
# priority: 100.5
# type: ImageToClassViewer
# streams:
# images: inputs
# labels: labels
# answers: answers
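The pieces that every derived config inherits from this file fit together roughly as follows: LabelIndexer maps answer strings to indices, NLLLoss consumes log-probability predictions against those indices, and WordDecoder turns predicted indices back into words. A plain-PyTorch sketch, with a made-up subset of the answer word mappings rather than the contents of the real CSV:

```python
import torch
import torch.nn as nn

# Illustrative subset of what LabelIndexer loads from answers.all.word.mappings.csv.
answer_word_mappings = {"yes": 0, "no": 1, "cube": 2, "sphere": 3}

answers = ["yes", "cube", "no"]
target_answers = torch.tensor([answer_word_mappings[a] for a in answers])

# Upstream models output log-probabilities, hence NLLLoss rather than CrossEntropyLoss.
predictions = torch.log_softmax(torch.randn(len(answers), len(answer_word_mappings)), dim=1)
loss = nn.NLLLoss()(predictions, target_answers)

# WordDecoder inverts the mapping to expose human-readable predicted_answers.
index_to_word = {v: k for k, v in answer_word_mappings.items()}
predicted_answers = [index_to_word[i] for i in predictions.argmax(dim=1).tolist()]
print(loss.item(), predicted_answers)
```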

79 changes: 79 additions & 0 deletions configs/default/components/problems/image_text_to_class/clevr.yml
@@ -0,0 +1,79 @@
# This file defines the default values for the CLEVR problem.

####################################################################
# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
####################################################################

# Folder where problem will store data (LOADED)
data_folder: '~/data/CLEVR_v1.0'

# Defines the set (split) that will be used (LOADED)
# Options: training | validation | test | cogent_a_training | cogent_a_validation | cogent_b_validation
split: training

# Flag indicating whether the problem will load and return images (LOADED)
stream_images: True

# Resize parameter (LOADED)
# When present, resizes the images from original size to [height, width]
# Depth remains set to 3.
#resize_image: [height, width]

# Select applied image preprocessing/augmentations (LOADED)
# Use one (or more) of the following transformations:
# none | normalize | all
# Accepted formats: a,b,c or [a,b,c]
image_preprocessing: none

streams:
####################################################################
# 2. Keymappings associated with INPUT and OUTPUT streams.
####################################################################

# Stream containing batch of indices (OUTPUT)
# Every problem MUST return this stream.
indices: indices

# Stream containing batch of images (OUTPUT)
images: images

# Stream containing batch of image names (OUTPUT)
image_ids: image_ids

# Stream containing batch of questions (OUTPUT)
questions: questions

# Stream containing targets - answers (OUTPUT)
answers: answers

# Stream containing scene descriptions (OUTPUT)
#answers: scene_graphs

# Stream containing batch with question type - indices (OUTPUT)
category_ids: question_type_ids

# Stream containing batch with question type - names (OUTPUT)
category_names: question_type_names

globals:
####################################################################
# 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
####################################################################

####################################################################
# 4. Keymappings associated with GLOBAL variables that will be SET.
####################################################################

# Width of the image (SET)
input_width: image_width
# Height of the image (SET)
input_height: image_height
# Depth of the image (SET)
input_depth: image_depth

# Question type (word-idx) mappings (SET)
question_type_word_mappings: question_type_word_mappings

####################################################################
# 5. Keymappings associated with statistics that will be ADDED.
####################################################################
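As a point of reference, the resize_image: [224, 224] plus image_preprocessing: normalize combination used by the VGG-based config earlier in this diff corresponds roughly to the torchvision transform below. The normalization constants actually applied by the problem class are an assumption; standard ImageNet statistics are shown, which is what torchvision's pretrained VGG-16 expects.

```python
from PIL import Image
import torchvision.transforms as transforms

# Rough equivalent of resize_image: [224, 224] + image_preprocessing: normalize.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # Normalization constants are assumed (standard ImageNet statistics).
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

image = Image.new("RGB", (480, 320))  # placeholder for a 480x320 CLEVR frame
tensor = preprocess(image)
print(tensor.shape)                   # torch.Size([3, 224, 224])
```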