Skip to content
This repository was archived by the owner on Jul 18, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
8e5922f
Merge pull request #2 from IBM/develop
tkornuta-ibm Apr 10, 2019
355106e
Merge pull request #4 from IBM/develop
tkornuta-ibm Apr 12, 2019
a9c8439
Merge branch 'master' of github.com-tkornut:IBM/pytorchpipe into proc…
tkornuta-ibm Apr 29, 2019
6f421bd
removed data augmentations from c2 configs
tkornuta-ibm Apr 29, 2019
8e7d08b
added using config name as pipeline name in trainer
tkornuta-ibm Apr 29, 2019
e6d0344
Merge branch 'develop' of github.com-tkornut:IBM/pytorchpipe into c12…
tkornuta-ibm Apr 29, 2019
94f1e63
resnet50 feature_maps mode and c2 configs using resnet50 with EWM/RN
tkornuta-ibm Apr 30, 2019
454893c
Merge branch 'develop' of github.com-tkornut:IBM/pytorchpipe into c12…
tkornuta-ibm Apr 30, 2019
5940c24
c123_binary_lstm_vgg16_cat_ffn_loss.yml
tkornuta-ibm Apr 30, 2019
2371628
c123_no_binary_lstm_resnet152_is_cat_ffn_loss.yml
tkornuta-ibm Apr 30, 2019
d5a11d0
123_no_binary_lstm_resnet50_is_cat_ffn_loss
tkornuta-ibm Apr 30, 2019
b7b619a
cleanup of c123_no_binary cat pipelines with different image encoders
tkornuta-ibm Apr 30, 2019
ef65ef6
c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml + cleanups of v2…
tkornuta-ibm Apr 30, 2019
43f83bc
c123_no_binary_lstm_resnet50_ewm_is_cat_ffn_loss.yml
tkornuta-ibm Apr 30, 2019
3098a07
cleanup: priorities first
tkornuta-ibm Apr 30, 2019
0aaa1a5
trainer fix: using name of config file when pipeline name not present
tkornuta-ibm Apr 30, 2019
6ad9c28
cleanup and rename of simple vf configs
tkornuta-ibm Apr 30, 2019
d8c588c
lstm_vgg16_is_cat_ffn_only_yn_loss.yml
tkornuta-ibm Apr 30, 2019
529fb99
configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_no_yn_loss.yml
tkornuta-ibm Apr 30, 2019
1c4722a
extend_answers working on tokenized_answers returned from problem
tkornuta-ibm Apr 30, 2019
da3c2f8
default_extended_answers: predicted_answers
tkornuta-ibm Apr 30, 2019
32706a9
configs/vqa_med_2019/vf/lstm_vgg16_is_cat_ffn_c123_binary_yn_loss.yml
tkornuta-ibm Apr 30, 2019
5556abd
configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss…
tkornuta-ibm Apr 30, 2019
a3ff779
lstm_resnet50_is_cat_ffn_c123_no_binary_loss
tkornuta-ibm Apr 30, 2019
48c7287
lstm_resnet50_ewm_is_cat_ffn_c123_no_binary_loss.yml
tkornuta-ibm Apr 30, 2019
9f935ab
lstm_resnet50_ewm_is_cat_ffn_c123_loss_ffn_yn_loss.yml
tkornuta-ibm Apr 30, 2019
7db986f
increased loss treshold: 1e-3
tkornuta-ibm Apr 30, 2019
39242f6
Add option to ignore words in BLEU
aasseman Apr 30, 2019
ecc69df
extend answers - added second exported that creates the submission file
tkornuta-ibm Apr 30, 2019
f0c037c
vqa attention
Apr 30, 2019
e486dc7
separator export added to stream_file_exporter
tkornuta-ibm Apr 30, 2019
61be38d
separator export fix
tkornuta-ibm Apr 30, 2019
ff9ce7d
Merge pull request #32 from aasseman/feat/bleu-ignore-words
tkornuta-ibm Apr 30, 2019
eb83d56
Merge pull request #28 from IBM/c123_pipelines
tkornuta-ibm Apr 30, 2019
6662e08
Merge branch 'vqa-attention' of github.com-tkornut:Drajan/pytorchpipe…
tkornuta-ibm Apr 30, 2019
3bae683
attention cleanups
tkornuta-ibm Apr 30, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion configs/default/components/models/sentence_embeddings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ source_vocabulary_files: ''
# Additional tokens that will be added to vocabulary (LOADED)
# This list can be extended, but <PAD> and <EOS> are special tokens.
# <PAD> is ALWAYS used for padding shorter sequences.
additional_tokens: '<PAD>,<EOS>'
additional_tokens: '<PAD>'

# Enable <EOS> (end of sequence) token.
eos_token: False
Expand Down
58 changes: 58 additions & 0 deletions configs/default/components/models/vqa/attention.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# This file defines the default values for the VQA_Attention model.

####################################################################
# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
####################################################################

# Dropout rate (LOADED)
# Default: 0 (means that it is turned off)
dropout_rate: 0

# Size of the latent space (LOADED)
latent_size: 100

# Number of attention heads (LOADED)
num_attention_heads: 2


streams:
  ####################################################################
  # 2. Keymappings associated with INPUT and OUTPUT streams.
  ####################################################################

  # Stream containing batch of encoded images (INPUT)
  feature_maps: feature_maps

  # Stream containing batch of encoded questions (INPUT)
  question_encodings: question_encodings

  # Stream containing outputs (OUTPUT)
  outputs: outputs

globals:
  ####################################################################
  # 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
  ####################################################################

  # Height of the features tensor (RETRIEVED)
  feature_maps_height: feature_maps_height

  # Width of the features tensor (RETRIEVED)
  feature_maps_width: feature_maps_width

  # Depth of the features tensor (RETRIEVED)
  feature_maps_depth: feature_maps_depth

  # Size of the question encodings input (RETRIEVED)
  question_encoding_size: question_encoding_size

  # Size of the output (RETRIEVED)
  output_size: output_size

####################################################################
# 4. Keymappings associated with GLOBAL variables that will be SET.
####################################################################

####################################################################
# 5. Keymappings associated with statistics that will be ADDED.
####################################################################
3 changes: 3 additions & 0 deletions configs/default/components/publishers/bleu_statistics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ use_prediction_distributions: True
# TODO!
#use_masking: False

# Ignored words - useful for ignoring special tokens
ignored_words: ["<PAD>", "<EOS>"]

# Weights of n-grams used when calculating the score.
weights: [0.25, 0.25, 0.25, 0.25]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ input_streams: ''
# Separator that will be placed between values (LOADED)
separator: ','

# Adds additional line to output file enabling Excel to use different separator while loading (LOADED)
export_separator_line_to_csv: False

# Name of the file containing output values (LOADED)
filename: 'outputs.txt'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ training:
problem:
batch_size: 48
# Apply all preprocessing/data augmentations.
image_preprocessing: all
image_preprocessing: normalize
# none | random_affine | random_horizontal_flip | normalize | all
question_preprocessing: all
question_preprocessing: lowercase,remove_punctuation,tokenize
# none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all
streams:
# Problem is returning tokenized questions.
Expand All @@ -24,7 +24,6 @@ validation:


pipeline:
name: c2_class_lstm_resnet152_ewm_cat_is

global_publisher:
priority: 0
Expand Down Expand Up @@ -96,6 +95,7 @@ pipeline:
type: FeedForwardNetwork
hidden_sizes: [100]
dropout_rate: 0.5
use_logsoftmax: False
streams:
inputs: element_wise_activations
predictions: question_image_activations
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ training:
problem:
batch_size: 32
# Apply all preprocessing/data augmentations.
image_preprocessing: all
image_preprocessing: normalize
# none | random_affine | random_horizontal_flip | normalize | all
question_preprocessing: all
question_preprocessing: lowercase,remove_punctuation,tokenize
# none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all
streams:
# Problem is returning tokenized questions.
Expand All @@ -24,7 +24,6 @@ validation:


pipeline:
name: c2_class_lstm_resnet152_rn_cat_is

global_publisher:
priority: 0
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Load config defining problems for training, validation and testing.
default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml

training:
  problem:
    batch_size: 48
    # Apply all preprocessing/data augmentations.
    question_preprocessing: lowercase,remove_punctuation,tokenize
    streams:
      # Problem is returning tokenized questions.
      questions: tokenized_questions

validation:
  problem:
    batch_size: 48
    # Apply all preprocessing/data augmentations.
    question_preprocessing: lowercase,remove_punctuation,tokenize
    streams:
      # Problem is returning tokenized questions.
      questions: tokenized_questions


pipeline:

  global_publisher:
    priority: 0
    type: GlobalVariablePublisher
    # Add input_size to globals.
    # NOTE(review): question_image_activation_size (300) does not appear to be
    # consumed by any component below — confirm before removing.
    keys: [question_encoder_output_size, attention_activation_size, question_image_activation_size]
    values: [100, 4196, 300]

  ################# PIPE 0: question #################

  # Model 1: Embeddings
  question_embeddings:
    priority: 1.2
    type: SentenceEmbeddings
    embeddings_size: 100
    pretrained_embeddings_file: glove.6B.100d.txt
    data_folder: ~/data/vqa-med
    word_mappings_file: questions.all.word.mappings.csv
    streams:
      inputs: tokenized_questions
      outputs: embedded_questions

  # Model 2: RNN
  question_lstm:
    priority: 1.3
    type: RecurrentNeuralNetwork
    cell_type: LSTM
    prediction_mode: Last
    use_logsoftmax: False
    initial_state: Trainable
    dropout_rate: 0.1
    hidden_size: 50
    streams:
      inputs: embedded_questions
      predictions: question_activations
    globals:
      input_size: embeddings_size
      prediction_size: question_encoder_output_size

  ################# PIPE 2: image #################
  # Image encoder returning feature maps (not a flat vector).
  image_encoder:
    priority: 3.1
    type: TorchVisionWrapper
    model_type: resnet50
    return_feature_maps: True
    streams:
      inputs: images
      outputs: feature_maps

  ################# PIPE 3: image-question fusion #################
  # Attention + FF.
  question_image_fusion:
    priority: 4.1
    type: VQA_Attention
    dropout_rate: 0.5
    latent_size: 100
    num_attention_heads: 2
    streams:
      # Fixed: the VQA_Attention component's image input stream is named
      # 'feature_maps' (see configs/default/components/models/vqa/attention.yml),
      # not 'image_encodings'.
      feature_maps: feature_maps
      question_encodings: question_activations
      outputs: attention_activations
    globals:
      question_encoding_size: question_encoder_output_size
      output_size: attention_activation_size

  classifier:
    priority: 5.1
    type: FeedForwardNetwork
    hidden_sizes: [100]
    dropout_rate: 0.5
    streams:
      inputs: attention_activations
    globals:
      input_size: attention_activation_size
      prediction_size: vocabulary_size_c2


#: pipeline
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Load config defining problems for training, validation and testing.
default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml

training:
  problem:
    batch_size: 48
    # Apply all preprocessing/data augmentations.
    question_preprocessing: lowercase,remove_punctuation,tokenize
    streams:
      # Problem is returning tokenized questions.
      questions: tokenized_questions

validation:
  problem:
    batch_size: 48
    # Apply all preprocessing/data augmentations.
    question_preprocessing: lowercase,remove_punctuation,tokenize
    streams:
      # Problem is returning tokenized questions.
      questions: tokenized_questions


pipeline:

  global_publisher:
    priority: 0
    type: GlobalVariablePublisher
    # Add input_size to globals.
    keys: [question_encoder_output_size, image_encoder_output_size, element_wise_activation_size, image_size_encoder_input_size, image_size_encoder_output_size]
    values: [100, 100, 100, 2, 10]

  ################# PIPE 0: question #################

  # Model 1: Embeddings
  question_embeddings:
    priority: 1.2
    type: SentenceEmbeddings
    embeddings_size: 100
    pretrained_embeddings_file: glove.6B.100d.txt
    data_folder: ~/data/vqa-med
    word_mappings_file: questions.all.word.mappings.csv
    streams:
      inputs: tokenized_questions
      outputs: embedded_questions

  # Model 2: RNN
  question_lstm:
    priority: 1.3
    type: RecurrentNeuralNetwork
    cell_type: LSTM
    prediction_mode: Last
    use_logsoftmax: False
    initial_state: Trainable
    dropout_rate: 0.1
    hidden_size: 50
    streams:
      inputs: embedded_questions
      predictions: question_activations
    globals:
      input_size: embeddings_size
      prediction_size: question_encoder_output_size

  ################# PIPE 2: image #################
  # Image encoder (flat activation vector output).
  image_encoder:
    priority: 3.1
    type: TorchVisionWrapper
    model_type: resnet50
    streams:
      inputs: images
      outputs: image_activations
    globals:
      output_size: image_encoder_output_size

  ################# PIPE 3: image-question fusion #################
  # Element wise multiplication + FF.
  question_image_fusion:
    priority: 4.1
    type: ElementWiseMultiplication
    dropout_rate: 0.5
    streams:
      image_encodings: image_activations
      question_encodings: question_activations
      outputs: element_wise_activations
    globals:
      image_encoding_size: image_encoder_output_size
      question_encoding_size: question_encoder_output_size
      output_size: element_wise_activation_size

  question_image_ffn:
    priority: 4.2
    type: FeedForwardNetwork
    hidden_sizes: [100]
    dropout_rate: 0.5
    use_logsoftmax: False
    streams:
      inputs: element_wise_activations
      predictions: question_image_activations
    globals:
      input_size: element_wise_activation_size
      prediction_size: element_wise_activation_size

  ################# PIPE 5: image-question-image size fusion + classification #################
  # Model - image size FFN.
  image_size_encoder:
    priority: 5.1
    type: FeedForwardNetwork
    streams:
      inputs: image_sizes
      predictions: image_size_activations
    globals:
      input_size: image_size_encoder_input_size
      prediction_size: image_size_encoder_output_size

  # 4th subpipeline: concatenation + FF.
  concat:
    priority: 5.2
    type: Concatenation
    input_streams: [question_image_activations, image_size_activations]
    # Concatenation
    dim: 1 # default
    input_dims: [[-1,100],[-1,10]]
    output_dims: [-1,110]
    streams:
      outputs: concatenated_activations
    globals:
      # Fixed spelling: was 'concatentated_activations_size' (kept consistent
      # with the retrieval in the classifier below).
      output_size: concatenated_activations_size


  classifier:
    priority: 5.3
    type: FeedForwardNetwork
    hidden_sizes: [100]
    dropout_rate: 0.5
    streams:
      inputs: concatenated_activations
    globals:
      input_size: concatenated_activations_size
      prediction_size: vocabulary_size_c2


#: pipeline
Loading