Skip to content
This repository was archived by the owner on Jul 18, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
a86a543
Added more modes to RNN (untested):
aasseman Apr 18, 2019
5a7b848
c3_classification_all_bow_vgg16_concat.yml
tkornuta-ibm Apr 15, 2019
4bd9856
c1 update + all bow + vgg16 + size config
tkornuta-ibm Apr 15, 2019
ea0681c
renaming components in question categorization
tkornuta-ibm Apr 15, 2019
6bebcd2
c3 rnn+vgg
tkornuta-ibm Apr 15, 2019
96e23f0
fixed model name loading (spaces), added yes/no preprocessing and cat…
tkornuta-ibm Apr 16, 2019
101d583
c1 all rnn vgg, updated configs for categorization, c1 vf init
tkornuta-ibm Apr 16, 2019
9665d98
Added option to indicate model name while loading
tkornuta-ibm Apr 16, 2019
d1fad47
Cleaned up logging comments while loading models
tkornuta-ibm Apr 16, 2019
7611a5b
C1 variational flow - shared rnn with question categorization
tkornuta-ibm Apr 16, 2019
4faa4bc
Added out_of_vocabulary to LabelIndexer, first VQAMED variational flo…
tkornuta-ibm Apr 16, 2019
a6133f7
pipe with pretrained categorization and two losses for C1 and binary …
tkornuta-ibm Apr 16, 2019
95776c8
rename
tkornuta-ibm Apr 16, 2019
33d296c
statistics typo fix in config - c1+binary vf
tkornuta-ibm Apr 16, 2019
ee39ace
join masked predictions test
tkornuta-ibm Apr 17, 2019
b605794
Fixed masking in P/R flow 2 and 3
tkornuta-ibm Apr 17, 2019
edeeed9
cleanup c1 binary hardcoded categories from problem
tkornuta-ibm Apr 17, 2019
6d4200a
masking for hardcoded c1 binary P/R
tkornuta-ibm Apr 17, 2019
6a07a41
cleanups and config for c1_bin shared all encoders
tkornuta-ibm Apr 17, 2019
88b449f
Changed default out_of_vocabulary value to -100, which is used by PyT…
tkornuta-ibm Apr 17, 2019
d47ddd3
c1 + c2 + Y/N multimodal config
tkornuta-ibm Apr 17, 2019
8d93eac
c2 multimodal model
tkornuta-ibm Apr 17, 2019
a7a9097
rename c1_c3_binary_vf_cat_rnn_shared_all_encoders_three_ffns_losses
tkornuta-ibm Apr 17, 2019
4bc779c
microupdate of wikitext lm rnn config
tkornuta-ibm Apr 18, 2019
f2f10fb
c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml
tkornuta-ibm Apr 18, 2019
6cf7f21
added viewing of streams related to C2
tkornuta-ibm Apr 18, 2019
aa829fb
one ffn
tkornuta-ibm Apr 18, 2019
48c4964
rename c1_c2_c3_binary_cat_rnn_shared_all_encoders_one_ffn_loss
tkornuta-ibm Apr 18, 2019
a452774
Update README.md
tkornuta-ibm Apr 19, 2019
1d4b93a
first version of component calculating BLEU score
tkornuta-ibm Apr 19, 2019
9b3f977
first version of component calculating BLEU score
tkornuta-ibm Apr 19, 2019
806e008
bleu with weights, fixed bug with max along item axis
tkornuta-ibm Apr 19, 2019
4d133f4
Fixed mutltiple inheritance issue with mixin WordEmbeddings class
tkornuta-ibm Apr 19, 2019
8585fac
Deindexing mode added to sentence indexer
tkornuta-ibm Apr 19, 2019
3142b7c
Added broadcast functionality to FFN
aasseman Apr 23, 2019
0e4f39f
Add simple, all-in-one seq2seq RNN component
aasseman Apr 23, 2019
2794e2b
Modified RecurrentNeuralNetwork, such that it can input/output hidden…
aasseman Apr 23, 2019
10124db
Merge branch 'develop' into feat/extend-rnn
tkornuta-ibm Apr 23, 2019
1beb844
Update seq2seq_rnn.py
tkornuta-ibm Apr 23, 2019
df075d4
Update __init__.py
tkornuta-ibm Apr 23, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions configs/default/components/models/recurrent_neural_network.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,15 @@ hidden_size: 100

# Flag informing the model to learn the initial state (h0/c0) (LOADED)
# When false, (h0/c0) will be initialized as zeros.
initial_state_trainable: True

# Initial state type:
# * Zero (null vector)
# * Trainable (xavier initialization, trainable)
# * Input (the initial hidden state comes from an input stream)
initial_state: Trainable

# Whether to include the last hidden state in the outputs
output_last_state: False

# Type of recurrent cell (LOADED)
# Options: LSTM | GRU | RNN_TANH | RNN_RELU
Expand All @@ -25,9 +33,19 @@ dropout_rate: 0
# Prediction mode (LOADED)
# Options:
# * Dense (passes every activation through output layer) |
# * Last (passes only the last activation though output layer)
# * Last (passes only the last activation through output layer) |
# * None (all outputs are discarded)
prediction_mode: Dense

# Input mode
# Options:
# * Dense (every iteration expects an input)
# * Autoregression_First (Autoregression, expects an input for the first iteration)
# * Autoregression_None (Autoregression, first input will be a null vector)
input_mode: Dense

autoregression_length: 42

# If true, output of the last layer will be additionally processed with Log Softmax (LOADED)
use_logsoftmax: True

Expand All @@ -39,9 +57,17 @@ streams:
# Stream containing batch of images (INPUT)
inputs: inputs

# Stream containing the initial state of the RNN (INPUT)
# The stream will be actually created only if `initial_state: Input`
input_state: input_state

# Stream containing predictions (OUTPUT)
predictions: predictions

# Stream containing the final output state of the RNN (OUTPUT)
# The stream will be actually created only if `output_last_state: True`
output_state: output_state

globals:
####################################################################
# 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
Expand Down
81 changes: 81 additions & 0 deletions configs/default/components/models/seq2seq_rnn.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# This file defines the default values for the RNN model.

####################################################################
# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
####################################################################

# Size of the hidden state (LOADED)
hidden_size: 100

# Flag informing the model to learn the initial state (h0/c0) (LOADED)
# When false, (h0/c0) will be initialized as zeros.

# Initial state type:
# * Zero (null vector)
# * Trainable (xavier initialization, trainable)
# * Input (the initial hidden state comes from an input stream)
initial_state: Trainable

# Whether to include the last hidden state in the outputs
output_last_state: False

# Type of recurrent cell (LOADED)
# Options: LSTM | GRU | RNN_TANH | RNN_RELU
cell_type: LSTM

# Number of "stacked" layers (LOADED)
num_layers: 1

# Dropout rate (LOADED)
# Default: 0 (means that it is turned off)
dropout_rate: 0

# Prediction mode (LOADED)
# Options:
# * Dense (passes every activation through output layer) |
# * Last (passes only the last activation through output layer) |
# * None (all outputs are discarded)
prediction_mode: Dense

# Input mode
# Options:
# * Dense (every iteration expects an input)
# * Autoregression_First (Autoregression, expects an input for the first iteration)
# * Autoregression_None (Autoregression, first input will be a null vector)
input_mode: Dense

autoregression_length: 50

# If true, output of the last layer will be additionally processed with Log Softmax (LOADED)
use_logsoftmax: True

streams:
####################################################################
# 2. Keymappings associated with INPUT and OUTPUT streams.
####################################################################

# Stream containing batch of images (INPUT)
inputs: inputs

# Stream containing predictions (OUTPUT)
predictions: predictions

globals:
####################################################################
# 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
####################################################################

# Size of the input (RETRIEVED)
input_size: input_size

# Size of the prediction (RETRIEVED)
prediction_size: prediction_size

####################################################################
# 4. Keymappings associated with GLOBAL variables that will be SET.
####################################################################

####################################################################
# 5. Keymappings associated with statistics that will be ADDED.
####################################################################

Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ training:
episode_limit: 10000
epoch_limit: -1


# Validation parameters:
validation:
problem:
Expand Down
196 changes: 196 additions & 0 deletions configs/wikitext/wikitext_language_modeling_seq2seq.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
# This pipeline applies seq2seq on wikitext-2 to make word-level prediction.
# It's been made for test purposes only, as it is doing:
# [word 0 , ... , word 49] -> [word 1 , ... , word 50] (basically copying most of the input)
#
# The seq2seq here is implemented through the use of 2 `RecurrentNeuralNetwork` components.

# Training parameters:
training:
problem:
type: &p_type WikiTextLanguageModeling
data_folder: &data_folder ~/data/language_modeling/wikitext-2
dataset: &dataset wikitext-2
subset: train
sentence_length: 50
batch_size: 64

# optimizer parameters:
optimizer:
name: Adam
lr: 1.0e-3

# settings parameters
terminal_conditions:
loss_stop: 1.0e-2
episode_limit: 1000000
epoch_limit: 100

# Validation parameters:
validation:
partial_validation_interval: 100
problem:
type: *p_type
data_folder: *data_folder
dataset: *dataset
subset: valid
sentence_length: 50
batch_size: 64

# Testing parameters:
testing:
problem:
type: *p_type
data_folder: *data_folder
dataset: *dataset
subset: test
sentence_length: 50
batch_size: 64

pipeline:
name: wikitext_language_modeling_seq2seq

# Source encoding - model 1.
source_sentence_embedding:
type: SentenceEmbeddings
priority: 1.1
embeddings_size: 50
pretrained_embeddings: glove.6B.50d.txt
data_folder: *data_folder
source_vocabulary_files: wiki.train.tokens,wiki.valid.tokens,wiki.test.tokens
vocabulary_mappings_file: wiki.all.tokenized_words
additional_tokens: <eos>
export_word_mappings_to_globals: True
streams:
inputs: sources
outputs: embedded_sources

# Target encoding.
target_indexer:
type: SentenceIndexer
priority: 2.1
data_folder: *data_folder
import_word_mappings_from_globals: True
streams:
inputs: targets
outputs: indexed_targets

# Publish the hidden size of the seq2seq
global_publisher:
type: GlobalVariablePublisher
priority: 1
# Add s2s_hidden_size to globals, so the seq2seq components will use it.
keys: s2s_hidden_size
values: 300

# FF, to resize the embeddings to whatever the hidden size of the seq2seq is.
ff_resize_s2s_input:
type: FeedForwardNetwork
priority: 2.5
s2s_hidden_size: 300
use_logsoftmax: False
dimensions: 3
streams:
inputs: embedded_sources
predictions: embedded_sources_resized
globals:
input_size: embeddings_size
prediction_size: s2s_hidden_size

# LSTM Encoder
lstm_encoder:
type: RecurrentNeuralNetwork
priority: 3
initial_state: Trainable
hidden_size: 300
num_layers: 3
use_logsoftmax: False
output_last_state: True
prediction_mode: Last
streams:
inputs: embedded_sources_resized
predictions: s2s_encoder_output
output_state: s2s_state_output
globals:
input_size: s2s_hidden_size
prediction_size: s2s_hidden_size

# LSTM Decoder
lstm_decoder:
type: RecurrentNeuralNetwork
priority: 4
initial_state: Input
hidden_size: 300
num_layers: 3
use_logsoftmax: False
input_mode: Autoregression_First
autoregression_length: 50
prediction_mode: Dense
streams:
inputs: s2s_encoder_output
predictions: s2s_decoder_output
input_state: s2s_state_output
globals:
input_size: s2s_hidden_size
prediction_size: s2s_hidden_size

# FF, to resize from the hidden size of the seq2seq to the size of the target vector
ff_resize_s2s_output:
type: FeedForwardNetwork
use_logsoftmax: True
dimensions: 3
priority: 5
streams:
inputs: s2s_decoder_output
globals:
input_size: s2s_hidden_size
prediction_size: vocabulary_size

# Loss
nllloss:
type: NLLLoss
priority: 6
num_targets_dims: 2
streams:
targets: indexed_targets
loss: loss

# Prediction decoding.
prediction_decoder:
type: SentenceIndexer
priority: 10
# Reverse mode.
reverse: True
# Use distributions as inputs.
use_input_distributions: True
data_folder: *data_folder
import_word_mappings_from_globals: True
streams:
inputs: predictions
outputs: prediction_sentences


# Statistics.
batch_size:
type: BatchSizeStatistics
priority: 100.0

#accuracy:
# type: AccuracyStatistics
# priority: 100.1
# streams:
# targets: indexed_targets

bleu:
type: BLEUStatistics
priority: 100.2
streams:
targets: indexed_targets


# Viewers.
viewer:
type: StreamViewer
priority: 100.3
input_streams: sources,targets,indexed_targets,prediction_sentences

#: pipeline
Loading