This repository was archived by the owner on Jul 18, 2024. It is now read-only.
Merged
18 commits
cbdf56e
Merge branch 'feat/extend-rnn' into feat/attn-decoder-rnn
aasseman Apr 24, 2019
4d698e1
Merge branch 'feat/extend-rnn' into feat/attn-decoder-rnn
aasseman Apr 24, 2019
09005c7
Fixed DataDefinition of RecurrentNeuralNetwork's output and input sta…
aasseman Apr 24, 2019
0ba7a73
Merge branch 'feat/extend-rnn' into feat/attn-decoder-rnn
aasseman Apr 24, 2019
3ab87f7
Merge branch 'feat/extend-rnn' into feat/attn-decoder-rnn
aasseman Apr 24, 2019
964a407
Merge branch 'feat/extend-rnn' into feat/attn-decoder-rnn
aasseman Apr 25, 2019
0ec24fa
Added first prototype of Attn_Decoder, with dummy wikitext test
aasseman Apr 25, 2019
0a53520
Merge branch 'refact/better-download' into feat/attn-decoder-rnn
aasseman Apr 26, 2019
df60fb4
Added translation problem
aasseman Apr 26, 2019
04a42d6
Add fixed padding option to sentence_embeddings, sentence_indexer
aasseman Apr 26, 2019
82a2121
Changed translation config for fixed padding compatibility
aasseman Apr 26, 2019
0138ef2
Merge branch 'develop' of github.com:IBM/pytorchpipe into feat/attn-d…
aasseman Apr 26, 2019
dd1477d
Merge branch 'feat/fixed-sentence-padding' into feat/attn-decoder-rnn
aasseman Apr 26, 2019
1fad1fc
Merge branch 'develop' of github.com:IBM/pytorchpipe into feat/attn-d…
aasseman Apr 26, 2019
afaf7df
Cleaning
aasseman Apr 26, 2019
93a1167
Merge branch 'vqa_med_yn_fix' of github.com:IBM/pytorchpipe into feat…
aasseman Apr 26, 2019
bc58318
Merge branch 'develop' of github.com:IBM/pytorchpipe into feat/attn-d…
aasseman Apr 27, 2019
b8ed220
Cleaning
aasseman Apr 27, 2019
78 changes: 78 additions & 0 deletions configs/default/components/models/attn_decoder_rnn.yml
@@ -0,0 +1,78 @@
# This file defines the default values for the Attn_Decoder_RNN model.

####################################################################
# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
####################################################################

# Size of the hidden state (LOADED)
hidden_size: 100

# Whether to include the last hidden state in the outputs (LOADED)
output_last_state: False

# Type of recurrent cell (LOADED)
# -> Only GRU is supported

# Number of "stacked" layers (LOADED)
# -> Only a single layer is supported

# Dropout rate (LOADED)
# Default: 0 (means that it is turned off)
dropout_rate: 0

# Prediction mode (LOADED)
# Options:
# * Dense (passes every activation through output layer) |
# * Last (passes only the last activation through output layer) |
# * None (all outputs are discarded)
prediction_mode: Dense

# Enable the FFN layer at the output of the RNN (applied before the output is eventually fed back during autoregression).
# Set to False if the raw outputs of the RNN are needed, e.g. for an attention encoder-decoder.
ffn_output: True

# Length of generated output sequence (LOADED)
# User must set it per task, as it is task specific.
autoregression_length: 10

# If true, output of the last layer will be additionally processed with Log Softmax (LOADED)
use_logsoftmax: True

streams:
####################################################################
# 2. Keymappings associated with INPUT and OUTPUT streams.
####################################################################

# Stream containing batch of encoder outputs (INPUT)
inputs: inputs

# Stream containing the initial state of the RNN (INPUT)
# The stream will only be created if `initial_state: Input`
input_state: input_state

# Stream containing predictions (OUTPUT)
predictions: predictions

# Stream containing the final output state of the RNN (OUTPUT)
# The stream will only be created if `output_last_state: True`
output_state: output_state

globals:
####################################################################
# 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
####################################################################

# Size of the input (RETRIEVED)
input_size: input_size

# Size of the prediction (RETRIEVED)
prediction_size: prediction_size

####################################################################
# 4. Keymappings associated with GLOBAL variables that will be SET.
####################################################################

####################################################################
# 5. Keymappings associated with statistics that will be ADDED.
####################################################################
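For reference, the component configured above follows the attention-decoder pattern of the PyTorch seq2seq tutorial cited in the pipeline below: at each of `autoregression_length` steps it attends over the encoder outputs and feeds its own output back as the next input. Here is a minimal sketch of one such step, assuming tutorial-style attention; the class and argument names are illustrative, not the component's actual API:

import torch
import torch.nn as nn
import torch.nn.functional as F

class AttnDecoderStep(nn.Module):
    """One step of a GRU decoder with attention (illustrative sketch only)."""
    def __init__(self, hidden_size, max_length=10):
        super().__init__()
        self.attn = nn.Linear(hidden_size * 2, max_length)
        self.attn_combine = nn.Linear(hidden_size * 2, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)

    def forward(self, prev_output, hidden, encoder_outputs):
        # prev_output: [batch, hidden], hidden: [1, batch, hidden],
        # encoder_outputs: [batch, max_length, hidden].
        # Attention weights computed from the fed-back output and the hidden state.
        weights = F.softmax(
            self.attn(torch.cat((prev_output, hidden[0]), dim=1)), dim=1)
        # Context vector: attention-weighted sum of the encoder outputs.
        context = torch.bmm(weights.unsqueeze(1), encoder_outputs).squeeze(1)
        # Combine previous output and context, then advance the GRU by one step.
        gru_input = F.relu(
            self.attn_combine(torch.cat((prev_output, context), dim=1)))
        output, hidden = self.gru(gru_input.unsqueeze(1), hidden)
        return output.squeeze(1), hidden

In use, such a step would run in a loop for `autoregression_length` iterations, feeding `output` back in as `prev_output`; with `ffn_output: True` the output would additionally pass through the FFN layer before being fed back.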

@@ -0,0 +1,49 @@
# This file defines the default values for the TranslationPairs problem.

####################################################################
# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
####################################################################

# Folder where the problem will store data (LOADED)
data_folder: ~/data/language_modeling/translation_pairs

# Defines the dataset that will be used (LOADED)
# Options: eng-fra, eng-pol
dataset: eng-fra

# Defines the subset that will be used (LOADED)
# Options: train | valid | test
subset: train

# Length limit of the source and target sentences (LOADED)
# If < 0, no limit is applied.
sentence_length: 10

streams:
####################################################################
# 2. Keymappings associated with INPUT and OUTPUT streams.
####################################################################

# Stream containing batch of indices (OUTPUT)
# Every problem MUST return that stream.
indices: indices

# Stream containing batch of tokenized source sentences (OUTPUT)
sources: sources

# Stream containing batch of tokenized target sentences (OUTPUT)
targets: targets

globals:
####################################################################
# 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
####################################################################

####################################################################
# 4. Keymappings associated with GLOBAL variables that will be SET.
####################################################################

####################################################################
# 5. Keymappings associated with statistics that will be ADDED.
####################################################################
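For context, the configuration above implies a simple data path: read tab-separated Tatoeba-style sentence pairs, tokenize, and drop pairs whose length exceeds `sentence_length`. A hedged sketch of that filtering step, assuming a tab-separated pair file; the function name and file layout are assumptions, not the problem's actual implementation:

def load_translation_pairs(path, sentence_length=10):
    """Load tab-separated source/target sentence pairs, filtering by token count."""
    pairs = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            source, target = line.rstrip("\n").split("\t")[:2]
            src_tokens, tgt_tokens = source.split(), target.split()
            # sentence_length < 0 disables the limit (see the option above).
            if sentence_length < 0 or max(len(src_tokens), len(tgt_tokens)) <= sentence_length:
                pairs.append({"sources": src_tokens, "targets": tgt_tokens})
    return pairs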

172 changes: 172 additions & 0 deletions configs/translation/eng_fra_translation_enc_attndec.yml
@@ -0,0 +1,172 @@
# This pipeline applies an encoder-decoder GRU with attention to the open Tatoeba translation sentence pairs.
# Inspired by https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html .
# Note that training will be slower than in the tutorial, as teacher forcing is not implemented here.

# Training parameters:
training:
problem:
type: &p_type TranslationPairs
data_folder: &data_folder ~/data/language_modeling/translation_pairs
dataset: &dataset eng-fra
subset: train
sentence_length: 10
batch_size: 64

# optimizer parameters:
optimizer:
name: Adam
lr: 1.0e-3

# settings parameters
terminal_conditions:
loss_stop: 1.0e-2
episode_limit: 1000000
epoch_limit: 100

# Validation parameters:
validation:
partial_validation_interval: 100
problem:
type: *p_type
data_folder: *data_folder
dataset: *dataset
subset: valid
sentence_length: 10
batch_size: 64

# Testing parameters:
testing:
problem:
type: *p_type
data_folder: *data_folder
dataset: *dataset
subset: test
sentence_length: 10
batch_size: 64

pipeline:
name: eng_fra_translation_enc_attndec

# Source encoding - model 1.
source_sentence_embedding:
type: SentenceEmbeddings
priority: 1.1
embeddings_size: 50
pretrained_embeddings: glove.6B.50d.txt
data_folder: *data_folder
source_vocabulary_files: eng-fra/eng.train.txt,eng-fra/eng.valid.txt,eng-fra/eng.test.txt
vocabulary_mappings_file: eng-fra/eng.all.tokenized_words
regenerate: True
additional_tokens: <eos>
import_word_mappings_from_globals: False
export_word_mappings_to_globals: False
fixed_padding: 10
streams:
inputs: sources
outputs: embedded_sources

# Target encoding.
target_indexer:
type: SentenceIndexer
priority: 2.1
data_folder: *data_folder
source_vocabulary_files: eng-fra/fra.train.txt,eng-fra/fra.valid.txt,eng-fra/fra.test.txt
import_word_mappings_from_globals: False
export_word_mappings_to_globals: True
fixed_padding: 10
regenerate: True
streams:
inputs: targets
outputs: indexed_targets

# Single layer GRU Encoder
encoder:
type: RecurrentNeuralNetwork
cell_type: GRU
priority: 3
initial_state: Trainable
hidden_size: 50
num_layers: 1
use_logsoftmax: False
output_last_state: True
prediction_mode: Dense
ffn_output: False
streams:
inputs: embedded_sources
predictions: s2s_encoder_output
output_state: s2s_state_output
globals:
input_size: embeddings_size
prediction_size: embeddings_size

# Single layer GRU Decoder with attention
decoder:
type: Attn_Decoder_RNN
priority: 4
hidden_size: 50
use_logsoftmax: False
autoregression_length: 10
prediction_mode: Dense
streams:
inputs: s2s_encoder_output
predictions: s2s_decoder_output
input_state: s2s_state_output
globals:
input_size: embeddings_size
prediction_size: embeddings_size

# FFN resizing the seq2seq output from the embedding size to the size of the target vocabulary.
ff_resize_s2s_output:
type: FeedForwardNetwork
use_logsoftmax: True
dimensions: 3
priority: 5
streams:
inputs: s2s_decoder_output
globals:
input_size: embeddings_size
prediction_size: vocabulary_size

# Loss
nllloss:
type: NLLLoss
priority: 6
num_targets_dims: 2
streams:
targets: indexed_targets
loss: loss

# Prediction decoding.
prediction_decoder:
type: SentenceIndexer
priority: 10
# Reverse mode.
reverse: True
# Use distributions as inputs.
use_input_distributions: True
data_folder: *data_folder
import_word_mappings_from_globals: True
streams:
inputs: predictions
outputs: prediction_sentences


# Statistics.
batch_size:
type: BatchSizeStatistics
priority: 100.0

bleu:
type: BLEUStatistics
priority: 100.2
streams:
targets: indexed_targets


# Viewers.
viewer:
type: StreamViewer
priority: 100.3
input_streams: sources,targets,indexed_targets,prediction_sentences

#: pipeline
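To make the stream wiring concrete, here is a rough end-to-end sketch of the tensor flow this pipeline describes, in plain PyTorch. Shapes follow the config (batch_size: 64, fixed_padding: 10, embeddings_size 50); the stand-in decoder and the vocabulary size are placeholders for illustration, not pytorchpipe's API:

import torch
import torch.nn as nn

batch, seq_len, emb, vocab = 64, 10, 50, 1000  # vocab size is a placeholder

# Stand-ins for the outputs of source_sentence_embedding and target_indexer.
embedded_sources = torch.randn(batch, seq_len, emb)
indexed_targets = torch.randint(0, vocab, (batch, seq_len))

# encoder: single-layer GRU, raw outputs kept (ffn_output: False) plus final state.
encoder = nn.GRU(emb, emb, batch_first=True)
s2s_encoder_output, s2s_state_output = encoder(embedded_sources)

# decoder: attends over s2s_encoder_output starting from s2s_state_output and
# emits one embedding-sized vector per autoregression step (stand-in here).
s2s_decoder_output = torch.randn(batch, seq_len, emb)

# ff_resize_s2s_output: embeddings_size -> vocabulary_size, then log-softmax.
ff_resize = nn.Sequential(nn.Linear(emb, vocab), nn.LogSoftmax(dim=-1))
predictions = ff_resize(s2s_decoder_output)  # [batch, seq_len, vocab]

# nllloss with num_targets_dims: 2 -> flatten batch and time before NLLLoss.
loss = nn.NLLLoss()(predictions.view(-1, vocab), indexed_targets.view(-1))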