Skip to content
This repository was archived by the owner on Jul 18, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions configs/default/components/publishers/bleu_statistics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# This file defines the default values for the BLEU statistics.

####################################################################
# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
####################################################################

# Flag indicating whether predictions are represented as distributions or indices (LOADED)
# Options: True (expects distribution for each prediction)
# False (expects indices (max args))
use_prediction_distributions: True

# When set to True, performs masking of selected samples from batch (LOADED)
# TODO!
#use_masking: False

# Weights of n-grams used when calculating the score.
weights: [0.25, 0.25, 0.25, 0.25]

streams:
####################################################################
# 2. Keymappings associated with INPUT and OUTPUT streams.
####################################################################

# Stream containing targets (label ids) (INPUT)
targets: targets

# Stream containing batch of predictions (INPUT)
predictions: predictions

# Stream containing masks used for masking of selected samples from batch (INPUT)
#masks: masks

globals:
####################################################################
# 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
####################################################################

# Word mappings used for mapping predictions/targets into lists of words (RETRIEVED)
word_mappings: word_mappings

####################################################################
# 4. Keymappings associated with GLOBAL variables that will be SET.
####################################################################

statistics:
####################################################################
# 5. Keymappings associated with statistics that will be ADDED.
####################################################################

# Name used for collected statistics (ADDED).
bleu: bleu


8 changes: 8 additions & 0 deletions configs/default/components/text/sentence_indexer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ import_word_mappings_from_globals: False
# Flag informing whether word mappings will be exported to globals (LOADED)
export_word_mappings_to_globals: False

# Operation mode. If 'reverse' is True, then it will change indices into words (LOADED)
reverse: False

# Flag indicating whether inputs are represented as distributions or indices (LOADED)
# Options: True (expects distribution for each input item in sequence)
# False (expects indices (max args))
use_input_distributions: False

streams:
####################################################################
# 2. Keymappings associated with INPUT and OUTPUT streams.
Expand Down
46 changes: 39 additions & 7 deletions configs/wikitext/wikitext_language_modeling_rnn.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ training:
data_folder: &data_folder ~/data/language_modeling/wikitext-2
dataset: &dataset wikitext-2
subset: train
sentence_length: 50
sentence_length: 10
batch_size: 64

# optimizer parameters:
Expand All @@ -27,7 +27,7 @@ validation:
data_folder: *data_folder
dataset: *dataset
subset: valid
sentence_length: 50
sentence_length: 20
batch_size: 64

# Testing parameters:
Expand All @@ -46,7 +46,7 @@ pipeline:
# Source encoding - model 1.
source_sentence_embedding:
type: SentenceEmbeddings
priority: 1.1
priority: 1
embeddings_size: 50
pretrained_embeddings: glove.6B.50d.txt
data_folder: *data_folder
Expand All @@ -61,7 +61,7 @@ pipeline:
# Target encoding.
target_indexer:
type: SentenceIndexer
priority: 2.1
priority: 2
data_folder: *data_folder
import_word_mappings_from_globals: True
streams:
Expand All @@ -86,12 +86,44 @@ pipeline:
num_targets_dims: 2
streams:
targets: indexed_targets
loss: loss

# Prediction decoding.
prediction_decoder:
type: SentenceIndexer
priority: 10
# Reverse mode.
reverse: True
# Use distributions as inputs.
use_input_distributions: True
data_folder: *data_folder
import_word_mappings_from_globals: True
streams:
inputs: predictions
outputs: prediction_sentences


# Statistics.
batch_size:
type: BatchSizeStatistics
priority: 100.0

#accuracy:
# type: AccuracyStatistics
# priority: 100.1
# streams:
# targets: indexed_targets

bleu:
type: BLEUStatistics
priority: 100.2
streams:
targets: indexed_targets


# Viewers.
viewer:
type: StreamViewer
priority: 100.1
input_streams: sources,indexed_targets,targets,predictions
priority: 100.3
input_streams: sources,targets,indexed_targets,prediction_sentences

#: pipeline
25 changes: 10 additions & 15 deletions ptp/components/mixins/word_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,26 @@
import os

import ptp.components.utils.word_mappings as wm
from ptp.components.component import Component


class WordMappings(Component):
class WordMappings(object):
"""
Mixin class that handles the initialization of (word:index) mappings.
Assumes that it is mixed-in into class that is derived from the component.
.. warning::
Constructor (__init__) of the Component class has to be called before the constructor of the mixin WordMappings class.

"""
def __init__(self, name, class_type, config):
def __init__(self): #, name, class_type, config):
"""
Initializes the (word:index) mappings.

Loads parameters from configuration.

:param name: Component name (read from configuration file).
:type name: str

:param class_type: Class type of the component (derived from this class).

:param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
:type config: :py:class:`ptp.configuration.ConfigInterface`
Assumes that Component was initialized in advance, which means that the self object possesses the following objects:
- self.config
- self.globals
- self.logger

"""
# Call constructors of parent classes.
Component.__init__(self, name, class_type, config)

# Read the actual configuration.
self.data_folder = os.path.expanduser(self.config['data_folder'])

Expand Down
2 changes: 1 addition & 1 deletion ptp/components/models/sentence_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def __init__(self, name, config):
"""
# Call base class constructors.
Model.__init__(self, name, SentenceEmbeddings, config)
WordMappings.__init__(self, name, SentenceEmbeddings, config)
WordMappings.__init__(self)

# Set key mappings.
self.key_inputs = self.stream_keys["inputs"]
Expand Down
2 changes: 2 additions & 0 deletions ptp/components/publishers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from .accuracy_statistics import AccuracyStatistics
from .batch_size_statistics import BatchSizeStatistics
from .bleu_statistics import BLEUStatistics
from .global_variable_publisher import GlobalVariablePublisher
from .precision_recall_statistics import PrecisionRecallStatistics

__all__ = [
'AccuracyStatistics',
'BatchSizeStatistics',
'BLEUStatistics',
'GlobalVariablePublisher',
'PrecisionRecallStatistics',
]
Loading