The methods here require data stored in the project's cloud repository. Contact MCB to request access.

In [6]:
import json

# Load the word -> representations lookup that the examples below query.
# The encoding is pinned to UTF-8 (the standard encoding for JSON text) so
# that decoding does not depend on the platform's default locale encoding.
with open('data/REPS.json', 'r', encoding='utf-8') as f:
    REPS = json.load(f)

In [10]:
def get(data, word, representation='phon_output'):
    """
    Look up one stored representation of a word.

    The data are the items used to train and test the temporal model
    for the time-varying reader. Each word is a letter string for
    which an orthographic input, a phonological input (SOS pattern),
    a phonological output (EOS pattern), and a frequency are stored.

    Parameters
    ----------
    data : dict
        Mapping from each word to a mapping of its representations.

    word : str
        A string of letters indexing a word present in data.

    representation : str
        Which entry of data[word] to extract. The documented options
        are:

        "phon_output" (the default)
            A list holding the model's phonological output
            representation. Stored under the key "phonEOS".
        "phon_input"
            A list holding the model's phonological (i.e., decoder)
            input representation. Stored under the key "phonSOS".
        "orth"
            A list holding the model's orthographic (i.e., encoder)
            input.
        "frequency"
            An integer: the (k-smoothed) frequency used during
            training.

        Any other value is used unchanged as the key into data[word].

    Returns
    -------
    list or int
        The stored value; its type depends on the representation
        requested.

    Raises
    ------
    KeyError
        When data is a dict and either word or the resolved
        representation key is absent.
    """
    # Friendly names accepted by this function -> keys used in the data.
    stored_key_for = {
        'phon_output': 'phonEOS',
        'phon_input': 'phonSOS',
    }

    # Names without an alias are already the stored key.
    key = stored_key_for.get(representation, representation)
    entry = data[word]
    return entry[key]


# Example
Here I access the frequency of the word "the".

In [13]:
# Frequency entry stored for "the".
get(REPS, word='the', representation='frequency')

23099035

Here I get the phonological output for that same word.

In [14]:
# Phonological output pattern stored for "the".
get(REPS, word='the', representation='phon_output')

[[0,
  1,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1]]