From ea8aea1beb9c7ef8277293166c3b07ad2fdb7a14 Mon Sep 17 00:00:00 2001 From: Michael McAuliffe Date: Sun, 6 Aug 2017 07:33:08 -0400 Subject: [PATCH] DOCS: Reogranization WIP --- bin/pgdb.py | 2 +- docs/source/acoustics.rst | 16 + docs/source/acoustics_backend.rst | 5 + docs/source/acoustics_encoding.rst | 6 + docs/source/acoustics_querying.rst | 6 + docs/source/api_graph.rst | 10 +- docs/source/enrichment.rst | 19 + docs/source/enrichment_csvs.rst | 76 ++++ docs/source/enrichment_queries.rst | 65 +++ docs/source/enrichment_syllables.rst | 108 +++++ docs/source/enrichment_utterances.rst | 94 +++++ .../{installation.rst => getting_started.rst} | 0 docs/source/{importing.rst => import.rst} | 0 docs/source/index.rst | 10 +- .../source/{graph_queries.rst => queries.rst} | 13 +- docs/source/queries_aggregates.rst | 65 --- docs/source/queries_annotations.rst | 370 ++++++++++++++++++ docs/source/queries_basic.rst | 138 ------- docs/source/queries_discourse.rst | 7 + docs/source/queries_lexicon.rst | 18 + docs/source/queries_ordering.rst | 29 -- docs/source/queries_reference.rst | 122 ++++++ docs/source/queries_speakers.rst | 8 + docs/source/queries_subannotations.rst | 53 --- docs/source/queries_subpaths.rst | 81 ---- docs/source/queries_subsets.rst | 52 --- polyglotdb/corpus/featured.py | 13 + polyglotdb/corpus/lexical.py | 14 + polyglotdb/corpus/spoken.py | 23 ++ polyglotdb/corpus/syllabic.py | 35 +- tests/test_client.py | 5 +- tests/test_enrich.py | 2 +- tests/test_summarized.py | 6 +- 33 files changed, 1016 insertions(+), 455 deletions(-) create mode 100644 docs/source/acoustics.rst create mode 100644 docs/source/acoustics_backend.rst create mode 100644 docs/source/acoustics_encoding.rst create mode 100644 docs/source/acoustics_querying.rst create mode 100644 docs/source/enrichment.rst create mode 100644 docs/source/enrichment_csvs.rst create mode 100644 docs/source/enrichment_queries.rst create mode 100644 docs/source/enrichment_syllables.rst create mode 100644 docs/source/enrichment_utterances.rst rename docs/source/{installation.rst => getting_started.rst} (100%) rename docs/source/{importing.rst => import.rst} (100%) rename docs/source/{graph_queries.rst => queries.rst} (60%) delete mode 100644 docs/source/queries_aggregates.rst create mode 100644 docs/source/queries_annotations.rst delete mode 100644 docs/source/queries_basic.rst create mode 100644 docs/source/queries_discourse.rst create mode 100644 docs/source/queries_lexicon.rst delete mode 100644 docs/source/queries_ordering.rst create mode 100644 docs/source/queries_reference.rst create mode 100644 docs/source/queries_speakers.rst delete mode 100644 docs/source/queries_subannotations.rst delete mode 100644 docs/source/queries_subpaths.rst delete mode 100644 docs/source/queries_subsets.rst diff --git a/bin/pgdb.py b/bin/pgdb.py index ea0342da..25857093 100644 --- a/bin/pgdb.py +++ b/bin/pgdb.py @@ -50,7 +50,7 @@ def save_config(c): TEMP_DIR = os.path.join(CONFIG_DIR, 'downloads') -NEO4J_VERSION = '3.0.7' +NEO4J_VERSION = '3.2.3' INFLUXDB_VERSION = '1.1.0' diff --git a/docs/source/acoustics.rst b/docs/source/acoustics.rst new file mode 100644 index 00000000..3afb3a6e --- /dev/null +++ b/docs/source/acoustics.rst @@ -0,0 +1,16 @@ +.. _acoustics: + +***************** +Acoustic measures +***************** + +TODO blurb + +Contents: + +.. 
toctree:: + :maxdepth: 2 + + acoustics_encoding.rst + acoustics_querying.rst + acoustics_backend.rst diff --git a/docs/source/acoustics_backend.rst b/docs/source/acoustics_backend.rst new file mode 100644 index 00000000..c18d89f8 --- /dev/null +++ b/docs/source/acoustics_backend.rst @@ -0,0 +1,5 @@ +.. _acoustics_backend: + +**************** +Acoustic backend +**************** diff --git a/docs/source/acoustics_encoding.rst b/docs/source/acoustics_encoding.rst new file mode 100644 index 00000000..4c4c68ec --- /dev/null +++ b/docs/source/acoustics_encoding.rst @@ -0,0 +1,6 @@ +.. _acoustics_encoding: + +************************** +Encoding acoustic measures +************************** + diff --git a/docs/source/acoustics_querying.rst b/docs/source/acoustics_querying.rst new file mode 100644 index 00000000..af8abcfc --- /dev/null +++ b/docs/source/acoustics_querying.rst @@ -0,0 +1,6 @@ +.. _acoustics_querying: + +************************** +Querying acoustic measures +************************** + diff --git a/docs/source/api_graph.rst b/docs/source/api_graph.rst index f103cd01..78b8521f 100644 --- a/docs/source/api_graph.rst +++ b/docs/source/api_graph.rst @@ -8,7 +8,7 @@ Graph API Queries ------- -.. currentmodule:: polyglotdb.graph.query +.. currentmodule:: polyglotdb.query.annotations.query .. autosummary:: :toctree: generated/ @@ -20,20 +20,20 @@ Queries Attributes ---------- -.. currentmodule:: polyglotdb.graph.attributes +.. currentmodule:: polyglotdb.query.annotations.attributes.base .. autosummary:: :toctree: generated/ :template: class.rst - Attribute + AnnotationNode AnnotationAttribute .. _graph_clauses_api: Clause elements --------------- -.. currentmodule:: polyglotdb.graph.elements +.. currentmodule:: polyglotdb.query.annotations.elements .. autosummary:: :toctree: generated/ @@ -54,7 +54,7 @@ Clause elements Aggregate functions ------------------- -.. currentmodule:: polyglotdb.graph.func +.. currentmodule:: polyglotdb.query.base.func .. autosummary:: :toctree: generated/ diff --git a/docs/source/enrichment.rst b/docs/source/enrichment.rst new file mode 100644 index 00000000..cc6928b8 --- /dev/null +++ b/docs/source/enrichment.rst @@ -0,0 +1,19 @@ +.. _enrichment: + +********** +Enrichment +********** + +Following import, the corpus is often fairly bare, with just word and phone annotations. An important step in analyzing +corpora is therefore enriching it with other information. Most of the methods here are automatic once a function is called. + + +Contents: + +.. toctree:: + :maxdepth: 2 + + enrichment_syllables.rst + enrichment_utterances.rst + enrichment_csvs.rst + enrichment_queries.rst diff --git a/docs/source/enrichment_csvs.rst b/docs/source/enrichment_csvs.rst new file mode 100644 index 00000000..95f590ea --- /dev/null +++ b/docs/source/enrichment_csvs.rst @@ -0,0 +1,76 @@ +.. _enrichment_csvs: + +************************ +Enrichment via CSV files +************************ + +PolyglotDB supports ways of adding arbitrary information to annotations or metadata about speakers and files by specifying +a local CSV file to add information from. When constructing this CSV file, the first column should be the label used to +identify which element should be enriched, and all subsequent columns are used as properties to add to the corpus. 
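+Once added, these properties can be referred to in queries like any other property. As a minimal sketch (assuming the
+lexicon has been enriched from a CSV containing a `frequency` column, as described below; the column name and the
+threshold are purely illustrative):
+
+.. code-block:: python
+
+    with CorpusContext(config) as c:
+        # 'frequency' is assumed to have been added via enrich_lexicon_from_csv (see below)
+        q = c.query_graph(c.word)
+        q = q.filter(c.word.frequency > 1000)
+        q = q.columns(c.word.label, c.word.frequency.column_name('frequency'))
+        print(q.all())
+
+The expected layout of the CSV file itself is as follows: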
+
+::
+
+    ID_column,property_one,property_two
+    first_item,first_item_value_one,first_item_value_two
+    second_item,,second_item_value_two
+
+Enriching using this file would look up elements based on the `ID_column`, and the one matching `first_item` would get
+both `property_one` and `property_two` (with the respective values). The one matching `second_item` would only get
+`property_two` (because the value for `property_one` is empty).
+
+.. _enrich_lexicon:
+
+Enriching the lexicon
+=====================
+
+.. code-block:: python
+
+    lexicon_csv_path = '/full/path/to/lexicon/data.csv'
+    with CorpusContext(config) as c:
+        c.enrich_lexicon_from_csv(lexicon_csv_path)
+
+
+.. note::
+
+    The function `enrich_lexicon_from_csv` accepts an optional keyword `case_sensitive`, which defaults to `False`.
+    Setting it to `True` makes word lookup respect capitalization.
+
+
+.. _enrich_inventory:
+
+Enriching the phonological inventory
+====================================
+
+The phone inventory can be enriched with arbitrary properties via:
+
+.. code-block:: python
+
+    inventory_csv_path = '/full/path/to/inventory/data.csv'
+    with CorpusContext(config) as c:
+        c.enrich_inventory_from_csv(inventory_csv_path)
+
+.. _enrich_speakers:
+
+Enriching speaker information
+=============================
+
+Speaker information can be added via:
+
+.. code-block:: python
+
+    speaker_csv_path = '/full/path/to/speaker/data.csv'
+    with CorpusContext(config) as c:
+        c.enrich_speakers_from_csv(speaker_csv_path)
+
+.. _enrich_discourses:
+
+Enriching discourse information
+===============================
+
+Metadata about the discourses or sound files can be added via:
+
+.. code-block:: python
+
+    discourse_csv_path = '/full/path/to/discourse/data.csv'
+    with CorpusContext(config) as c:
+        c.enrich_discourses_from_csv(discourse_csv_path)
diff --git a/docs/source/enrichment_queries.rst b/docs/source/enrichment_queries.rst
new file mode 100644
index 00000000..496d8a2a
--- /dev/null
+++ b/docs/source/enrichment_queries.rst
@@ -0,0 +1,65 @@
+.. _enrichment_queries:
+
+**********************
+Enrichment via queries
+**********************
+
+Queries can also be used to set properties and create subsets of elements based on their results.
+
+For instance, to make word-initial phones more easily queryable, you could perform the following:
+
+.. code-block:: python
+
+    with CorpusContext(config) as c:
+        q = c.query_graph(c.phone)
+        q = q.filter(c.phone.begin == c.phone.word.begin)
+        q.create_subset('word-initial')
+
+Once that code completes, a subsequent query can refer to the new subset:
+
+.. code-block:: python
+
+    with CorpusContext(config) as c:
+        q = c.query_graph(c.phone)
+        q = q.filter(c.phone.subset == 'word-initial')
+        print(q.all())
+
+Or instead of a subset, a property could be encoded as:
+
+.. code-block:: python
+
+    with CorpusContext(config) as c:
+        q = c.query_graph(c.phone)
+        q = q.filter(c.phone.begin == c.phone.word.begin)
+        q.set_properties(position='word-initial')
+
+This property can then be exported as a column in a CSV file:
+
+.. code-block:: python
+
+    with CorpusContext(config) as c:
+        q = c.query_graph(c.phone)
+        q = q.columns(c.phone.position)
+        q.to_csv(some_csv_path)
+
+
+Lexicon queries can also be used in the same way to create subsets and encode properties that do not vary on a token-by-token basis.
+
+For instance, a subset for high vowels can be created as follows:
+
+.. code-block:: python
+
+    with CorpusContext(config) as c:
+        high_vowels = ['iy', 'ih', 'uw', 'uh']
+        q = c.query_lexicon(c.lexicon_phone)
+        q = q.filter(c.lexicon_phone.label.in_(high_vowels))
+        q.create_subset('high_vowel')
+
+This subset can then be used to query phone annotations:
+
+.. code-block:: python
+
+    with CorpusContext(config) as c:
+        q = c.query_graph(c.phone)
+        q = q.filter(c.phone.subset == 'high_vowel')
+        print(q.all())
diff --git a/docs/source/enrichment_syllables.rst b/docs/source/enrichment_syllables.rst
new file mode 100644
index 00000000..069ab2d2
--- /dev/null
+++ b/docs/source/enrichment_syllables.rst
@@ -0,0 +1,108 @@
+.. _enrichment_syllables:
+
+***********************
+Creating syllable units
+***********************
+
+Syllables are groupings of phones into larger units within words. PolyglotDB enforces a strict hierarchy, with the boundaries
+of words aligning with syllable boundaries (i.e., syllables cannot stretch across words).
+
+At the moment, only one algorithm is supported (`maximal onset`), because its simplicity makes it language-agnostic.
+
+To encode syllables, there are two steps:
+
+1. :ref:`encoding_syllabics`
+2. :ref:`encoding_syllables`
+
+
+.. _encoding_syllabics:
+
+Encoding syllabic segments
+==========================
+
+Syllabic segments are encoded via a specialized function:
+
+
+.. code-block:: python
+
+    syllabic_segments = ['aa', 'ae', 'ih']
+    with CorpusContext(config) as c:
+        c.encode_syllabic_segments(syllabic_segments)
+
+
+Following this code, all phones with labels of `aa, ae, ih` will belong to the subset `syllabic`. This subset can then
+be queried in the future, in addition to allowing syllables to be encoded.
+
+.. _encoding_syllables:
+
+Encoding syllables
+==================
+
+.. code-block:: python
+
+    with CorpusContext(config) as c:
+        c.encode_syllables()
+
+.. note::
+
+    The function `encode_syllables` can be given a keyword argument for `call_back`, which is a function like `print` that
+    allows for progress to be output to the console.
+
+Following encoding, syllables are available to be queried and used like any other linguistic unit. For example, to get a
+list of all the instances of syllables at the beginnings of words:
+
+
+.. code-block:: python
+
+    with CorpusContext(config) as c:
+        q = c.query_graph(c.syllable).filter(c.syllable.begin == c.syllable.word.begin)
+        print(q.all())
+
+.. _stress_tone:
+
+Encoding syllable properties from syllabics
+===========================================
+
+Corpora often encode information about syllables on the vowels. For instance, if the transcription contains
+stress levels, they will be specified as numbers 0-2 on the vowels (i.e., as in Arpabet). Tone is similarly encoded
+in some transcription systems. This section details functions that strip this information from the vowel and place it on
+the syllable unit instead.
+
+.. note::
+
+    Removing the stress/tone information from the vowel makes queries easier, as getting all `AA` tokens no longer requires
+    specifying that the label is in the set of `AA1, AA2, AA0`. This functionality can be disabled by specifying `clean_phone_label=False`
+    in the two functions that follow.
+
+.. _stress_enrichment:
+
+Encoding stress
+---------------
+
+.. code-block:: python
+
+    with CorpusContext(config) as c:
+
+        c.encode_stress_to_syllables()
+
+.. note::
+
+    By default, stress is taken to be numbers in the vowel label (i.e., `AA1` would have a stress of `1`).
A different + pattern to use for stress information can be specified through the optional `regex` keyword argument. + + +.. _tone_enrichment: + +Encoding tone +------------- + +.. code-block:: python + + with CorpusContext(config) as c: + + c.encode_tone_to_syllables() + +.. note:: + + As for stress, a different regex can be specified with the `regex` keyword argument. \ No newline at end of file diff --git a/docs/source/enrichment_utterances.rst b/docs/source/enrichment_utterances.rst new file mode 100644 index 00000000..05f8bba1 --- /dev/null +++ b/docs/source/enrichment_utterances.rst @@ -0,0 +1,94 @@ +.. _enrichment_utterances: + +************************ +Creating utterance units +************************ + +Utterances are groups of words that are continuous in some sense. The can be thought of as similar to interpausal units or chunks +in other work. The basic idea is that there are intervals in which there are no speech, a subset of which count as breaks in speech +depending on the length of these non-speech intervals. + +To encode utterances, there are two steps: + +1. :ref:`encoding_pauses` +2. :ref:`encoding_utterances` + + +.. _encoding_pauses: + +Encoding non-speech elements +============================ + +Non-speech elements in PolyglotDB are termed `pause`. Pauses are encoded as follows: + +.. code-block:: python + + nonspeech_words = ['',''] + with CorpusContext(config) as c: + c.encode_pauses(nonspeech_words) + +The function `encode_pauses` takes a list of word labels that should not be considered speech in a discourse and marks them as such. + +.. note:: + + Non-speech words can also be encoded through regular expressions, as in: + + .. code-block:: python + + nonspeech_words = '^[<[{].*' + with CorpusContext(config) as c: + c.encode_pauses(nonspeech_words) + + Where the pattern to be matched is any label that starts with `<` or `[`. + +Once pauses are encoded, aspects of pauses can be queried, as follows: + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.pause).filter(c.pause.discourse.name == 'one_discourse') + print(q.all()) + +Additionally, word annotations can have previous and following pauses that can be found: + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.word).columns(c.word.label, + c.word.following_pause_duration.column_name('pause_duration')) + print(q.all()) + + + +.. note:: + + Once pauses are encoded, accessing an annotation's previous or following word via `c.word.previous` will skip over + any pauses. So for a string like `I go...`, the previous word to the word `go` would be `I` rather than ``. + +.. _encoding_utterances: + +Encoding utterances +=================== + +Once pauses are encoded, utterances can be encoded by specifying the minimum length of non-speech elements that count as +a break between stretches of speech. + +.. code-block:: python + + with CorpusContext(config) as c: + c.encode_utterances(min_pause_length=0.15) + +.. note:: + + The function `encode_utterances` can be given a keyword argument for `call_back`, which is a function like `print` that + allows for progress to be output to the console. + +Following encoding, utterances are available to queried and used as any other linguistic unit. For example, to get a list of +all the instances of words at the beginnings of utterances: + + +.. 
code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.word).filter(c.word.begin == c.word.utterance.begin) + print(q.all()) \ No newline at end of file diff --git a/docs/source/installation.rst b/docs/source/getting_started.rst similarity index 100% rename from docs/source/installation.rst rename to docs/source/getting_started.rst diff --git a/docs/source/importing.rst b/docs/source/import.rst similarity index 100% rename from docs/source/importing.rst rename to docs/source/import.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index b284c1be..ae652d6e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,10 +8,14 @@ Contents: :maxdepth: 2 introduction.rst - installation.rst - importing.rst - graph_queries.rst + getting_started.rst + client.rst + import.rst + enrichment.rst + acoustics.rst + queries.rst io.rst + concepts.rst apireference.rst diff --git a/docs/source/graph_queries.rst b/docs/source/queries.rst similarity index 60% rename from docs/source/graph_queries.rst rename to docs/source/queries.rst index adda427b..76aef7c9 100644 --- a/docs/source/graph_queries.rst +++ b/docs/source/queries.rst @@ -1,4 +1,4 @@ -.. _graph_queries: +.. _queries: **************** Querying corpora @@ -13,9 +13,8 @@ Contents: .. toctree:: :maxdepth: 2 - queries_basic.rst - queries_aggregates.rst - queries_ordering.rst - queries_subsets.rst - queries_subannotations.rst - queries_subpaths.rst + queries_annotations.rst + queries_lexicon.rst + queries_speakers.rst + queries_discourse.rst + queries_reference.rst diff --git a/docs/source/queries_aggregates.rst b/docs/source/queries_aggregates.rst deleted file mode 100644 index d737f638..00000000 --- a/docs/source/queries_aggregates.rst +++ /dev/null @@ -1,65 +0,0 @@ -.. _aggregates_and_groups: - -********************* -Aggregates and groups -********************* - -Aggregate functions are available in :code:`polyglotdb.graph.func`. Aggregate -functions available are: - -* Average -* Count -* Max -* Min -* Stdev -* Sum - -In general, these functions take a numeric attribute as an argument. The -only one that does not follow this pattern is :code:`Count`. - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = c.query_graph(c.phone).filter(c.phone.label == 'aa') - q = q.filter(c.phone.following.label == 'r') - - result = q.aggregate(Count()) - print(result) - - -Like the :code:`all` function, :code:`aggregate` triggers evaluation of the query. -Instead of returning rows, it will return a single number, which is the -number of rows matching this query. - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = c.query_graph(c.phone).filter(c.phone.label == 'aa') - q = q.filter(c.phone.following.label == 'r') - - result = q.aggregate(Average(c.phone.duration)) - print(result) - - -The above aggregate function will return the average duration for all 'aa' -phones followed by 'r' phones. - -Aggregates are particularly useful with grouping. For instance: - -.. 
code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = c.query_graph(c.phone).filter(c.phone.label == 'aa') - q = q.filter(c.phone.following.label.in_(['r','l'])) - q = q.group_by(c.phone.following.label.column_name('following_label')) - - result = q.aggregate(Average(c.phone.duration), Count()) - print(result) - - -The above query will return the average duration and the count of 'aa' -phones grouped by whether they're followed by an 'r' or an 'l'. - -.. note:: In the above example, the :code:`group_by` attribute is supplied with - an alias for output. In the print statment and in the results, the column - will be called 'following_label' instead of the default (more opaque) one. diff --git a/docs/source/queries_annotations.rst b/docs/source/queries_annotations.rst new file mode 100644 index 00000000..85b92fbd --- /dev/null +++ b/docs/source/queries_annotations.rst @@ -0,0 +1,370 @@ + + +.. _annotation_queries: + +******************** +Querying annotations +******************** + +The main way of finding specific annotations is through the :code:`query_graph` method of +:code:`CorpusContext` objects. + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.word).filter(c.word.label == 'are') + results = q.all() + print(results) + +The above code will find and print all instances of :code:`word` annotations that are +labeled with 'are'. The method :code:`query_graph` takes one argument, which is +an attribute of the context manager corresponding to the name of the +annotation type. + +The primary function for queries is :code:`filter`. This function takes one or more +conditional expressions on attributes of annotations. In the above example, +:code:`word` annotations have an attribute :code:`label` which corresponds to the +orthography. + +Conditional expressions can take on any normal Python conditional (:code:`==`, +:code:`!=`, :code:`<`, :code:`<=`, :code:`>`, :code:`>=`). The Python +operator :code:`in` does not work; a special pattern has to be used: + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.word).filter(c.word.label.in_(['are', 'is','am'])) + results = q.all() + print(results) + +The :code:`in_` conditional function can take any iterable, including another query: + +.. code-block:: python + + with CorpusContext(config) as c: + sub_q = c.query_graph(c.word).filter(c.word.label.in_(['are', 'is','am'])) + q = c.query_graph(c.phone).filter(c.phone.word.id.in_(sub_q)) + results = q.all() + print(results) + +In this case, it will find all :code:`phone` annotations that are in the words +listed. Using the :code:`id` attribute will use unique identifiers for the filter. +In this particular instance, it does not matter, but it does in the following: + +.. code-block:: python + + with CorpusContext(config) as c: + sub_q = c.query_graph(c.word).filter(c.word.label.in_(['are', 'is','am'])) + sub_q = sub_q.filter_right_aligned(c.word.line) + q = c.query_graph(c.phone).filter(c.phone.word.id.in_(sub_q)) + results = q.all() + print(results) + + +The above query will find all instances of the three words, but only where +they are right-aligned with a :code:`line` annotation. + +.. note:: Queries are lazy evaluated. In the above example, :code:`sub_q` is + not evaluated until :code:`q.all()` is called. This means that filters + can be chained across multiple lines without a performance hit. + +.. 
_following_previous: + +Following and previous annotations +---------------------------------- + +Filters can reference the surrounding local context. For instance: + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.phone).filter(c.phone.label == 'aa') + q = q.filter(c.phone.following.label == 'r') + results = q.all() + print(results) + + +The above query will find all the 'aa' phones that are followed by an 'r' +phone. Similarly, :code:`c.phone.previous` would provide access to filtering on +preceding phones. + +.. _subsetting: + +Subsetting annotations +---------------------- + +In linguistics, it's often useful to specify subsets of symbols as particular classes. +For instance, phonemes are grouped together by whether they are syllabic, +their manner/place of articulation, and vowel height/backness/rounding, and +words are grouped by their parts of speech. + + +Suppose a subset has been created as in :ref:`caching_subsets`, so that the phones 'aa' and 'ih' have been marked as `+syllabic`. +Once this category is encoded in the database, it can be used in filters. + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.phone) + q = q.filter(c.phone.subset=='+syllabic') + results = q.all() + print(results) + +Another way to specify subsets is on the phone annotations themselves, as follows: + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.phone.filter_by_subset('+syllabic')) + results = q.all() + print(results) + +Both of these queries are identical and will return all instances of 'aa' and 'ih' phones. The benefit of `filter_by_subset` +is generally for use in :ref:`hierarchical_queries`. + +.. note:: Using repeated subsets repeatedly in queries can make them overly + verbose. The objects that the queries use are normal Python objects + and can therefore be assigned to variables for easier use. + + .. code-block:: python + + with CorpusContext(config) as c: + syl = c.phone.filter_by_subset('+syllabic') + q = c.query_graph(syl) + q = q.filter(syl.end == syl.word.end) + results = q.all() + print(results) + + The above query would find all phones marked by '+syllabic' that are + at the ends of words. + + +.. _hierarchical_queries: + +Hierarchical queries +-------------------- + +A key facet of language is that it is hierarchical. Words contain phones, +and can be contained in larger utterances. There are several ways to +query hierarchical information. If we want to find all "aa" phones in the +word "dogs", then we can perform the following query: + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.phone).filter(c.phone.label == 'aa') + q = q.filter(c.phone.word.label == 'dogs') + results = q.all() + print(results) + +Starting from the word level, we might want to know what phones each word +contains. + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.word) + q = q.columns(c.word.phone.label.column('phones')) + results = q.all() + print(results) + +In the output of the above query, there would be a column labeled "phones" +that contains a list of the labels of phones that belong to the word +(``['d', 'aa', 'g', 'z']``). Any property of phones can be queried this +way (i.e., 'begin', 'end', 'duration', etc). + +Going down the hierarchy, we can also find all words that contain a certain phone. + +.. 
code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.word).filter(c.word.label.in_(['are', 'is','am'])) + q = q.filter(c.word.phone.label == 'aa') + results = q.all() + print(results) + + +In this example, it will find all instances of the three words that contain +an 'aa' phone. + +Special keywords exist for these containment columns. The keyword 'rate' +will return the elements per second for the word (i.e., phones per second). +The keyword 'count' will return the number of elements. + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.word) + q = q.columns(c.word.phone.rate.column('phones_per_second')) + q = q.columns(c.word.phone.count.column('num_phones')) + results = q.all() + print(results) + +These keywords can also leverage subsets, as above: + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.word) + q = q.columns(c.word.phone.rate.column('phones_per_second')) + q = q.columns(c.word.phone.filter_by_subset('+syllabic').count.column('num_syllabic_phones')) + q = q.columns(c.word.phone.count.column('num_phones')) + results = q.all() + print(results) + +Additionally, there is a special keyword can be used to query the position +of a contained element in a containing one. + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.phone).filter(c.phone.label == 'aa') + q = q.filter(c.word.label == 'dogs') + q = q.columns(c.word.phone.position.column_name('position_in_word')) + results = q.all() + print(results) + +The above query should return ``2`` for the value of 'position_in_word', +as the "aa" phone would be the second phone. + + +.. _subannotations: + +Subannotations +-------------- + +Annotations can have subannotations associated with them. Subannotations +are not independent linguistic types, but have more information associated +with them than just a single property. For instance, voice onset time (VOT) +would be a subannotation of stops (as it has a begin time and an end time +that are of interest). Querying such subannotations would be performed as follows: + + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.phone) + q = q.columns(c.phone.vot.duration.column_name('vot')) + results = q.all() + print(results) + +In some cases, it may be desirable to have more than one subannotation of +the same type associated with a single annotation. For instance, +voicing during the closure of a stop can take place at both the beginning +and end of closure, with an unvoiced period in the middle. Using a similar +query as above would get the durations of each of these (in the order of +their begin time): + + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.phone) + q = q.columns(c.phone.voicing_during_closure.duration.column_name('voicing')) + results = q.all() + print(results) + +In some cases, we might like to know the total duration of such subannotations, +rather than the individual durations. To query that information, we can +use an ``aggregate``: + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.phone) + results = q.aggregate(Sum(c.phone.voicing_during_closure.duration).column_name('total_voicing')) + print(results) + + +Miscellaneous +============= + +.. _aggregates_and_groups: + +Aggregates and groups +--------------------- + +Aggregate functions are available in :code:`polyglotdb.query.func`. 
Aggregate +functions available are: + +* Average +* Count +* Max +* Min +* Stdev +* Sum + +In general, these functions take a numeric attribute as an argument. The +only one that does not follow this pattern is :code:`Count`. + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.phone).filter(c.phone.label == 'aa') + q = q.filter(c.phone.following.label == 'r') + result = q.aggregate(Count()) + print(result) + + +Like the :code:`all` function, :code:`aggregate` triggers evaluation of the query. +Instead of returning rows, it will return a single number, which is the +number of rows matching this query. + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.phone).filter(c.phone.label == 'aa') + q = q.filter(c.phone.following.label == 'r') + result = q.aggregate(Average(c.phone.duration)) + print(result) + + +The above aggregate function will return the average duration for all 'aa' +phones followed by 'r' phones. + +Aggregates are particularly useful with grouping. For instance: + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.phone).filter(c.phone.label == 'aa') + q = q.filter(c.phone.following.label.in_(['r','l'])) + q = q.group_by(c.phone.following.label.column_name('following_label')) + result = q.aggregate(Average(c.phone.duration), Count()) + print(result) + + +The above query will return the average duration and the count of 'aa' +phones grouped by whether they're followed by an 'r' or an 'l'. + +.. note:: In the above example, the :code:`group_by` attribute is supplied with + an alias for output. In the print statment and in the results, the column + will be called 'following_label' instead of the default (more opaque) one. + +.. _ordering: + +Ordering +-------- + +The :code:`order_by` function is used to provide an ordering to the results of +a query. + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_graph(c.phone).filter(c.phone.label == 'aa') + q = q.filter(c.phone.following.label.in_(['r','l'])) + q = q.filter(c.phone.discourse == 'a_discourse') + q = q.order_by(c.phone.begin) + results = q.all() + print(results) + + +The results for the above query will be ordered by the timepoint of the +annotation. Ordering by time is most useful for when looking at single +discourses (as including multiple discourses in a query would invalidate the +ordering). + +.. note:: In grouped aggregate queries, ordering is by default by the + first :code:`group_by` attribute. This can be changed by calling :code:`order_by` + before evaluating with :code:`aggregate`. \ No newline at end of file diff --git a/docs/source/queries_basic.rst b/docs/source/queries_basic.rst deleted file mode 100644 index 729446c7..00000000 --- a/docs/source/queries_basic.rst +++ /dev/null @@ -1,138 +0,0 @@ - - -.. _basic_queries: - -************************ -Basic structural queries -************************ - -The main way of accessing discourses is through the :code:`query_graph` method of -:code:`CorpusContext` objects. - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = c.query_graph(c.word).filter(c.word.label == 'are') - results = q.all() - print(results) - -The above code will find and print all instances of :code:`word` annotations that are -labeled with 'are'. The method :code:`query_graph` takes one argument, which is -an attribute of the context manager corresponding to the name of the -annotation type. 
- -The primary function for queries is :code:`filter`. This function takes one or more -conditional expressions on attributes of annotations. In the above example, -:code:`word` annotations have an attribute :code:`label` which corresponds to the -orthography. - -Conditional expressions can take on any normal Python conditional (:code:`==`, -:code:`!=`, :code:`<`, :code:`<=`, :code:`>`, :code:`>=`). The Python -operator :code:`in` does not work; a special pattern has to be used: - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = c.query_graph(c.word).filter(c.word.label.in_(['are', 'is','am'])) - - results = q.all() - print(results) - -The :code:`in_` conditional function can take any iterable, including another query: - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - sub_q = c.query_graph(c.word).filter(c.word.label.in_(['are', 'is','am'])) - q = c.query_graph(c.phone).filter(c.word.id.in_(sub_q)) - - results = q.all() - print(results) - -In this case, it will find all :code:`phone` annotations that are in the words -listed. Using the :code:`id` attribute will use unique identifiers for the filter. -In this particular instance, it does not matter, but it does in the following: - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - sub_q = c.query_graph(c.word).filter(c.word.label.in_(['are', 'is','am'])) - sub_q = sub_q.filter_right_aligned(c.line) - q = c.query_graph(c.phone).filter(c.word.id.in_(sub_q)) - results = q.all() - print(results) - - -The above query will find all instances of the three words, but only where -they are right-aligned with a :code:`line` annotation. - -.. note:: Queries are lazy evaluated. In the above example, :code:`sub_q` is - not evaluated until :code:`q.all()` is called. This means that filters - can be chained across multiple lines without a performance hit. - -Specialized filters -------------------- - -In addition to :code:`filter`, there are several specialized filter functions -that refer to other types of annotation. The :code:`filter_right_aligned` was -shown above. The full list is: - -* filter_left_aligned -* filter_right_aligned -* filter_contains -* filter_contained_by - -The alignment filters check whether right edges or the left edges of both -annotation types are aligned. The containment filters refer explicitly to -hierarchical structure. The :code:`filter_contains` checks whether the higher -annotation contains a lower annotation that matches the criteria: - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = c.query_graph(c.word).filter(c.word.label.in_(['are', 'is','am'])) - q = q.filter_contains(c.phone.label == 'aa') - - results = q.all() - print(results) - - -In this example, it will find all instances of the three words that contain -an 'aa' phone. - -The :code:`filter_contained_by` function does the opposite, checking whether -the annotation is contained by an annotation that matches a condition: - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = c.query_graph(c.phone).filter(c.phone.label == 'aa') - q = q.filter_contains(c.word.label.in_(['are', 'is','am'])) - - results = q.all() - print(results) - -The above example finds a similar set of labels as the one above that, -but the returned annotation types are different. - - -.. 
_following_previous: - -Following and previous annotations ----------------------------------- - -Filters can reference the surrounding local context. For instance: - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = c.query_graph(c.phone).filter(c.phone.label == 'aa') - q = q.filter(c.phone.following.label == 'r') - - results = q.all() - print(results) - - -The above query will find all the 'aa' phones that are followed by an 'r' -phone. Similarly, :code:`c.phone.previous` would provide access to filtering on -preceding phones. diff --git a/docs/source/queries_discourse.rst b/docs/source/queries_discourse.rst new file mode 100644 index 00000000..b8b28c9a --- /dev/null +++ b/docs/source/queries_discourse.rst @@ -0,0 +1,7 @@ + + +.. _discourse_queries: + +***************** +Discourse queries +***************** diff --git a/docs/source/queries_lexicon.rst b/docs/source/queries_lexicon.rst new file mode 100644 index 00000000..0cd1f673 --- /dev/null +++ b/docs/source/queries_lexicon.rst @@ -0,0 +1,18 @@ + + +.. _lexicon_queries: + +*************** +Lexicon queries +*************** + +Querying the lexicon is in many ways similar to querying annotations in graphs. + +.. code-block:: python + + with CorpusContext(config) as c: + q = c.query_lexicon(c.lexicon_phone).filter(c.lexicon_phone.label == 'aa') + print(q.all()) + +The above query will just return one result (as there is only one phone type with a given label) as opposed to the multiple +results returned when querying annotations. \ No newline at end of file diff --git a/docs/source/queries_ordering.rst b/docs/source/queries_ordering.rst deleted file mode 100644 index 89fce705..00000000 --- a/docs/source/queries_ordering.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. _ordering: - -******** -Ordering -******** - -The :code:`order_by` function is used to provide an ordering to the results of -a query. - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = c.query_graph(c.phone).filter(c.phone.label == 'aa') - q = q.filter(c.phone.following.label.in_(['r','l'])) - q = q.filter(c.phone.discourse == 'a_discourse') - q = q.order_by(c.phone.begin) - - results = q.all() - print(results) - - -The results for the above query will be ordered by the timepoint of the -annotation. Ordering by time is most useful for when looking at single -discourses (as including multiple discourses in a query would invalidate the -ordering). - -.. note:: In grouped aggregate queries, ordering is by default by the - first :code:`group_by` attribute. This can be changed by calling :code:`order_by` - before evaluating with :code:`aggregate`. diff --git a/docs/source/queries_reference.rst b/docs/source/queries_reference.rst new file mode 100644 index 00000000..cd46b1f9 --- /dev/null +++ b/docs/source/queries_reference.rst @@ -0,0 +1,122 @@ + + +.. _queries_reference: + +*************** +Query Reference +*************** + + +Getting elements +================ + +:code:`c.phone` +:code:`c.lexicon_phone` +:code:`c.speaker` + + +Attributes +========== + +In addition to any values that get added through enrichment, there are several built in attributes that allow access to +different parts of the database. 
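+For example, several of these attributes can be combined in a single query. The following sketch (the output column
+names are illustrative) returns the durations of word-final 'aa' phones along with the label of the containing word:
+
+.. code-block:: python
+
+    with CorpusContext(config) as c:
+        q = c.query_graph(c.phone).filter(c.phone.label == 'aa')
+        # word-final: the phone's end time is aligned with its word's end time
+        q = q.filter(c.phone.end == c.phone.word.end)
+        q = q.columns(c.phone.word.label.column_name('word'),
+                      c.phone.duration.column_name('duration'))
+        q = q.order_by(c.phone.begin)
+        print(q.all())
+
+The built-in attributes are summarized in the table below.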
+ ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Attribute type | Code | Notes | ++======================================+=================================================+========================================+ +| Label [1]_ | :code:`c.phone.label` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Name [2]_ | :code:`c.speaker.name` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Begin [3]_ | :code:`c.phone.begin` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| End [3]_ | :code:`c.phone.end` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Duration [3]_ | :code:`c.phone.duration` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Previous annotation [3]_ | :code:`c.phone.previous` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Following annotation [3]_ | :code:`c.phone.following` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Previous pause [3]_ | :code:`c.phone.word.previous_pause` | Must be from a `word` annotation | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Following pause [3]_ | :code:`c.phone.word.following_pause` | Must be from a `word` annotation | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Speaker [3]_ | :code:`c.phone.speaker` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Discourse [3]_ | :code:`c.phone.discourse` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Pitch attribute [3]_ | :code:`c.phone.pitch` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Formants attribute [3]_ | :code:`c.phone.formants` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Intensity attribute [3]_ | :code:`c.phone.intensity` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Minimum value [4]_ | :code:`c.phone.pitch.min` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Maximum value [4]_ | :code:`c.phone.pitch.max` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Mean value [4]_ | :code:`c.phone.pitch.mean` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Raw track [4]_ | 
:code:`c.phone.pitch.track` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Sampled track [4]_ | :code:`c.phone.pitch.sampled_track` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Interpolated track [4]_ | :code:`c.phone.pitch.interpolated_track` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ + + + +.. [1] Only available for graph annotations and lexicon annotations +.. [2] Only available for speakers/discourses +.. [3] Only available for graph annotations +.. [4] Only available for acoustic attributes + +Filters +======= + ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Filter type | Code | Notes | ++======================================+=================================================+========================================+ +| Equal | :code:`c.phone.label == 'aa'` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Not equal | :code:`c.phone.label != 'aa'` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Greater than | :code:`c.phone.begin > 0` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Greater than or equal | :code:`c.phone.begin >= 0` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Less than | :code:`c.phone.end < 10` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Less than or equal | :code:`c.phone.end <= 10` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| In | :code:`c.phone.label.in_(['aa','ae'])` | :code:`in_` can also take a query | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Not in |:code:`c.phone.label.not_in_(['aa'])` | :code:`not_in_` can also take a query | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Is null | :code:`c.phone.label == None` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Is not null | :code:`c.phone.label != None` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Regular expression match | :code:`c.phone.label.regex('a,')` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| In subset | :code:`c.phone.subset == 'syllabic'` | | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Not in subset | :code:`c.phone.subset != 'syllabic'` | | 
++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Precedes pause |:code:`c.word.precedes_pause == True` | Only available for graph annotations | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Does not precede pause |:code:`c.word.precedes_pause == False` | Only available for graph annotations | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Follows pause |:code:`c.word.follows_pause == True` | Only available for graph annotations | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Does not follow pause |:code:`c.word.follows_pause == False` | Only available for graph annotations | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Right aligned |:code:`c.phone.end == c.phone.word.end` | Only available for graph annotations | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Not right aligned |:code:`c.phone.end != c.phone.word.end`` | Only available for graph annotations | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Left aligned |:code:`c.phone.begin == c.phone.word.begin` | Only available for graph annotations | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ +| Not left aligned |:code:`c.phone.begin != c.phone.word.begin` | Only available for graph annotations | ++--------------------------------------+-------------------------------------------------+----------------------------------------+ \ No newline at end of file diff --git a/docs/source/queries_speakers.rst b/docs/source/queries_speakers.rst new file mode 100644 index 00000000..917aa0a2 --- /dev/null +++ b/docs/source/queries_speakers.rst @@ -0,0 +1,8 @@ + + +.. _speaker_queries: + +*************** +Speaker queries +*************** + diff --git a/docs/source/queries_subannotations.rst b/docs/source/queries_subannotations.rst deleted file mode 100644 index 18a344c3..00000000 --- a/docs/source/queries_subannotations.rst +++ /dev/null @@ -1,53 +0,0 @@ - - -.. _subannotations: - - -************** -Subannotations -************** - -Annotations can have subannotations associated with them. Subannotations -are not independent linguistic types, but have more information associated -with them than just a single property. For instance, voice onset time (VOT) -would be a subannotation of stops (as it has a begin time and an end time -that are of interest). Querying such subannotations would be performed as follows: - - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = c.query_graph(c.phone) - q = q.columns(c.phone.vot.duration.column_name('vot')) - - results = q.all() - print(results) - -In some cases, it may be desirable to have more than one subannotation of -the same type associated with a single annotation. For instance, -voicing during the closure of a stop can take place at both the beginning -and end of closure, with an unvoiced period in the middle. 
Using a similar -query as above would get the durations of each of these (in the order of -their begin time): - - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = c.query_graph(c.phone) - q = q.columns(c.phone.voicing_during_closure.duration.column_name('voicing')) - - results = q.all() - print(results) - -In some cases, we might like to know the total duration of such subannotations, -rather than the individual durations. To query that information, we can -use an ``aggregate``: - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = c.query_graph(c.phone) - results = q.aggregate(Sum(c.phone.voicing_during_closure.duration).column_name('total_voicing')) - - print(results) diff --git a/docs/source/queries_subpaths.rst b/docs/source/queries_subpaths.rst deleted file mode 100644 index 3aa06583..00000000 --- a/docs/source/queries_subpaths.rst +++ /dev/null @@ -1,81 +0,0 @@ - - -.. _subpaths: - - -******************** -Hierarchical queries -******************** - -A key facet of language is that it is hierarchical. Words contain phones, -and can be contained in larger utterances. There are several ways to -query hierarchical information. If we want to find all "aa" phones in the -word "dogs", then we can perform the following query: - - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = g.query_graph(g.phone).filter(g.phone.label == 'aa') - q = q.filter_contained_by(g.word.label == 'dogs') - - results = q.all() - print(results) - -The ``filter`` function can also be used for implicit containment queries: - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = g.query_graph(g.phone).filter(g.phone.label == 'aa') - q = q.filter(g.word.label == 'dogs') - - results = q.all() - print(results) - -Starting from the word level, we might want to know what phones each word -contains. - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = g.query_graph(g.word) - q = q.columns(g.word.phone.label.column('phones')) - - results = q.all() - print(results) - -In the output of the above query, there would be a column labeled "phones" -that contains a list of the labels of phones that belong to the word -(``['d', 'aa', 'g', 'z']``). Any property of phones can be queried this -way (i.e., 'begin', 'end', 'duration', etc). - -Special keywords exist for these containment columns. The keyword 'rate' -will return the elements per second for the word (i.e., phones per second). -The keyword 'count' will return the number of elements. - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = g.query_graph(g.word) - q = q.columns(g.word.phone.rate.column('phones_per_second')) - q = q.columns(g.word.phone.count.column('num_phones')) - - results = q.all() - print(results) - -Additionally, there is a special keyword can be used to query the position -of a contained element in a containing one. - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = g.query_graph(g.phone).filter(g.phone.label == 'aa') - q = q.filter(g.word.label == 'dogs') - q = q.columns(g.word.phone.position.column_name('position_in_word')) - - results = q.all() - print(results) - -The above query should return ``2`` for the value of 'position_in_word', -as the "aa" phone would be the second phone. 
diff --git a/docs/source/queries_subsets.rst b/docs/source/queries_subsets.rst deleted file mode 100644 index ca612a19..00000000 --- a/docs/source/queries_subsets.rst +++ /dev/null @@ -1,52 +0,0 @@ - -.. _subsetting: - - -********************** -Subsetting annotations -********************** - -In linguistics, it's often useful to specify subsets of symbols as particular classes. -For instance, phonemes are grouped together by whether they are syllabic, -their manner/place of articulation, and vowel height/backness/rounding, and -words are grouped by their parts of speech. - -In PolyglotDB, creating a subset is as follows: - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = c.query_graph(c.phone).filter(c.phone.label.in_(['aa', 'ih'])) - q.set_type('+syllabic') - -After running that code, the phones 'aa' and 'ih' would be marked in the database -as '+syllabic'. The string for the category can contain any characters. -Once this category is encoded in the database, queries can be run just on -those subsets. - -.. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - q = c.query_graph(c.phone.subset('+syllabic')) - - results = q.all() - print(results) - -The above query will return all instances of 'aa' and 'ih' phones. - -.. note:: Using repeated subsets repeatedly in queries can make them overly - verbose. The objects that the queries use are normal Python objects - and can therefore be assigned to variables for easier use. - - .. code-block:: python - - with CorpusContext(corpus_name = 'my_corpus', **graph_db_login) as c: - syl = c.phone.subset('+syllabic') - q = c.query_graph(syl) - q = q.filter(syl.end == c.word.end) - - results = q.all() - print(results) - - The above query would find all phones marked by '+syllabic' that are - at the ends of words. 
diff --git a/polyglotdb/corpus/featured.py b/polyglotdb/corpus/featured.py index 4801a79f..c744e001 100644 --- a/polyglotdb/corpus/featured.py +++ b/polyglotdb/corpus/featured.py @@ -2,9 +2,22 @@ from ..io.importer import feature_data_to_csvs, import_feature_csvs from .lexical import LexicalContext from ..exceptions import SubsetError +from ..io.enrichment.features import enrich_features_from_csv class FeaturedContext(LexicalContext): + def enrich_inventory_from_csv(self, path): + """ + Enriches corpus from a csv file + + Parameters + ---------- + path : str + the path to the csv file + """ + + enrich_features_from_csv(self, path) + def encode_class(self, phones, label): """ encodes phone classes diff --git a/polyglotdb/corpus/lexical.py b/polyglotdb/corpus/lexical.py index 2f69d50d..9906745f 100644 --- a/polyglotdb/corpus/lexical.py +++ b/polyglotdb/corpus/lexical.py @@ -1,4 +1,5 @@ from ..io.importer import lexicon_data_to_csvs, import_lexicon_csvs +from ..io.enrichment.lexical import enrich_lexicon_from_csv from .spoken import SpokenContext @@ -25,3 +26,16 @@ def enrich_lexicon(self, lexicon_data, type_data=None, case_sensitive=False): def reset_lexicon(self): pass + + def enrich_lexicon_from_csv(self, path, case_sensitive=False): + """ + Enriches lexicon from a csv file + + Parameters + ---------- + path : str + the path to the csv file + case_sensitive : boolean + Defaults to false + """ + enrich_lexicon_from_csv(self, path, case_sensitive) \ No newline at end of file diff --git a/polyglotdb/corpus/spoken.py b/polyglotdb/corpus/spoken.py index 8b546f61..debfd991 100644 --- a/polyglotdb/corpus/spoken.py +++ b/polyglotdb/corpus/spoken.py @@ -1,9 +1,32 @@ from ..io.importer import (speaker_data_to_csvs, import_speaker_csvs, discourse_data_to_csvs, import_discourse_csvs) from .audio import AudioContext +from ..io.enrichment.spoken import enrich_speakers_from_csv, enrich_discourses_from_csv class SpokenContext(AudioContext): + def enrich_speakers_from_csv(self, path): + """ + Enriches speakers from a csv file + + Parameters + ---------- + path : str + the path to the csv file + """ + enrich_speakers_from_csv(self, path) + + def enrich_discourses_from_csv(self, path): + """ + Enriches discourses from a csv file + + Parameters + ---------- + path : str + the path to the csv file + """ + enrich_discourses_from_csv(self, path) + def get_speakers_in_discourse(self,discourse): query = '''MATCH (d:Discourse:{corpus_name})<-[:speaks_in]-(s:Speaker:{corpus_name}) WHERE d.name = {{discourse_name}} diff --git a/polyglotdb/corpus/syllabic.py b/polyglotdb/corpus/syllabic.py index 46d26bb7..36d78fb7 100644 --- a/polyglotdb/corpus/syllabic.py +++ b/polyglotdb/corpus/syllabic.py @@ -140,7 +140,7 @@ def has_syllabics(self): def has_syllables(self): return 'syllable' in self.hierarchy.annotation_types - def encode_syllables(self, algorithm='probabilistic', call_back=None, stop_check=None): + def encode_syllables(self, algorithm='maxonset', call_back=None, stop_check=None): """ Encodes syllables to a corpus @@ -326,7 +326,7 @@ def enrich_syllables(self, syllable_data, type_data=None): self.encode_hierarchy() - def encode_stress(self, pattern): + def _generate_stress_enrichment(self, pattern): """ encode stress based off of CMUDict cues @@ -357,9 +357,7 @@ def encode_stress(self, pattern): return enrich_dict - # self.enrich_syllables(enrich_dict) - - def encode_tone(self, pattern): + def _generate_tone_enrichment(self, pattern): """ encode tone based off of CMUDict cues """ @@ -385,18 +383,25 @@ def 
encode_tone(self, pattern): enrich_dict.update({syl: {'tone': end}}) return enrich_dict - # self.enrich_syllables(enrich_dict) - def encode_stresstone_to_syllables(self, encode_type, regex): + def encode_stress_to_syllables(self, regex=None, clean_phone_label=True): + if regex is None: + regex = '[0-9]' - if encode_type == 'stress': - if regex == "": - enrich_dict = self.encode_stress('[0-9]') - else: - enrich_dict = self.encode_stress(regex) - else: - enrich_dict = self.encode_tone(regex) + enrich_dict = self._generate_stress_enrichment(regex) + + if clean_phone_label: + self.remove_pattern(regex) + self.enrich_syllables(enrich_dict) + self.encode_hierarchy() + + def encode_tone_to_syllables(self, regex=None, clean_phone_label=True): + if regex is None: + regex = '[0-9]' + + enrich_dict = self._generate_tone_enrichment(regex) - self.remove_pattern(regex) + if clean_phone_label: + self.remove_pattern(regex) self.enrich_syllables(enrich_dict) self.encode_hierarchy() diff --git a/tests/test_client.py b/tests/test_client.py index ae33790b..d032412b 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -17,8 +17,9 @@ def test_client_create_database(graph_db, localhost): response = client.create_database('test_database') ports = client.get_ports('test_database') - assert ports == {'graph_http_port': 7404, 'graph_bolt_port': 7406, - 'acoustic_http_port': 8404} + assert 'graph_http_port' in ports + assert 'graph_bolt_port' in ports + assert 'acoustic_http_port' in ports def test_client_database_list(localhost): diff --git a/tests/test_enrich.py b/tests/test_enrich.py index 70da59cc..7393e63c 100644 --- a/tests/test_enrich.py +++ b/tests/test_enrich.py @@ -111,7 +111,7 @@ def test_stress_enrichment(stressed_config): with CorpusContext(stressed_config) as c: c.encode_syllabic_segments(syllabics) c.encode_syllables("maxonset") - c.encode_stresstone_to_syllables('stress', '[0-2]$') + c.encode_stress_to_syllables(regex='[0-2]$') assert (c.hierarchy.has_type_property("syllable", "stress")) diff --git a/tests/test_summarized.py b/tests/test_summarized.py index 0e1bb023..c4ae7d06 100644 --- a/tests/test_summarized.py +++ b/tests/test_summarized.py @@ -167,7 +167,7 @@ def test_syllable_mean_duration(summarized_config): print("syllable mean:") res = g.get_measure('duration', 'mean', 'syllable') print(res) - assert (len(res) == 55) + assert (len(res) == 57) for i, r in enumerate(res): if r[0] == 'w.er.d.z': break @@ -203,7 +203,7 @@ def test_syllable_median(summarized_config): res = g.get_measure('duration', 'median', 'syllable') print(res) - assert (len(res) == 55) + assert (len(res) == 57) def test_syllable_std_dev(summarized_config): @@ -215,7 +215,7 @@ def test_syllable_std_dev(summarized_config): print("syllable std dev:") res = g.get_measure('duration', 'stdev', 'syllable') - assert (len(res) == 55) + assert (len(res) == 57) g.reset_syllables()