Merge pull request #1183 from RasaHQ/docs_reorg

Docs reorg
RasaHQ · Jun 27, 2018 · 7a37ff8 · 7a37ff8
2 parents ed00590 + f9cd039
commit 7a37ff8
Show file tree

Hide file tree

Showing 29 changed files with 837 additions and 922 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -12,7 +12,7 @@ env:
   # needed to fix issues with boto during testing:
   # https://github.com/travis-ci/travis-ci/issues/7940
   global: BOTO_CONFIG=/dev/null
-install:
+install: 
   - pip install git+https://github.com/tmbo/MITIE.git
   - pip install -r alt_requirements/requirements_dev.txt
   - pip install -e .
@@ -27,7 +27,7 @@ install:
 before_script:
   - mkdir $HOME/tmp
   - export TMPDIR=$HOME/tmp
-script:
+script: 
   - py.test --pep8 -m pep8
   - py.test tests/base --cov rasa_nlu -v --cov-append
   - py.test tests/training --cov rasa_nlu -v --cov-append
@@ -38,9 +38,12 @@ jobs:
   - stage: docs
     if: fork = false  # forked repositories skip building the docs
     install:
-    - pip install sphinx==1.5.2 sphinx-autobuild==0.7.1 sphinxcontrib-versioning==2.2.1 sphinxcontrib-programoutput==0.11 nbsphinx==0.2.18
+    - pip install sphinx==1.7.5 sphinx-autobuild==0.7.1 sphinxcontrib-programoutput==0.11 nbsphinx==0.3.2 sphinxcontrib-httpdomain==1.5.0
+    - pip install git+https://${GITHUB_TOKEN}:x-oauth-basic@github.com/RasaHQ/sphinxcontrib-versioning.git@version_list
+    - pip install git+https://${GITHUB_TOKEN}:x-oauth-basic@github.com/RasaHQ/rasabaster.git#egg=rasabaster
     - pip install -e git://github.com/RasaHQ/sphinx_rtd_theme.git#egg=sphinx_rtd_theme
     - pip install -e .
+    - pip list
     script:
     - eval "$(ssh-agent -s)"; touch docs/key; chmod 0600 docs/key
     - openssl aes-256-cbc
@@ -53,7 +56,7 @@ jobs:
     - git config --global user.name "Travis CI"
     - git remote set-url --push origin "git@github.com:$TRAVIS_REPO_SLUG"
     - export ${!TRAVIS*}
-    - sphinx-versioning push docs docs .
+    - sphinx-versioning push -r docs_reorg docs newdocs . -- -b dirhtml  -A html_theme=rasabaster
   - stage: deploy
     install: skip
     script: skip

diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css
diff --git a/docs/choosing_pipeline.rst b/docs/choosing_pipeline.rst
@@ -0,0 +1,82 @@
+.. _choosing_pipeline:
+
+Choosing a Pipeline
+===================
+
+
+Pre-trained or custom word vectors?
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The two most important pipelines are ``tensorflow_embedding`` and ``spacy_sklearn``.
+The biggest difference between them is that the ``spacy_sklearn`` pipeline uses pre-trained
+word vectors from either GloVe or fastText. In contrast, the ``tensorflow_embedding`` pipeline
+doesn't use any pre-trained word vectors; it fits them specifically for your dataset.
+
+The advantage of the ``spacy_sklearn`` pipeline is that if you have a training example like:
+"I want to buy apples", and Rasa is asked to predict the intent for "I want to buy pears", your model
+already knows that the words "apples" and "pears" are very similar. This is especially useful
+if you don't have very much training data (< 500 labeled examples). 
+
+The advantage of the ``tensorflow_embedding`` pipeline is that your word vectors will be customised 
+for your domain. For example, in general English, the word "balance" is closely related to "symmetry",
+but very different to the word "cash". In a banking domain, "balance" and "cash" are closely related
+and you'd like your model to capture that.
+
+
+You can read more about this topic `here <https://medium.com/rasa-blog/supervised-word-vectors-from-scratch-in-rasa-nlu-6daf794efcd8>`_.
+
+
+As a rule of thumb, if there is a spaCy model for your language, 
+then the ``spacy_sklearn`` pipeline is a good choice for getting started. 
+However, once you have more training data (>500 sentences),
+it is highly recommended that you try the ``tensorflow_embedding`` pipeline.
+
+There are also the ``mitie`` and ``mitie_sklearn`` pipelines, which use MITIE as a source of word vectors. 
+We do not recommend that you use these; they are likely to be deprecated in a future release.
+
+
+Multiple Intents
+^^^^^^^^^^^^^^^^
+
+
+If you want to split intents into multiple labels, 
+e.g. for predicting multiple intents or for modeling hierarchical intent structure,
+you can only do this with the ``tensorflow_embedding`` pipeline.
+To do this, use these flags:
+
+    - ``intent_tokenization_flag``: if ``true``, the algorithm will split the intent labels into tokens and use a bag-of-words representation for them;
+    - ``intent_split_symbol``: sets the delimiter string used to split the intent labels (default: ``_``).
+
+
+Here's an example configuration:
+
+.. code-block:: yaml
+
+    language: "en"
+
+    pipeline:
+    - name: "intent_featurizer_count_vectors"
+    - name: "intent_classifier_tensorflow_embedding"
+      intent_tokenization_flag: true
+      intent_split_symbol: "_"
+
+
+
+Custom pipelines
+~~~~~~~~~~~~~~~~
+
+Creating your own pipelines is possible by directly passing the names of the
+components to Rasa NLU in the ``pipeline`` configuration variable, e.g.
+
+.. code-block:: yaml
+
+    pipeline:
+    - name: "nlp_spacy"
+    - name: "ner_crf"
+    - name: "ner_synonyms"
+
+This creates a pipeline that only does entity recognition, but no
+intent classification. Hence, the output will not contain any
+useful intents. You can find the details of each component in :ref:`section_pipeline`.
+
+If you want to use custom components in your pipeline, see :ref:`section_customcomponents`. 
diff --git a/docs/closeloop.rst b/docs/closeloop.rst
diff --git a/docs/community.rst b/docs/community.rst
diff --git a/docs/conf.py b/docs/conf.py
@@ -19,6 +19,7 @@
 
 # -- General configuration ------------------------------------------------
 import re
+import rasabaster
 
 nitpicky = True
 linkcheck_anchors_ignore = [".*"]
@@ -32,13 +33,18 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
+    'sphinx.ext.autodoc',
+    'sphinx.ext.napoleon',
     'sphinx.ext.mathjax',
     'sphinx.ext.doctest',
     'sphinxcontrib.programoutput',
+    'sphinxcontrib.httpdomain',
+    'rasabaster.button',
+    'rasabaster.card'
 ]
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = []
 
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
@@ -119,11 +125,18 @@
 # a list of builtin themes.
 #html_theme = 'default'
 
-html_theme = "sphinx_rtd_theme"
+html_theme = "rasabaster"
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
+html_theme_options = {
+    'description': "Rasa",
+    'github_user': 'RasaHQ',
+    'github_repo': 'rasa_nlu',
+    'fixed_sidebar': True,
+    'product': "NLU"
+}
 #html_theme_options = {}
 
 # Add any paths that contain custom themes here, relative to this directory.
@@ -165,7 +178,9 @@
 #html_use_smartypants = True
 
 # Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+html_sidebars = {
+   '**': ['rasaglobaltoc.html']
+}
 
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
@@ -310,7 +325,8 @@
 scv_show_banner = True
 scv_banner_greatest_tag = True
 scv_sort = ('semver',)
-scv_whitelist_branches = ('master', 'latest')
+scv_whitelist_branches = ('docs_reorg','master', 'latest')
+#scv_whitelist_tags = ('None',)
 scv_grm_exclude = ('README.md', '.gitignore', '.nojekyll', 'CNAME')
 scv_whitelist_tags = (re.compile(r'^[123456789]+\.[0-9]+\.\d+$'),
                       re.compile(r'^0\.[123456789][23456789]+\.\d+$'),