DOC: Doc build for a single doc made much faster, and clean up (panda…
datapythonista authored and Pingviinituutti committed Feb 28, 2019
1 parent 217510b commit 8880ded
Showing 3 changed files with 159 additions and 240 deletions.
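
For context: the refactor of doc/make.py below makes a one-page rebuild cheap. The requested page is selected with --single, and everything else is excluded through the SPHINX_PATTERN environment variable (consumed outside this file) instead of regenerating index.rst and passing exclude patterns on the sphinx-build command line, as the removed code did. Based on the argument parser in this diff, typical invocations (illustrative, run from the doc/ directory) look like:

    python make.py html --single pandas.DataFrame.head
    python make.py html --no-api    # full docs, skipping the API reference
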
292 changes: 92 additions & 200 deletions doc/make.py
@@ -15,11 +15,9 @@
import sys
import os
import shutil
# import subprocess
import subprocess
import argparse
from contextlib import contextmanager
import webbrowser
import jinja2


DOC_PATH = os.path.dirname(os.path.abspath(__file__))
@@ -28,179 +26,68 @@
BUILD_DIRS = ['doctrees', 'html', 'latex', 'plots', '_static', '_templates']


@contextmanager
def _maybe_exclude_notebooks():
"""Skip building the notebooks if pandoc is not installed.
This assumes that nbsphinx is installed.
Skip notebook conversion if:
1. nbconvert isn't installed, or
2. nbconvert is installed, but pandoc isn't
"""
# TODO move to exclude_pattern
base = os.path.dirname(__file__)
notebooks = [os.path.join(base, 'source', nb)
for nb in ['style.ipynb']]
contents = {}

def _remove_notebooks():
for nb in notebooks:
with open(nb, 'rt') as f:
contents[nb] = f.read()
os.remove(nb)

try:
import nbconvert
except ImportError:
sys.stderr.write('Warning: nbconvert not installed. '
'Skipping notebooks.\n')
_remove_notebooks()
else:
try:
nbconvert.utils.pandoc.get_pandoc_version()
except nbconvert.utils.pandoc.PandocMissing:
sys.stderr.write('Warning: Pandoc is not installed. '
'Skipping notebooks.\n')
_remove_notebooks()

yield

for nb, content in contents.items():
with open(nb, 'wt') as f:
f.write(content)


class DocBuilder:
"""Class to wrap the different commands of this script.
"""
Class to wrap the different commands of this script.
All public methods of this class can be called as parameters of the
script.
"""
def __init__(self, num_jobs=1, include_api=True, single_doc=None,
verbosity=0):
def __init__(self, num_jobs=0, include_api=True, single_doc=None,
verbosity=0, warnings_are_errors=False):
self.num_jobs = num_jobs
self.include_api = include_api
self.verbosity = verbosity
self.single_doc = None
self.single_doc_type = None
if single_doc is not None:
self._process_single_doc(single_doc)
self.exclude_patterns = self._exclude_patterns

self._generate_index()
if self.single_doc_type == 'docstring':
self._run_os('sphinx-autogen', '-o',
'source/generated_single', 'source/index.rst')

@property
def _exclude_patterns(self):
"""Docs source files that will be excluded from building."""
# TODO move maybe_exclude_notebooks here
if self.single_doc is not None:
rst_files = [f for f in os.listdir(SOURCE_PATH)
if ((f.endswith('.rst') or f.endswith('.ipynb'))
and (f != 'index.rst')
and (f != '{0}.rst'.format(self.single_doc)))]
if self.single_doc_type != 'api':
rst_files += ['generated/*.rst']
elif not self.include_api:
rst_files = ['api.rst', 'generated/*.rst']
else:
rst_files = ['generated_single/*.rst']

exclude_patterns = ','.join(
'{!r}'.format(i) for i in ['**.ipynb_checkpoints'] + rst_files)

return exclude_patterns
self.warnings_are_errors = warnings_are_errors

if single_doc:
single_doc = self._process_single_doc(single_doc)
include_api = False
os.environ['SPHINX_PATTERN'] = single_doc
elif not include_api:
os.environ['SPHINX_PATTERN'] = '-api'

self.single_doc_html = None
if single_doc and single_doc.endswith('.rst'):
self.single_doc_html = os.path.splitext(single_doc)[0] + '.html'
elif single_doc:
self.single_doc_html = 'generated/pandas.{}.html'.format(
single_doc)

def _process_single_doc(self, single_doc):
"""Extract self.single_doc (base name) and self.single_doc_type from
passed single_doc kwarg.
"""
self.include_api = False

if single_doc == 'api.rst' or single_doc == 'api':
self.single_doc_type = 'api'
self.single_doc = 'api'
elif os.path.exists(os.path.join(SOURCE_PATH, single_doc)):
self.single_doc_type = 'rst'
Make sure the provided value for --single is a path to an existing
.rst/.ipynb file, or a pandas object that can be imported.
if 'whatsnew' in single_doc:
basename = single_doc
For example, categorial.rst or pandas.DataFrame.head. For the latter,
return the corresponding file path
(e.g. generated/pandas.DataFrame.head.rst).
"""
base_name, extension = os.path.splitext(single_doc)
if extension in ('.rst', '.ipynb'):
if os.path.exists(os.path.join(SOURCE_PATH, single_doc)):
return single_doc
else:
basename = os.path.basename(single_doc)
self.single_doc = os.path.splitext(basename)[0]
elif os.path.exists(
os.path.join(SOURCE_PATH, '{}.rst'.format(single_doc))):
self.single_doc_type = 'rst'
self.single_doc = single_doc
elif single_doc is not None:
raise FileNotFoundError('File {} not found'.format(single_doc))

elif single_doc.startswith('pandas.'):
try:
obj = pandas # noqa: F821
for name in single_doc.split('.'):
obj = getattr(obj, name)
except AttributeError:
raise ValueError('Single document not understood, it should '
'be a file in doc/source/*.rst (e.g. '
'"contributing.rst" or a pandas function or '
'method (e.g. "pandas.DataFrame.head")')
raise ImportError('Could not import {}'.format(single_doc))
else:
self.single_doc_type = 'docstring'
if single_doc.startswith('pandas.'):
self.single_doc = single_doc[len('pandas.'):]
else:
self.single_doc = single_doc

def _copy_generated_docstring(self):
"""Copy existing generated (from api.rst) docstring page because
this is more correct in certain cases (where a custom autodoc
template is used).
"""
fname = os.path.join(SOURCE_PATH, 'generated',
'pandas.{}.rst'.format(self.single_doc))
temp_dir = os.path.join(SOURCE_PATH, 'generated_single')

try:
os.makedirs(temp_dir)
except OSError:
pass

if os.path.exists(fname):
try:
# copying to make sure sphinx always thinks it is new
# and needs to be re-generated (to pick source code changes)
shutil.copy(fname, temp_dir)
except: # noqa
pass

def _generate_index(self):
"""Create index.rst file with the specified sections."""
if self.single_doc_type == 'docstring':
self._copy_generated_docstring()

with open(os.path.join(SOURCE_PATH, 'index.rst.template')) as f:
t = jinja2.Template(f.read())

with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as f:
f.write(t.render(include_api=self.include_api,
single_doc=self.single_doc,
single_doc_type=self.single_doc_type))

@staticmethod
def _create_build_structure():
"""Create directories required to build documentation."""
for dirname in BUILD_DIRS:
try:
os.makedirs(os.path.join(BUILD_PATH, dirname))
except OSError:
pass
return single_doc[len('pandas.'):]
else:
raise ValueError(('--single={} not understood. Value should be a '
'valid path to a .rst or .ipynb file, or a '
'valid pandas object (e.g. categorical.rst or '
'pandas.DataFrame.head)').format(single_doc))

@staticmethod
def _run_os(*args):
"""Execute a command as a OS terminal.
"""
Execute a command as a OS terminal.
Parameters
----------
@@ -211,16 +98,11 @@ def _run_os(*args):
--------
>>> DocBuilder()._run_os('python', '--version')
"""
# TODO check_call should be more safe, but it fails with
# exclude patterns, needs investigation
# subprocess.check_call(args, stderr=subprocess.STDOUT)
exit_status = os.system(' '.join(args))
if exit_status:
msg = 'Command "{}" finished with exit code {}'
raise RuntimeError(msg.format(' '.join(args), exit_status))
subprocess.check_call(args, stdout=sys.stdout, stderr=sys.stderr)

def _sphinx_build(self, kind):
"""Call sphinx to build documentation.
"""
Call sphinx to build documentation.
Attribute `num_jobs` from the class is used.
@@ -236,43 +118,44 @@ def _sphinx_build(self, kind):
raise ValueError('kind must be html or latex, '
'not {}'.format(kind))

self._run_os('sphinx-build',
'-j{}'.format(self.num_jobs),
'-b{}'.format(kind),
'-{}'.format(
'v' * self.verbosity) if self.verbosity else '',
'-d"{}"'.format(os.path.join(BUILD_PATH, 'doctrees')),
'-Dexclude_patterns={}'.format(self.exclude_patterns),
'"{}"'.format(SOURCE_PATH),
'"{}"'.format(os.path.join(BUILD_PATH, kind)))

def _open_browser(self):
base_url = os.path.join('file://', DOC_PATH, 'build', 'html')
if self.single_doc_type == 'docstring':
url = os.path.join(
base_url,
'generated_single', 'pandas.{}.html'.format(self.single_doc))
else:
url = os.path.join(base_url, '{}.html'.format(self.single_doc))
self.clean()

cmd = ['sphinx-build', '-b', kind]
if self.num_jobs:
cmd += ['-j', str(self.num_jobs)]
if self.warnings_are_errors:
cmd.append('-W')
if self.verbosity:
cmd.append('-{}'.format('v' * self.verbosity))
cmd += ['-d', os.path.join(BUILD_PATH, 'doctrees'),
SOURCE_PATH, os.path.join(BUILD_PATH, kind)]
self._run_os(*cmd)

def _open_browser(self, single_doc_html):
"""
Open a browser tab showing the page built for the requested single doc.
"""
url = os.path.join('file://', DOC_PATH, 'build', 'html',
single_doc_html)
webbrowser.open(url, new=2)

def html(self):
"""Build HTML documentation."""
self._create_build_structure()
with _maybe_exclude_notebooks():
self._sphinx_build('html')
zip_fname = os.path.join(BUILD_PATH, 'html', 'pandas.zip')
if os.path.exists(zip_fname):
os.remove(zip_fname)

if self.single_doc is not None:
self._open_browser()
shutil.rmtree(os.path.join(SOURCE_PATH, 'generated_single'),
ignore_errors=True)
"""
Build HTML documentation.
"""
self._sphinx_build('html')
zip_fname = os.path.join(BUILD_PATH, 'html', 'pandas.zip')
if os.path.exists(zip_fname):
os.remove(zip_fname)

if self.single_doc_html is not None:
self._open_browser(self.single_doc_html)

def latex(self, force=False):
"""Build PDF documentation."""
self._create_build_structure()
"""
Build PDF documentation.
"""
if sys.platform == 'win32':
sys.stderr.write('latex build has not been tested on windows\n')
else:
@@ -289,18 +172,24 @@ def latex(self, force=False):
self._run_os('make')

def latex_forced(self):
"""Build PDF documentation with retries to find missing references."""
"""
Build PDF documentation with retries to find missing references.
"""
self.latex(force=True)

@staticmethod
def clean():
"""Clean documentation generated files."""
"""
Clean documentation generated files.
"""
shutil.rmtree(BUILD_PATH, ignore_errors=True)
shutil.rmtree(os.path.join(SOURCE_PATH, 'generated'),
ignore_errors=True)

def zip_html(self):
"""Compress HTML documentation into a zip file."""
"""
Compress HTML documentation into a zip file.
"""
zip_fname = os.path.join(BUILD_PATH, 'html', 'pandas.zip')
if os.path.exists(zip_fname):
os.remove(zip_fname)
@@ -326,7 +215,7 @@ def main():
help='command to run: {}'.format(', '.join(cmds)))
argparser.add_argument('--num-jobs',
type=int,
default=1,
default=0,
help='number of jobs used by sphinx-build')
argparser.add_argument('--no-api',
default=False,
@@ -345,6 +234,9 @@
argparser.add_argument('-v', action='count', dest='verbosity', default=0,
help=('increase verbosity (can be repeated), '
'passed to the sphinx build command'))
argparser.add_argument('--warnings-are-errors', '-W',
action='store_true',
help='fail if warnings are raised')
args = argparser.parse_args()

if args.command not in cmds:
@@ -364,7 +256,7 @@ def main():
os.environ['MPLBACKEND'] = 'module://matplotlib.backends.backend_agg'

builder = DocBuilder(args.num_jobs, not args.no_api, args.single,
args.verbosity)
args.verbosity, args.warnings_are_errors)
getattr(builder, args.command)()


[The rest of doc/make.py and the other two changed files in this commit are not shown.]
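
Of the changed files not shown, one presumably teaches the Sphinx configuration to honour the SPHINX_PATTERN variable set in DocBuilder.__init__ above. A minimal sketch of that idea, assuming a conf.py that walks doc/source and fills Sphinx's exclude_patterns list (names and exact rules are illustrative, not the committed code):

import os

source_path = os.path.dirname(os.path.abspath(__file__))
exclude_patterns = ['**.ipynb_checkpoints']

pattern = os.environ.get('SPHINX_PATTERN')
if pattern:
    for dirname, _, fnames in os.walk(source_path):
        for fname in fnames:
            if os.path.splitext(fname)[-1] in ('.rst', '.ipynb'):
                rel_fname = os.path.relpath(os.path.join(dirname, fname),
                                            source_path)
                if rel_fname == 'index.rst':
                    continue  # the index page is always built
                elif pattern == '-api' and (rel_fname == 'api.rst' or
                                            rel_fname.startswith('generated/')):
                    exclude_patterns.append(rel_fname)  # skip the API reference
                elif pattern != '-api' and rel_fname != pattern:
                    exclude_patterns.append(rel_fname)  # keep only the one doc

For a --single pandas object (e.g. pandas.DataFrame.head) the real configuration additionally has to generate the page for that object; that part is omitted here.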
