DOC: Doc build for a single doc made much faster, and clean up (panda…
datapythonista authored and Pingviinituutti committed Feb 28, 2019
1 parent 217510b commit 8880ded
Showing 3 changed files with 159 additions and 240 deletions.
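
For context: the refactor of doc/make.py below makes a one-page rebuild cheap. The requested page is selected with --single, and everything else is excluded through the SPHINX_PATTERN environment variable (consumed outside this file) instead of regenerating index.rst and passing exclude patterns on the sphinx-build command line, as the removed code did. Based on the argument parser in this diff, typical invocations (illustrative, run from the doc/ directory) look like:

    python make.py html --single pandas.DataFrame.head
    python make.py html --no-api    # full docs, skipping the API reference
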
292 changes: 92 additions & 200 deletions doc/make.py
@@ -15,11 +15,9 @@
import sys
import os
import shutil
# import subprocess
import subprocess
import argparse
from contextlib import contextmanager
import webbrowser
import jinja2


DOC_PATH = os.path.dirname(os.path.abspath(__file__))
@@ -28,179 +26,68 @@
BUILD_DIRS = ['doctrees', 'html', 'latex', 'plots', '_static', '_templates']


@contextmanager
def _maybe_exclude_notebooks():
"""Skip building the notebooks if pandoc is not installed.
This assumes that nbsphinx is installed.
Skip notebook conversion if:
1. nbconvert isn't installed, or
2. nbconvert is installed, but pandoc isn't
"""
# TODO move to exclude_pattern
base = os.path.dirname(__file__)
notebooks = [os.path.join(base, 'source', nb)
for nb in ['style.ipynb']]
contents = {}

def _remove_notebooks():
for nb in notebooks:
with open(nb, 'rt') as f:
contents[nb] = f.read()
os.remove(nb)

try:
import nbconvert
except ImportError:
sys.stderr.write('Warning: nbconvert not installed. '
'Skipping notebooks.\n')
_remove_notebooks()
else:
try:
nbconvert.utils.pandoc.get_pandoc_version()
except nbconvert.utils.pandoc.PandocMissing:
sys.stderr.write('Warning: Pandoc is not installed. '
'Skipping notebooks.\n')
_remove_notebooks()

yield

for nb, content in contents.items():
with open(nb, 'wt') as f:
f.write(content)


class DocBuilder:
"""Class to wrap the different commands of this script.
"""
Class to wrap the different commands of this script.
All public methods of this class can be called as parameters of the
script.
"""
def __init__(self, num_jobs=1, include_api=True, single_doc=None,
verbosity=0):
def __init__(self, num_jobs=0, include_api=True, single_doc=None,
verbosity=0, warnings_are_errors=False):
self.num_jobs = num_jobs
self.include_api = include_api
self.verbosity = verbosity
self.single_doc = None
self.single_doc_type = None
if single_doc is not None:
self._process_single_doc(single_doc)
self.exclude_patterns = self._exclude_patterns

self._generate_index()
if self.single_doc_type == 'docstring':
self._run_os('sphinx-autogen', '-o',
'source/generated_single', 'source/index.rst')

@property
def _exclude_patterns(self):
"""Docs source files that will be excluded from building."""
# TODO move maybe_exclude_notebooks here
if self.single_doc is not None:
rst_files = [f for f in os.listdir(SOURCE_PATH)
if ((f.endswith('.rst') or f.endswith('.ipynb'))
and (f != 'index.rst')
and (f != '{0}.rst'.format(self.single_doc)))]
if self.single_doc_type != 'api':
rst_files += ['generated/*.rst']
elif not self.include_api:
rst_files = ['api.rst', 'generated/*.rst']
else:
rst_files = ['generated_single/*.rst']

exclude_patterns = ','.join(
'{!r}'.format(i) for i in ['**.ipynb_checkpoints'] + rst_files)

return exclude_patterns
self.warnings_are_errors = warnings_are_errors

if single_doc:
single_doc = self._process_single_doc(single_doc)
include_api = False
os.environ['SPHINX_PATTERN'] = single_doc
elif not include_api:
os.environ['SPHINX_PATTERN'] = '-api'

self.single_doc_html = None
if single_doc and single_doc.endswith('.rst'):
self.single_doc_html = os.path.splitext(single_doc)[0] + '.html'
elif single_doc:
self.single_doc_html = 'generated/pandas.{}.html'.format(
single_doc)

def _process_single_doc(self, single_doc):
"""Extract self.single_doc (base name) and self.single_doc_type from
passed single_doc kwarg.
"""
self.include_api = False

if single_doc == 'api.rst' or single_doc == 'api':
self.single_doc_type = 'api'
self.single_doc = 'api'
elif os.path.exists(os.path.join(SOURCE_PATH, single_doc)):
self.single_doc_type = 'rst'
Make sure the provided value for --single is a path to an existing
.rst/.ipynb file, or a pandas object that can be imported.
if 'whatsnew' in single_doc:
basename = single_doc
For example, categorial.rst or pandas.DataFrame.head. For the latter,
return the corresponding file path
(e.g. generated/pandas.DataFrame.head.rst).
"""
base_name, extension = os.path.splitext(single_doc)
if extension in ('.rst', '.ipynb'):
if os.path.exists(os.path.join(SOURCE_PATH, single_doc)):
return single_doc
else:
basename = os.path.basename(single_doc)
self.single_doc = os.path.splitext(basename)[0]
elif os.path.exists(
os.path.join(SOURCE_PATH, '{}.rst'.format(single_doc))):
self.single_doc_type = 'rst'
self.single_doc = single_doc
elif single_doc is not None:
raise FileNotFoundError('File {} not found'.format(single_doc))

elif single_doc.startswith('pandas.'):
try:
obj = pandas # noqa: F821
for name in single_doc.split('.'):
obj = getattr(obj, name)
except AttributeError:
raise ValueError('Single document not understood, it should '
'be a file in doc/source/*.rst (e.g. '
'"contributing.rst" or a pandas function or '
'method (e.g. "pandas.DataFrame.head")')
raise ImportError('Could not import {}'.format(single_doc))
else:
self.single_doc_type = 'docstring'
if single_doc.startswith('pandas.'):
self.single_doc = single_doc[len('pandas.'):]
else:
self.single_doc = single_doc

def _copy_generated_docstring(self):
"""Copy existing generated (from api.rst) docstring page because
this is more correct in certain cases (where a custom autodoc
template is used).
"""
fname = os.path.join(SOURCE_PATH, 'generated',
'pandas.{}.rst'.format(self.single_doc))
temp_dir = os.path.join(SOURCE_PATH, 'generated_single')

try:
os.makedirs(temp_dir)
except OSError:
pass

if os.path.exists(fname):
try:
# copying to make sure sphinx always thinks it is new
# and needs to be re-generated (to pick source code changes)
shutil.copy(fname, temp_dir)
except: # noqa
pass

def _generate_index(self):
"""Create index.rst file with the specified sections."""
if self.single_doc_type == 'docstring':
self._copy_generated_docstring()

with open(os.path.join(SOURCE_PATH, 'index.rst.template')) as f:
t = jinja2.Template(f.read())

with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as f:
f.write(t.render(include_api=self.include_api,
single_doc=self.single_doc,
single_doc_type=self.single_doc_type))

@staticmethod
def _create_build_structure():
"""Create directories required to build documentation."""
for dirname in BUILD_DIRS:
try:
os.makedirs(os.path.join(BUILD_PATH, dirname))
except OSError:
pass
return single_doc[len('pandas.'):]
else:
raise ValueError(('--single={} not understood. Value should be a '
'valid path to a .rst or .ipynb file, or a '
'valid pandas object (e.g. categorical.rst or '
'pandas.DataFrame.head)').format(single_doc))

@staticmethod
def _run_os(*args):
"""Execute a command as a OS terminal.
"""
Execute a command as a OS terminal.
Parameters
----------
@@ -211,16 +98,11 @@ def _run_os(*args):
--------
>>> DocBuilder()._run_os('python', '--version')
"""
# TODO check_call should be more safe, but it fails with
# exclude patterns, needs investigation
# subprocess.check_call(args, stderr=subprocess.STDOUT)
exit_status = os.system(' '.join(args))
if exit_status:
msg = 'Command "{}" finished with exit code {}'
raise RuntimeError(msg.format(' '.join(args), exit_status))
subprocess.check_call(args, stdout=sys.stdout, stderr=sys.stderr)

def _sphinx_build(self, kind):
"""Call sphinx to build documentation.
"""
Call sphinx to build documentation.
Attribute `num_jobs` from the class is used.
@@ -236,43 +118,44 @@ def _sphinx_build(self, kind):
raise ValueError('kind must be html or latex, '
'not {}'.format(kind))

self._run_os('sphinx-build',
'-j{}'.format(self.num_jobs),
'-b{}'.format(kind),
'-{}'.format(
'v' * self.verbosity) if self.verbosity else '',
'-d"{}"'.format(os.path.join(BUILD_PATH, 'doctrees')),
'-Dexclude_patterns={}'.format(self.exclude_patterns),
'"{}"'.format(SOURCE_PATH),
'"{}"'.format(os.path.join(BUILD_PATH, kind)))

def _open_browser(self):
base_url = os.path.join('file://', DOC_PATH, 'build', 'html')
if self.single_doc_type == 'docstring':
url = os.path.join(
base_url,
'generated_single', 'pandas.{}.html'.format(self.single_doc))
else:
url = os.path.join(base_url, '{}.html'.format(self.single_doc))
self.clean()

cmd = ['sphinx-build', '-b', kind]
if self.num_jobs:
cmd += ['-j', str(self.num_jobs)]
if self.warnings_are_errors:
cmd.append('-W')
if self.verbosity:
cmd.append('-{}'.format('v' * self.verbosity))
cmd += ['-d', os.path.join(BUILD_PATH, 'doctrees'),
SOURCE_PATH, os.path.join(BUILD_PATH, kind)]
self._run_os(*cmd)

def _open_browser(self, single_doc_html):
"""
Open a browser tab showing the page built for the requested single doc.
"""
url = os.path.join('file://', DOC_PATH, 'build', 'html',
single_doc_html)
webbrowser.open(url, new=2)

def html(self):
"""Build HTML documentation."""
self._create_build_structure()
with _maybe_exclude_notebooks():
self._sphinx_build('html')
zip_fname = os.path.join(BUILD_PATH, 'html', 'pandas.zip')
if os.path.exists(zip_fname):
os.remove(zip_fname)

if self.single_doc is not None:
self._open_browser()
shutil.rmtree(os.path.join(SOURCE_PATH, 'generated_single'),
ignore_errors=True)
"""
Build HTML documentation.
"""
self._sphinx_build('html')
zip_fname = os.path.join(BUILD_PATH, 'html', 'pandas.zip')
if os.path.exists(zip_fname):
os.remove(zip_fname)

if self.single_doc_html is not None:
self._open_browser(self.single_doc_html)

def latex(self, force=False):
"""Build PDF documentation."""
self._create_build_structure()
"""
Build PDF documentation.
"""
if sys.platform == 'win32':
sys.stderr.write('latex build has not been tested on windows\n')
else:
@@ -289,18 +172,24 @@ def latex(self, force=False):
self._run_os('make')

def latex_forced(self):
"""Build PDF documentation with retries to find missing references."""
"""
Build PDF documentation with retries to find missing references.
"""
self.latex(force=True)

@staticmethod
def clean():
"""Clean documentation generated files."""
"""
Clean documentation generated files.
"""
shutil.rmtree(BUILD_PATH, ignore_errors=True)
shutil.rmtree(os.path.join(SOURCE_PATH, 'generated'),
ignore_errors=True)

def zip_html(self):
"""Compress HTML documentation into a zip file."""
"""
Compress HTML documentation into a zip file.
"""
zip_fname = os.path.join(BUILD_PATH, 'html', 'pandas.zip')
if os.path.exists(zip_fname):
os.remove(zip_fname)
@@ -326,7 +215,7 @@ def main():
help='command to run: {}'.format(', '.join(cmds)))
argparser.add_argument('--num-jobs',
type=int,
default=1,
default=0,
help='number of jobs used by sphinx-build')
argparser.add_argument('--no-api',
default=False,
@@ -345,6 +234,9 @@
argparser.add_argument('-v', action='count', dest='verbosity', default=0,
help=('increase verbosity (can be repeated), '
'passed to the sphinx build command'))
argparser.add_argument('--warnings-are-errors', '-W',
action='store_true',
help='fail if warnings are raised')
args = argparser.parse_args()

if args.command not in cmds:
@@ -364,7 +256,7 @@ def main():
os.environ['MPLBACKEND'] = 'module://matplotlib.backends.backend_agg'

builder = DocBuilder(args.num_jobs, not args.no_api, args.single,
args.verbosity)
args.verbosity, args.warnings_are_errors)
getattr(builder, args.command)()


[The rest of doc/make.py and the other two changed files in this commit are not shown.]
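
Of the changed files not shown, one presumably teaches the Sphinx configuration to honour the SPHINX_PATTERN variable set in DocBuilder.__init__ above. A minimal sketch of that idea, assuming a conf.py that walks doc/source and fills Sphinx's exclude_patterns list (names and exact rules are illustrative, not the committed code):

import os

source_path = os.path.dirname(os.path.abspath(__file__))
exclude_patterns = ['**.ipynb_checkpoints']

pattern = os.environ.get('SPHINX_PATTERN')
if pattern:
    for dirname, _, fnames in os.walk(source_path):
        for fname in fnames:
            if os.path.splitext(fname)[-1] in ('.rst', '.ipynb'):
                rel_fname = os.path.relpath(os.path.join(dirname, fname),
                                            source_path)
                if rel_fname == 'index.rst':
                    continue  # the index page is always built
                elif pattern == '-api' and (rel_fname == 'api.rst' or
                                            rel_fname.startswith('generated/')):
                    exclude_patterns.append(rel_fname)  # skip the API reference
                elif pattern != '-api' and rel_fname != pattern:
                    exclude_patterns.append(rel_fname)  # keep only the one doc

For a --single pandas object (e.g. pandas.DataFrame.head) the real configuration additionally has to generate the page for that object; that part is omitted here.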
