Skip to content

Commit

Permalink
docs: fetch all external resources when building docs (#142)
Browse files Browse the repository at this point in the history
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
Borda and pre-commit-ci[bot] committed Jun 29, 2023
1 parent de1efa9 commit e0acfd8
Show file tree
Hide file tree
Showing 20 changed files with 217 additions and 58 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/ci-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,8 @@ jobs:

- name: Install dependencies
run: |
pip install -e . -U --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
pip install -q -r requirements/test.txt
python --version
pip install -e . -U -r requirements/dev-tests.txt \
-f https://download.pytorch.org/whl/cpu/torch_stable.html
pip --version
pip list
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/ci-use-checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ jobs:
check-docs:
uses: ./.github/workflows/check-docs.yml
with:
requirements-file: 'requirements/dev-docs.txt'

check-md-links:
uses: ./.github/workflows/check-md-links.yml
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/deploy-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:

- name: Install dependencies
run: |
pip install -e . -U -q -r requirements/docs.txt \
pip install -e . -U -q -r requirements/dev-docs.txt \
-f https://download.pytorch.org/whl/cpu/torch_stable.html
# install Texlive, see https://linuxconfig.org/how-to-install-latex-on-ubuntu-20-04-focal-fossa-linux
sudo apt-get update --fix-missing
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ coverage.xml

# Sphinx documentation
docs/_build/
docs/source/fetched-s3-assets
docs/source/api/
docs/source/*.md

Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export SPHINX_MOCK_REQUIREMENTS=0

test:
pip install -q -r requirements/cli.txt
pip install -q -r requirements/test.txt
pip install -q -r requirements/dev-tests.txt

# use this to run tests
rm -rf _ckpt_*
Expand All @@ -19,7 +19,7 @@ test:
# python -m coverage run --source src/lightning_utilities -m pytest --flake8 --durations=0 -v -k

docs: clean
pip install -e . -q -r requirements/docs.txt
pip install -e . -q -r requirements/dev-docs.txt
cd docs && $(MAKE) html

clean:
Expand Down
39 changes: 14 additions & 25 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,6 @@
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import glob
import inspect
import os
Expand All @@ -19,6 +13,14 @@
import pt_lightning_sphinx_theme

import lightning_utilities
from lightning_utilities.docs import fetch_external_assets

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.


_PATH_HERE = os.path.abspath(os.path.dirname(__file__))
_PATH_ROOT = os.path.realpath(os.path.join(_PATH_HERE, "..", ".."))
Expand All @@ -43,37 +45,24 @@

# -- Project documents -------------------------------------------------------


def _transform_changelog(path_in: str, path_out: str) -> None:
with open(path_in) as fp:
chlog_lines = fp.readlines()
# enrich short subsub-titles to be unique
chlog_ver = ""
for i, ln in enumerate(chlog_lines):
if ln.startswith("## "):
chlog_ver = ln[2:].split("-")[0].strip()
elif ln.startswith("### "):
ln = ln.replace("###", f"### {chlog_ver} -")
chlog_lines[i] = ln
with open(path_out, "w") as fp:
fp.writelines(chlog_lines)
fetch_external_assets(docs_folder=_PATH_HERE)


def _convert_markdown(path_in: str, path_out: str) -> None:
# export the READme
def _convert_markdown(path_in: str, path_out: str, path_root: str) -> None:
with open(path_in) as fp:
readme = fp.read()
# TODO: temp fix removing SVG badges and GIF, because PDF cannot show them
readme = re.sub(r"(\[!\[.*\))", "", readme)
readme = re.sub(r"(!\[.*.gif\))", "", readme)
folder_names = (os.path.basename(p) for p in glob.glob(os.path.join(_PATH_ROOT, "*")) if os.path.isdir(p))
folder_names = (os.path.basename(p) for p in glob.glob(os.path.join(path_root, "*")) if os.path.isdir(p))
for dir_name in folder_names:
readme = readme.replace("](%s/" % dir_name, "](%s/" % os.path.join(_PATH_ROOT, dir_name))
readme = readme.replace("](%s/" % dir_name, "](%s/" % os.path.join(path_root, dir_name))
with open(path_out, "w") as fp:
fp.write(readme)


# export the READme
_convert_markdown(os.path.join(_PATH_ROOT, "README.md"), "readme.md")
_convert_markdown(os.path.join(_PATH_ROOT, "README.md"), "readme.md", _PATH_ROOT)

# -- General configuration ---------------------------------------------------

Expand Down
11 changes: 8 additions & 3 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@
Lightning-DevToolbox documentation
==================================

.. figure:: https://pl-public-data.s3.amazonaws.com/assets_lightning/Lightning.gif
:alt: What is Lightning gif.
:width: 100 %

.. toctree::
:maxdepth: 1
:name: start
:caption: Start here
:name: content
:caption: Overview

readme
Utilities readme <readme>


Indices and tables
Expand Down
16 changes: 16 additions & 0 deletions requirements/dev-docs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
sphinx >=6.0,<7.0
myst-parser >=2.0.0, <3.0.0
nbsphinx >=0.8.5
ipython[notebook]
pandoc >=1.0
docutils >=0.16
# https://github.com/jupyterlab/jupyterlab_pygments/issues/5
pygments >=2.4.1
sphinxcontrib-fulltoc >=1.0
sphinxcontrib-mockautodoc

pt-lightning-sphinx-theme @ https://github.com/Lightning-AI/lightning_sphinx_theme/archive/master.zip
sphinx-autodoc-typehints >=1.0
sphinx-paramlinks >=0.5.1
sphinx-togglebutton >=0.2
sphinx-copybutton >=0.3
4 changes: 4 additions & 0 deletions requirements/dev-tests.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
coverage ==6.5.0
pytest ==7.2.0
pytest-cov ==4.0.0
pytest-timeout ==2.1.0
17 changes: 1 addition & 16 deletions requirements/docs.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1 @@
sphinx >=6.0,<7.0
myst-parser >=2.0.0, <3.0.0
nbsphinx >=0.8.5
ipython[notebook]
pandoc >=1.0
docutils >=0.16
# https://github.com/jupyterlab/jupyterlab_pygments/issues/5
pygments >=2.4.1
sphinxcontrib-fulltoc >=1.0
sphinxcontrib-mockautodoc

pt-lightning-sphinx-theme @ https://github.com/Lightning-AI/lightning_sphinx_theme/archive/master.zip
sphinx-autodoc-typehints >=1.0
sphinx-paramlinks >=0.5.1
sphinx-togglebutton >=0.2
sphinx-copybutton >=0.3
requests >=2.0.0
4 changes: 0 additions & 4 deletions requirements/test.txt

This file was deleted.

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def _load_py_module(fname: str, pkg: str = "lightning_utilities"):
# make extras as automated loading
requirements_extra = {}
for fpath in glob.glob(os.path.join(_PATH_REQUIRE, "*.txt")):
if os.path.basename(fpath) == "base.txt":
if os.path.basename(fpath) in ("base.txt", "dev-docs.txt", "dev-tests.txt"):
continue
name, _ = os.path.splitext(os.path.basename(fpath))
with open(fpath) as fp:
Expand Down
4 changes: 1 addition & 3 deletions src/lightning_utilities/__about__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import time

__version__ = "0.8.0"
__version__ = "0.9.0dev"
__author__ = "Lightning AI et al."
__author_email__ = "pytorch@lightning.ai"
__license__ = "Apache-2.0"
__copyright__ = f"Copyright (c) 2022-{time.strftime('%Y')}, {__author__}."
__homepage__ = "https://github.com/Lightning-AI/utilities"
__docs__ = "PyTorch Lightning Sample project."
__long_doc__ = """
What is it?
-----------
This package allows for sharing GH workflows, CI/CD assistance actions, and Python utilities across the Lightning
ecosystem.
"""
Expand Down
1 change: 1 addition & 0 deletions src/lightning_utilities/docs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from lightning_utilities.docs.retriever import fetch_external_assets
27 changes: 27 additions & 0 deletions src/lightning_utilities/docs/formatting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# http://www.apache.org/licenses/LICENSE-2.0
#
import glob
import os
import re


def _transform_changelog(path_in: str, path_out: str) -> None:
"""Adjust changelog titles so not to be duplicated.
Args:
path_in: input MD file
path_out: output also MD file
"""
with open(path_in) as fp:
chlog_lines = fp.readlines()
# enrich short subsub-titles to be unique
chlog_ver = ""
for i, ln in enumerate(chlog_lines):
if ln.startswith("## "):
chlog_ver = ln[2:].split("-")[0].strip()
elif ln.startswith("### "):
ln = ln.replace("###", f"### {chlog_ver} -")
chlog_lines[i] = ln
with open(path_out, "w") as fp:
fp.writelines(chlog_lines)
99 changes: 99 additions & 0 deletions src/lightning_utilities/docs/retriever.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# http://www.apache.org/licenses/LICENSE-2.0
#
import glob
import logging
import os
import re
from typing import List, Tuple

import requests


def _download_file(file_url: str, folder: str) -> str:
    """Download a file from URL to a particular folder.

    Args:
        file_url: remote URL of the resource
        folder: destination folder; the file keeps its remote basename

    Returns:
        the basename of the downloaded file

    Raises:
        requests.HTTPError: if the server replies with an error status.
    """
    fname = os.path.basename(file_url)
    file_path = os.path.join(folder, fname)
    if os.path.isfile(file_path):
        logging.warning(f'given file "{file_path}" already exists and will be overwritten with {file_url}')
    # see: https://stackoverflow.com/a/34957875
    # timeout so a stalled server cannot hang the docs build indefinitely
    rq = requests.get(file_url, timeout=30)
    # fail loudly instead of silently saving an HTML error page as the asset
    rq.raise_for_status()
    with open(file_path, "wb") as outfile:
        outfile.write(rq.content)
    return fname


def _search_all_occurrences(list_files: List[str], pattern: str) -> List[str]:
"""Search for all occurrences of specific patter in a collection of files.
Args:
list_files: list of files to be scanned
pattern: pattern for search, reg. expression
"""
collected = []
for file_path in list_files:
with open(file_path, encoding="UTF-8") as fo:
body = fo.read()
found = re.findall(pattern, body)
collected += found
return collected


def _replace_remote_with_local(
file_path: str, docs_folder: str, pairs_url_path: List[Tuple[str, str]], base_depth: int = 2
) -> None:
"""Replace all URL with local files in a given file.
Args:
file_path: file for replacement
docs_folder: the location of docs related to the project root
pairs_url_path: pairs of URL and local file path to be swapped
"""
# drop the default/global path to the docs
relt_path = os.path.dirname(file_path).replace(docs_folder, "")
# filter the path starting with / as not empty folder names
depth = len([p for p in relt_path.split(os.path.sep) if p])
with open(file_path, encoding="UTF-8") as fo:
body = fo.read()
for url, fpath in pairs_url_path:
if depth:
path_up = [".."] * depth
fpath = os.path.join(*path_up, fpath)
body = body.replace(url, fpath)
with open(file_path, "w", encoding="UTF-8") as fw:
fw.write(body)


def fetch_external_assets(
    docs_folder: str = "docs/source",
    assets_folder: str = "fetched-s3-assets",
    file_pattern: str = "*.rst",
    retrieve_pattern: str = r"https?://[-a-zA-Z0-9_]+\.s3\.[-a-zA-Z0-9()_\\+.\\/=]+",
) -> None:
    """Search all URL in docs, download these files locally and replace online with local version.

    Args:
        docs_folder: the location of docs related to the project root
        assets_folder: a folder inside ``docs_folder`` to be created and saving online assets
        file_pattern: what kind of files shall be scanned
        retrieve_pattern: pattern for reg. expression to search URL/S3 resources
    """
    list_files = glob.glob(os.path.join(docs_folder, "**", file_pattern), recursive=True)
    if not list_files:
        logging.warning(f'no files were listed in folder "{docs_folder}" and pattern "{file_pattern}"')
        return

    urls = _search_all_occurrences(list_files, pattern=retrieve_pattern)
    if not urls:
        logging.info(f"no resources/assets were match in {docs_folder} for {retrieve_pattern}")
        return
    target_folder = os.path.join(docs_folder, assets_folder)
    os.makedirs(target_folder, exist_ok=True)
    # deduplicate up-front so the progress counter below refers to the set actually downloaded
    # (previously it logged a 0-based index against the non-deduplicated list length)
    unique_urls = set(urls)
    pairs_url_file = []
    for i, url in enumerate(unique_urls, start=1):
        logging.info(f" >> downloading ({i}/{len(unique_urls)}): {url}")
        fname = _download_file(url, target_folder)
        pairs_url_file.append((url, os.path.join(assets_folder, fname)))

    # rewrite every scanned file so the remote references point at the local copies
    for fpath in list_files:
        _replace_remote_with_local(fpath, docs_folder, pairs_url_file)
3 changes: 3 additions & 0 deletions src/lightning_utilities/test/warning.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# http://www.apache.org/licenses/LICENSE-2.0
#
import re
import warnings
from contextlib import contextmanager
Expand Down
4 changes: 4 additions & 0 deletions tests/unittests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import os

# folder containing the unit tests themselves
_PATH_UNITTESTS = os.path.dirname(__file__)
# repository root, i.e. two levels above ``tests/unittests``
_PATH_ROOT = os.path.dirname(os.path.dirname(_PATH_UNITTESTS))
Empty file.
30 changes: 30 additions & 0 deletions tests/unittests/docs/test_retriever.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os.path
import shutil

from unittests import _PATH_ROOT

from lightning_utilities.docs import fetch_external_assets


def test_retriever_s3():
    """Fetch S3 assets for the real docs tree and check URLs are rewritten to local paths."""
    docs_dir = os.path.join(_PATH_ROOT, "docs", "source")
    index_rst = os.path.join(docs_dir, "index.rst")
    nested_rst = os.path.join(docs_dir, "any", "extra", "page.rst")
    # duplicate the index two levels deeper to exercise the relative-path logic
    os.makedirs(os.path.dirname(nested_rst), exist_ok=True)
    shutil.copy(index_rst, nested_rst)

    fetch_external_assets(docs_folder=docs_dir)

    with open(index_rst, encoding="UTF-8") as fh:
        content = fh.read()
    # the image reference survives the rewrite...
    assert "Lightning.gif" in content
    # ...but no longer points at S3
    assert ".s3." not in content

    with open(nested_rst, encoding="UTF-8") as fh:
        content = fh.read()
    # the image reference survives in the nested copy as well
    assert "Lightning.gif" in content
    # and its local path climbs back up the two extra directory levels
    assert os.path.sep.join(["..", ".."]) in content

0 comments on commit e0acfd8

Please sign in to comment.