Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docs: fetch all external resources when building docs #142

Merged
merged 30 commits into from
Jun 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .github/workflows/ci-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,8 @@ jobs:

- name: Install dependencies
run: |
pip install -e . -U --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
pip install -q -r requirements/test.txt
python --version
pip install -e . -U -r requirements/dev-tests.txt \
-f https://download.pytorch.org/whl/cpu/torch_stable.html
pip --version
pip list

Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/ci-use-checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ jobs:

check-docs:
uses: ./.github/workflows/check-docs.yml
with:
requirements-file: 'requirements/dev-docs.txt'

check-md-links:
uses: ./.github/workflows/check-md-links.yml
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/deploy-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:

- name: Install dependencies
run: |
pip install -e . -U -q -r requirements/docs.txt \
pip install -e . -U -q -r requirements/dev-docs.txt \
-f https://download.pytorch.org/whl/cpu/torch_stable.html
# install Texlive, see https://linuxconfig.org/how-to-install-latex-on-ubuntu-20-04-focal-fossa-linux
sudo apt-get update --fix-missing
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ coverage.xml

# Sphinx documentation
docs/_build/
docs/source/fetched-s3-assets
docs/source/api/
docs/source/*.md

Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export SPHINX_MOCK_REQUIREMENTS=0

test:
pip install -q -r requirements/cli.txt
pip install -q -r requirements/test.txt
pip install -q -r requirements/dev-tests.txt

# use this to run tests
rm -rf _ckpt_*
Expand All @@ -19,7 +19,7 @@ test:
# python -m coverage run --source src/lightning_utilities -m pytest --flake8 --durations=0 -v -k

docs: clean
pip install -e . -q -r requirements/docs.txt
pip install -e . -q -r requirements/dev-docs.txt
cd docs && $(MAKE) html

clean:
Expand Down
39 changes: 14 additions & 25 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,6 @@
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import glob
import inspect
import os
Expand All @@ -19,6 +13,14 @@
import pt_lightning_sphinx_theme

import lightning_utilities
from lightning_utilities.docs import fetch_external_assets

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.


_PATH_HERE = os.path.abspath(os.path.dirname(__file__))
_PATH_ROOT = os.path.realpath(os.path.join(_PATH_HERE, "..", ".."))
Expand All @@ -43,37 +45,24 @@

# -- Project documents -------------------------------------------------------


def _transform_changelog(path_in: str, path_out: str) -> None:
with open(path_in) as fp:
chlog_lines = fp.readlines()
# enrich short subsub-titles to be unique
chlog_ver = ""
for i, ln in enumerate(chlog_lines):
if ln.startswith("## "):
chlog_ver = ln[2:].split("-")[0].strip()
elif ln.startswith("### "):
ln = ln.replace("###", f"### {chlog_ver} -")
chlog_lines[i] = ln
with open(path_out, "w") as fp:
fp.writelines(chlog_lines)
fetch_external_assets(docs_folder=_PATH_HERE)


def _convert_markdown(path_in: str, path_out: str) -> None:
# export the READme
def _convert_markdown(path_in: str, path_out: str, path_root: str) -> None:
with open(path_in) as fp:
readme = fp.read()
# TODO: temp fix removing SVG badges and GIF, because PDF cannot show them
readme = re.sub(r"(\[!\[.*\))", "", readme)
readme = re.sub(r"(!\[.*.gif\))", "", readme)
folder_names = (os.path.basename(p) for p in glob.glob(os.path.join(_PATH_ROOT, "*")) if os.path.isdir(p))
folder_names = (os.path.basename(p) for p in glob.glob(os.path.join(path_root, "*")) if os.path.isdir(p))
for dir_name in folder_names:
readme = readme.replace("](%s/" % dir_name, "](%s/" % os.path.join(_PATH_ROOT, dir_name))
readme = readme.replace("](%s/" % dir_name, "](%s/" % os.path.join(path_root, dir_name))
with open(path_out, "w") as fp:
fp.write(readme)


# export the README
_convert_markdown(os.path.join(_PATH_ROOT, "README.md"), "readme.md")
_convert_markdown(os.path.join(_PATH_ROOT, "README.md"), "readme.md", _PATH_ROOT)

# -- General configuration ---------------------------------------------------

Expand Down
11 changes: 8 additions & 3 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@

Lightning-DevToolbox documentation
==================================

.. figure:: https://pl-public-data.s3.amazonaws.com/assets_lightning/Lightning.gif
:alt: What is Lightning gif.
:width: 100 %

.. toctree::
:maxdepth: 1
:name: start
:caption: Start here
:name: content
:caption: Overview

readme
Utilities readme <readme>


Indices and tables
Expand Down
16 changes: 16 additions & 0 deletions requirements/dev-docs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
sphinx >=6.0,<7.0
myst-parser >=2.0.0, <3.0.0
nbsphinx >=0.8.5
ipython[notebook]
pandoc >=1.0
docutils >=0.16
# https://github.com/jupyterlab/jupyterlab_pygments/issues/5
pygments >=2.4.1
sphinxcontrib-fulltoc >=1.0
sphinxcontrib-mockautodoc

pt-lightning-sphinx-theme @ https://github.com/Lightning-AI/lightning_sphinx_theme/archive/master.zip
sphinx-autodoc-typehints >=1.0
sphinx-paramlinks >=0.5.1
sphinx-togglebutton >=0.2
sphinx-copybutton >=0.3
4 changes: 4 additions & 0 deletions requirements/dev-tests.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
coverage ==6.5.0
pytest ==7.2.0
pytest-cov ==4.0.0
pytest-timeout ==2.1.0
17 changes: 1 addition & 16 deletions requirements/docs.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1 @@
sphinx >=6.0,<7.0
myst-parser >=2.0.0, <3.0.0
nbsphinx >=0.8.5
ipython[notebook]
pandoc >=1.0
docutils >=0.16
# https://github.com/jupyterlab/jupyterlab_pygments/issues/5
pygments >=2.4.1
sphinxcontrib-fulltoc >=1.0
sphinxcontrib-mockautodoc

pt-lightning-sphinx-theme @ https://github.com/Lightning-AI/lightning_sphinx_theme/archive/master.zip
sphinx-autodoc-typehints >=1.0
sphinx-paramlinks >=0.5.1
sphinx-togglebutton >=0.2
sphinx-copybutton >=0.3
requests >=2.0.0
4 changes: 0 additions & 4 deletions requirements/test.txt

This file was deleted.

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def _load_py_module(fname: str, pkg: str = "lightning_utilities"):
# make extras as automated loading
requirements_extra = {}
for fpath in glob.glob(os.path.join(_PATH_REQUIRE, "*.txt")):
if os.path.basename(fpath) == "base.txt":
if os.path.basename(fpath) in ("base.txt", "dev-docs.txt", "dev-tests.txt"):
continue
name, _ = os.path.splitext(os.path.basename(fpath))
with open(fpath) as fp:
Expand Down
4 changes: 1 addition & 3 deletions src/lightning_utilities/__about__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import time

__version__ = "0.8.0"
__version__ = "0.9.0dev"
__author__ = "Lightning AI et al."
__author_email__ = "pytorch@lightning.ai"
__license__ = "Apache-2.0"
__copyright__ = f"Copyright (c) 2022-{time.strftime('%Y')}, {__author__}."
__homepage__ = "https://github.com/Lightning-AI/utilities"
__docs__ = "PyTorch Lightning Sample project."
__long_doc__ = """
What is it?
-----------
This package allows for sharing GH workflows, CI/CD assistance actions, and Python utilities across the Lightning
ecosystem.
"""
Expand Down
1 change: 1 addition & 0 deletions src/lightning_utilities/docs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from lightning_utilities.docs.retriever import fetch_external_assets
27 changes: 27 additions & 0 deletions src/lightning_utilities/docs/formatting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# http://www.apache.org/licenses/LICENSE-2.0
#
import glob
import os
import re


def _transform_changelog(path_in: str, path_out: str) -> None:
"""Adjust changelog titles so not to be duplicated.

Args:
path_in: input MD file
path_out: output also MD file
"""
with open(path_in) as fp:
chlog_lines = fp.readlines()
# enrich short subsub-titles to be unique
chlog_ver = ""
for i, ln in enumerate(chlog_lines):
if ln.startswith("## "):
chlog_ver = ln[2:].split("-")[0].strip()
elif ln.startswith("### "):
ln = ln.replace("###", f"### {chlog_ver} -")
chlog_lines[i] = ln
with open(path_out, "w") as fp:
fp.writelines(chlog_lines)
99 changes: 99 additions & 0 deletions src/lightning_utilities/docs/retriever.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# http://www.apache.org/licenses/LICENSE-2.0
#
import glob
import logging
import os
import re
from typing import List, Tuple

import requests


def _download_file(file_url: str, folder: str, timeout: float = 30.0) -> str:
    """Download a file from URL to a particular folder.

    Args:
        file_url: URL of the file to download
        folder: local folder where the file is written under the URL's base name
        timeout: seconds to wait for the server before giving up on the request

    Returns:
        The base name of the saved file (without ``folder``).

    Raises:
        requests.HTTPError: if the server responds with an error status code
    """
    fname = os.path.basename(file_url)
    file_path = os.path.join(folder, fname)
    if os.path.isfile(file_path):
        logging.warning(f'given file "{file_path}" already exists and will be overwritten with {file_url}')
    # see: https://stackoverflow.com/a/34957875
    # without a timeout a stalled server would hang the whole docs build
    rq = requests.get(file_url, timeout=timeout)
    # fail before touching the disk so an HTTP error page is never stored as an asset
    rq.raise_for_status()
    with open(file_path, "wb") as outfile:
        outfile.write(rq.content)
    return fname


def _search_all_occurrences(list_files: List[str], pattern: str) -> List[str]:
"""Search for all occurrences of specific patter in a collection of files.

Args:
list_files: list of files to be scanned
pattern: pattern for search, reg. expression
"""
collected = []
for file_path in list_files:
with open(file_path, encoding="UTF-8") as fo:
body = fo.read()
found = re.findall(pattern, body)
collected += found
return collected


def _replace_remote_with_local(
file_path: str, docs_folder: str, pairs_url_path: List[Tuple[str, str]], base_depth: int = 2
) -> None:
"""Replace all URL with local files in a given file.

Args:
file_path: file for replacement
docs_folder: the location of docs related to the project root
pairs_url_path: pairs of URL and local file path to be swapped
"""
# drop the default/global path to the docs
relt_path = os.path.dirname(file_path).replace(docs_folder, "")
# filter the path starting with / as not empty folder names
depth = len([p for p in relt_path.split(os.path.sep) if p])
with open(file_path, encoding="UTF-8") as fo:
body = fo.read()
for url, fpath in pairs_url_path:
if depth:
path_up = [".."] * depth
fpath = os.path.join(*path_up, fpath)
body = body.replace(url, fpath)
with open(file_path, "w", encoding="UTF-8") as fw:
fw.write(body)


def fetch_external_assets(
    docs_folder: str = "docs/source",
    assets_folder: str = "fetched-s3-assets",
    file_pattern: str = "*.rst",
    retrieve_pattern: str = r"https?://[-a-zA-Z0-9_]+\.s3\.[-a-zA-Z0-9()_\\+.\\/=]+",
) -> None:
    """Search all URL in docs, download these files locally and replace online with local version.

    Nothing is downloaded or created when no file matches ``file_pattern`` or
    when no URL matches ``retrieve_pattern``; only a log message is emitted.

    Args:
        docs_folder: the location of docs related to the project root
        assets_folder: a folder inside ``docs_folder`` to be created and saving online assets
        file_pattern: what kind of files shall be scanned
        retrieve_pattern: pattern for reg. expression to search URL/S3 resources
    """
    list_files = glob.glob(os.path.join(docs_folder, "**", file_pattern), recursive=True)
    if not list_files:
        logging.warning(f'no files were listed in folder "{docs_folder}" and pattern "{file_pattern}"')
        return

    urls = _search_all_occurrences(list_files, pattern=retrieve_pattern)
    if not urls:
        logging.info(f"no resources/assets were match in {docs_folder} for {retrieve_pattern}")
        return
    target_folder = os.path.join(docs_folder, assets_folder)
    os.makedirs(target_folder, exist_ok=True)
    # de-duplicate once and sort, so the download order and the progress counter are
    # deterministic and the reported total equals the number of actual downloads
    unique_urls = sorted(set(urls))
    pairs_url_file = []
    for i, url in enumerate(unique_urls, start=1):
        logging.info(f" >> downloading ({i}/{len(unique_urls)}): {url}")
        fname = _download_file(url, target_folder)
        pairs_url_file.append((url, os.path.join(assets_folder, fname)))

    for fpath in list_files:
        _replace_remote_with_local(fpath, docs_folder, pairs_url_file)
3 changes: 3 additions & 0 deletions src/lightning_utilities/test/warning.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# http://www.apache.org/licenses/LICENSE-2.0
#
import re
import warnings
from contextlib import contextmanager
Expand Down
4 changes: 4 additions & 0 deletions tests/unittests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import os

_PATH_UNITTESTS = os.path.dirname(__file__)
_PATH_ROOT = os.path.dirname(os.path.dirname(_PATH_UNITTESTS))
Empty file.
30 changes: 30 additions & 0 deletions tests/unittests/docs/test_retriever.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os.path
import shutil

from unittests import _PATH_ROOT

from lightning_utilities.docs import fetch_external_assets


def test_retriever_s3():
    """Check that S3 links are swapped for local assets with the right relative depth.

    The project's ``index.rst`` is copied into a temporary docs folder first, so
    the test neither rewrites tracked documentation nor leaves generated pages
    and downloaded assets behind in the repository.
    """
    import tempfile  # local import keeps the module-level imports untouched

    path_index_src = os.path.join(_PATH_ROOT, "docs", "source", "index.rst")
    with tempfile.TemporaryDirectory() as path_docs:
        path_index = os.path.join(path_docs, "index.rst")
        path_page = os.path.join(path_docs, "any", "extra", "page.rst")
        os.makedirs(os.path.dirname(path_page), exist_ok=True)
        shutil.copy(path_index_src, path_index)
        shutil.copy(path_index_src, path_page)

        fetch_external_assets(docs_folder=path_docs)

        with open(path_index, encoding="UTF-8") as fo:
            body = fo.read()
        # that the image exists~
        assert "Lightning.gif" in body
        # but it is not sourced from S3
        assert ".s3." not in body

        with open(path_page, encoding="UTF-8") as fo:
            body = fo.read()
        # that the image exists~
        assert "Lightning.gif" in body
        # check the proper depth
        assert os.path.sep.join(["..", ".."]) in body