From 013a9193c12054d0a302f0d81ea1a7af66dd5086 Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Mon, 30 Jan 2023 16:45:36 +0000 Subject: [PATCH] Exclude web.archive.org from linkcheck Also simplify linkcheck test code --- .github/workflows/test_fast.yml | 2 +- tests/unit/test_links.py | 53 +++++++++++++-------------------- 2 files changed, 22 insertions(+), 33 deletions(-) diff --git a/.github/workflows/test_fast.yml b/.github/workflows/test_fast.yml index 1be7bd78d9e..3234b501a50 100644 --- a/.github/workflows/test_fast.yml +++ b/.github/workflows/test_fast.yml @@ -115,5 +115,5 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} # Token not required for public repos, but might reduce chance of random 404 error? - name: Linkcheck - if: startsWith(matrix.python-version, 3.9) + if: startsWith(matrix.python-version, '3.10') run: pytest -m linkcheck tests/unit diff --git a/tests/unit/test_links.py b/tests/unit/test_links.py index 0ef5153de62..3e07a3af495 100644 --- a/tests/unit/test_links.py +++ b/tests/unit/test_links.py @@ -13,6 +13,7 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . + """Check links inserted into internal documentation. Reason for doing this here: @@ -20,51 +21,42 @@ - As we have more links it's worth checking them here, rather than waiting for them to show up in Cylc. """ -from functools import lru_cache + +import fnmatch from pathlib import Path import re -from shlex import split -from subprocess import run from time import sleep import pytest import urllib EXCLUDE = [ - 'http://www.gnu.org/licenses/', - 'http://my-site.com/workflows/%(workflow)s/index.html', - 'http://ahost/%(owner)s/notes/%(workflow)s', - 'http://my-site.com/workflows/%(workflow)s/' + r'*//www.gnu.org/licenses/', + r'*//my-site.com/*', + r'*//ahost/%(owner)s/notes/%(workflow)s', + r'*//web.archive.org/*' ] + def get_links(): - searchdir = Path(__file__).parent.parent.parent / 'cylc/flow' - results = {} - for file_ in searchdir.rglob('*.py'): + searchdir = Path(__file__).parent.parent.parent / 'cylc' / 'flow' + return sorted({ + url + for file_ in searchdir.rglob('*.py') for url in re.findall( r'(https?:\/\/.*?)[\n\s\>`"\',]', file_.read_text() - ): - if url not in EXCLUDE and url in results: - results[url].append(file_) - if url not in EXCLUDE and url not in results: - results[url] = [file_] - return results + ) + if not any( + fnmatch.fnmatch(url, pattern) for pattern in EXCLUDE + ) + }) @pytest.mark.linkcheck -@pytest.mark.parametrize( - 'link, files', [ - pytest.param( - link, - files, - id=f"{link}" - ) - for link, files in get_links().items() - ] -) -def test_embedded_url(link, files): +@pytest.mark.parametrize('link', get_links()) +def test_embedded_url(link): try: urllib.request.urlopen(link).getcode() - except urllib.error.HTTPError as exc: + except urllib.error.HTTPError: # Sleep and retry to reduce risk of flakiness: sleep(10) try: @@ -73,7 +65,4 @@ def test_embedded_url(link, files): # Allowing 403 - just because a site forbids us doesn't mean the # link is wrong. if exc.code != 403: - raise Exception(f'{exc} | {link} | {", ".join(files)}') - - - + raise Exception(f'{exc} | {link}')