Commit 013a919
Exclude web.archive.org from linkcheck
Also simplify linkcheck test code
MetRonnie committed Jan 30, 2023
1 parent 79e324d commit 013a919
Showing 2 changed files with 22 additions and 33 deletions.
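The gist of the change: EXCLUDE in tests/unit/test_links.py now holds shell-style glob patterns instead of exact URLs, and get_links() filters candidate links with fnmatch, so a whole host such as web.archive.org can be skipped with a single entry. A minimal sketch of how that pattern matching behaves (is_excluded is a made-up helper and the example URLs are illustrative, not links taken from the codebase):

```python
import fnmatch

# Glob-style patterns, as in the new EXCLUDE list.
EXCLUDE = [
    r'*//www.gnu.org/licenses/',
    r'*//my-site.com/*',
    r'*//ahost/%(owner)s/notes/%(workflow)s',
    r'*//web.archive.org/*',
]


def is_excluded(url: str) -> bool:
    """Return True if the URL matches any exclusion pattern."""
    return any(fnmatch.fnmatch(url, pattern) for pattern in EXCLUDE)


assert is_excluded('http://www.gnu.org/licenses/')
assert is_excluded('https://web.archive.org/web/20230101000000/https://example.com')
assert not is_excluded('https://example.com/docs/index.html')
```

Note that fnmatch's `*` matches any characters, including `/`, which is why a single pattern like `*//web.archive.org/*` covers every archived URL regardless of scheme or path.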
2 changes: 1 addition & 1 deletion .github/workflows/test_fast.yml
@@ -115,5 +115,5 @@ jobs:
           token: ${{ secrets.CODECOV_TOKEN }} # Token not required for public repos, but might reduce chance of random 404 error?

       - name: Linkcheck
-        if: startsWith(matrix.python-version, 3.9)
+        if: startsWith(matrix.python-version, '3.10')
         run: pytest -m linkcheck tests/unit
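A side note on the workflow change: the version is now compared as the quoted string '3.10'. In YAML a bare 3.10 is read as the float 3.1, so the quotes are what keep the matrix value and the startsWith() comparison behaving as a string. A small illustration of that YAML parsing pitfall, assuming PyYAML purely for demonstration (the workflow itself is evaluated by GitHub Actions, not by this code):

```python
import yaml  # PyYAML, used only to show how YAML reads bare numbers

doc = yaml.safe_load("python-version: [3.9, '3.10', 3.10]")
print(doc['python-version'])  # [3.9, '3.10', 3.1] -- the unquoted 3.10 collapses to 3.1
```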
53 changes: 21 additions & 32 deletions tests/unit/test_links.py
@@ -13,58 +13,50 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Check links inserted into internal documentation.
Reason for doing this here:
- Some links don't appear to be being picked up by Cylc-doc linkcheck.
- As we have more links it's worth checking them here, rather than waiting
for them to show up in Cylc.
"""
from functools import lru_cache

+import fnmatch
from pathlib import Path
import re
from shlex import split
from subprocess import run
from time import sleep
import pytest
import urllib

EXCLUDE = [
-    'http://www.gnu.org/licenses/',
-    'http://my-site.com/workflows/%(workflow)s/index.html',
-    'http://ahost/%(owner)s/notes/%(workflow)s',
-    'http://my-site.com/workflows/%(workflow)s/'
+    r'*//www.gnu.org/licenses/',
+    r'*//my-site.com/*',
+    r'*//ahost/%(owner)s/notes/%(workflow)s',
+    r'*//web.archive.org/*'
]


def get_links():
-    searchdir = Path(__file__).parent.parent.parent / 'cylc/flow'
-    results = {}
-    for file_ in searchdir.rglob('*.py'):
+    searchdir = Path(__file__).parent.parent.parent / 'cylc' / 'flow'
+    return sorted({
+        url
+        for file_ in searchdir.rglob('*.py')
         for url in re.findall(
             r'(https?:\/\/.*?)[\n\s\>`"\',]', file_.read_text()
-        ):
-            if url not in EXCLUDE and url in results:
-                results[url].append(file_)
-            if url not in EXCLUDE and url not in results:
-                results[url] = [file_]
-    return results
+        )
+        if not any(
+            fnmatch.fnmatch(url, pattern) for pattern in EXCLUDE
+        )
+    })


@pytest.mark.linkcheck
-@pytest.mark.parametrize(
-    'link, files', [
-        pytest.param(
-            link,
-            files,
-            id=f"{link}"
-        )
-        for link, files in get_links().items()
-    ]
-)
-def test_embedded_url(link, files):
+@pytest.mark.parametrize('link', get_links())
+def test_embedded_url(link):
     try:
         urllib.request.urlopen(link).getcode()
-    except urllib.error.HTTPError as exc:
+    except urllib.error.HTTPError:
         # Sleep and retry to reduce risk of flakiness:
         sleep(10)
         try:
@@ -73,7 +65,4 @@ def test_embedded_url(link, files):
             # Allowing 403 - just because a site forbids us doesn't mean the
             # link is wrong.
             if exc.code != 403:
-                raise Exception(f'{exc} | {link} | {", ".join(files)}')
-
-
-
+                raise Exception(f'{exc} | {link}')
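For reference, the retry-and-tolerate-403 pattern that test_embedded_url relies on looks roughly like the standalone sketch below. The middle of the test is collapsed in the hunk above, so this is an approximation of the pattern rather than a verbatim copy of the file, and check_url is a hypothetical name:

```python
import urllib.error
import urllib.request
from time import sleep


def check_url(link: str) -> None:
    """Open a link, retrying once after a pause; tolerate HTTP 403."""
    try:
        urllib.request.urlopen(link).getcode()
    except urllib.error.HTTPError:
        # Sleep and retry to reduce the risk of a transient failure:
        sleep(10)
        try:
            urllib.request.urlopen(link).getcode()
        except urllib.error.HTTPError as exc:
            # Allow 403 - a site forbidding us doesn't mean the link is wrong.
            if exc.code != 403:
                raise Exception(f'{exc} | {link}')


if __name__ == '__main__':
    check_url('https://example.com')  # illustrative only
```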