Skip to content

Commit

Permalink
CI: frequently check for dead links
Browse files Browse the repository at this point in the history
  • Loading branch information
Tobi-De committed Nov 22, 2023
1 parent e06b460 commit 27529f6
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 14 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/check-dead-links.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Periodically verifies that the documentation and download links listed in
# the repository data are still reachable, by running dead_links.py.
name: Check Dead Links

on:
  push:
  workflow_dispatch:
  schedule:
    # Every Sunday at 12:00 UTC.
    - cron: "0 12 * * 0"

# dead_links.py opens an issue through the REST API when it finds dead links,
# so the automatic token needs write access to issues.
permissions:
  contents: read
  issues: write

jobs:
  check-links:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: "Set up Python"
        uses: actions/setup-python@v3
        with:
          python-version: "3.x"

      - name: Check dead links
        run: python dead_links.py
        env:
          # The script reads the token with os.getenv("GITHUB_TOKEN"); the
          # secret is not exported to steps unless it is mapped explicitly.
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
53 changes: 39 additions & 14 deletions dead_links.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
import json
from pathlib import Path
import urllib.request
import threading
import concurrent.futures
import os
import datetime as dt

# Token sent in the Authorization header when creating an issue through the
# GitHub API; None when the GITHUB_TOKEN environment variable is not set.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
# "owner/name" slug of the repository on which the dead-links issue is opened.
REPO = "Tobi-De/htmx_extensions"


def check_link(url, timeout=10):
    """Check whether *url* can be fetched and report the outcome.

    One status line is printed per URL. The function never raises for an
    unreachable or malformed link, so a single bad entry cannot abort a
    batch of checks.

    Args:
        url: The address to request.
        timeout: Seconds to wait on the connection before the link is
            treated as dead. Without it a stalled server would block the
            calling worker indefinitely.

    Returns:
        The URL itself when the request failed, ``None`` when it succeeded.
    """
    try:
        # The context manager closes the response (and its socket) as soon
        # as the status is known; the body itself is not needed.
        with urllib.request.urlopen(url, timeout=timeout):
            pass
    except urllib.error.HTTPError as e:
        print(f"HTTPError: {url} {e.code}")
        return url
    except urllib.error.URLError as e:
        print(f"URLError: {url} {e.reason}")
        return url
    except (OSError, ValueError) as e:
        # OSError: socket timeout / connection reset while reading the reply.
        # ValueError: the string is not a usable URL (e.g. unknown scheme).
        print(f"Error: {url} {e}")
        return url
    else:
        print(f"OK: {url}")
        return None

def check_dead_links():
    """Check the documentation and download URL of every listed extension.

    Reads ``data/extensions.json``, whose values each provide a ``doc_url``
    and a ``download_url``, and checks all of those URLs concurrently with
    ``check_link``.

    Returns:
        A list of the URLs that ``check_link`` reported as dead, in the
        order they first appear in the data file. Empty when every link
        is reachable.
    """
    extensions = json.loads(
        Path("data/extensions.json").read_text(encoding="utf-8")
    )
    # dict.fromkeys drops repeated URLs while keeping file order, so a link
    # shared by several extensions is requested (and reported) only once.
    urls = list(
        dict.fromkeys(
            url
            for extension in extensions.values()
            for url in (extension["doc_url"], extension["download_url"])
        )
    )
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Executor.map yields results in input order, which keeps the
        # returned list deterministic between runs.
        return [
            dead_url
            for dead_url in executor.map(check_link, urls)
            if dead_url is not None
        ]


def create_github_issue(dead_links):
    """Open an issue on ``REPO`` listing the given dead links.

    Args:
        dead_links: Iterable of URL strings; each one becomes a line of the
            issue body.

    Raises:
        RuntimeError: If ``GITHUB_TOKEN`` is not set, since the request
            could only be rejected by the API.
        urllib.error.URLError: If the API request fails (``HTTPError`` for
            a non-success status).
    """
    if not GITHUB_TOKEN:
        raise RuntimeError(
            "GITHUB_TOKEN is not set; cannot create the dead links issue"
        )
    url = f"https://api.github.com/repos/{REPO}/issues"
    headers = {
        "Authorization": f"token {GITHUB_TOKEN}",
        "Accept": "application/vnd.github.v3+json",
        "Content-Type": "application/json",
    }
    data = {
        "title": f"Dead links - {dt.datetime.now().strftime('%Y-%m-%d %H:%M')}",
        "body": "\n".join(dead_links),
    }
    req = urllib.request.Request(
        url,
        data=json.dumps(data).encode("utf-8"),
        headers=headers,
        method="POST",
    )
    # Close the response once the API has answered; its body is not used.
    with urllib.request.urlopen(req, timeout=30):
        pass




if __name__ == "__main__":
    # Run the check exactly once and keep its result; a second, discarded
    # call would only repeat every network request.
    dead_links = check_dead_links()
    # Only open an issue when there is something to report.
    if dead_links:
        create_github_issue(dead_links)

0 comments on commit 27529f6

Please sign in to comment.