From 8ebf2c998d17d10740e8751195aa5e6e65a85c8e Mon Sep 17 00:00:00 2001
From: enitrat
Date: Fri, 1 Aug 2025 17:50:31 +0100
Subject: [PATCH] feat: add documentation snapshot crawler tool

- Created DocsCrawler class with async HTTP session management
- Implemented sitemap discovery and parsing with nested sitemap support
- Added URL validation to crawl only children of the base URL path
- Included fallback crawling mechanism limited to 100 pages
- Added concurrent fetching with semaphore control and retry logic
- Implemented multi-strategy content extraction and HTML-to-Markdown conversion
- Added comprehensive URL filtering for non-documentation paths
- Included CLI interface with argparse and progress indication
- Added dependencies: aiohttp, beautifulsoup4, lxml, markdownify, tqdm
- Added docs-crawler script entry point to pyproject.toml
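
Example usage (the CLI entry point added below in pyproject.toml; the
compiled snapshot lands in doc_dump.md, per the script's OUT_FILE default):

    uv run docs-crawler https://docs.example.com

Programmatic use is a thin sketch around the same flow the CLI wraps:

    import asyncio
    from scripts.docs_crawler import DocsCrawler

    async def snapshot(url: str) -> None:
        async with DocsCrawler(url) as crawler:  # opens the aiohttp session
            await crawler.run()  # discover -> fetch -> compile doc_dump.md

    asyncio.run(snapshot("https://docs.example.com"))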
---
 python/pyproject.toml          |   7 +
 python/scripts/docs_crawler.py | 430 +++++++++++++++++++++++++++++++++
 python/uv.lock                 | 119 +++++++++
 3 files changed, 556 insertions(+)
 create mode 100755 python/scripts/docs_crawler.py

diff --git a/python/pyproject.toml b/python/pyproject.toml
index b7813a19..fb60673f 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -47,6 +47,12 @@ dependencies = [
     "langsmith>=0.4.6",
     "psycopg2-binary>=2.9.10",
     "typer>=0.15.0",
+    # Docs crawler dependencies
+    "aiohttp>=3.9.0",
+    "beautifulsoup4>=4.12.0",
+    "lxml>=4.9.0",
+    "markdownify>=0.11.0",
+    "tqdm>=4.66.0",
 ]

 [project.optional-dependencies]
@@ -72,6 +78,7 @@ generate_starklings_dataset = "cairo_coder.optimizers.generation.generate_starkl
 optimize_generation = "cairo_coder.optimizers.generation.optimize_generation:main"
 starklings_evaluate = "scripts.starklings_evaluate:main"
 cairo-coder-summarize = "scripts.summarizer.cli:app"
+docs-crawler = "scripts.docs_crawler:main"

 [project.urls]
 "Homepage" = "https://github.com/cairo-coder/cairo-coder"
diff --git a/python/scripts/docs_crawler.py b/python/scripts/docs_crawler.py
new file mode 100755
index 00000000..05ca2084
--- /dev/null
+++ b/python/scripts/docs_crawler.py
@@ -0,0 +1,430 @@
+#!/usr/bin/env python3
+"""Documentation Snapshot Crawler - Extract clean documentation content from websites."""
+
+import argparse
+import asyncio
+import logging
+import re
+import xml.etree.ElementTree as ET
+from collections import deque
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional
+from urllib.parse import urljoin, urlparse, urlunparse
+
+import aiohttp
+from bs4 import BeautifulSoup
+from markdownify import markdownify
+from tqdm.asyncio import tqdm
+
+# Configuration
+UA = "NotebookLM-prep-crawler/1.1 (+contact: you@example.com)"
+OUT_FILE = Path("doc_dump")
+CONCURRENCY = 6
+MAX_RETRIES = 3
+TIMEOUT = 30
+MAX_CRAWL_PAGES = 100
+
+# URL patterns to exclude (non-documentation paths)
+EXCLUDE_PATTERNS = [
+    r'/admin', r'/api/', r'/login', r'/search', r'/tag/', r'/category/',
+    r'/author/', r'/user/', r'/wp-admin', r'/wp-content', r'/wp-includes',
+    r'/_next/', r'/static/', r'/assets/', r'/js/', r'/css/', r'/images/',
+    r'/feed', r'/rss', r'/atom', r'/sitemap', r'/robots\.txt',
+    r'mailto:', r'tel:', r'/#'
+]
+
+# Common documentation content selectors
+DOC_SELECTORS = [
+    'main', 'article', '[role="main"]', '.content', '.doc-content',
+    '.gitbook-page', '.markdown-body', '.docs-content', '.documentation',
+    '.post-content', '.entry-content', '.page-content', '#content',
+    '.container-fluid', '.container', '.wrapper'
+]
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+
+class DocsCrawler:
+    def __init__(self, base_url: str):
+        self.base_url = base_url.rstrip('/') + '/'
+        self.domain = urlparse(self.base_url).netloc
+        self.discovered_urls: list[str] = []
+        self.fetched_pages: dict[str, dict] = {}
+        self.session: Optional[aiohttp.ClientSession] = None
+        self.semaphore = asyncio.Semaphore(CONCURRENCY)
+
+    async def __aenter__(self):
+        timeout = aiohttp.ClientTimeout(total=TIMEOUT)
+        self.session = aiohttp.ClientSession(
+            headers={'User-Agent': UA},
+            timeout=timeout
+        )
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        if self.session:
+            await self.session.close()
+
+    def is_valid_url(self, url: str) -> bool:
+        """Check if URL should be included."""
+        parsed = urlparse(url)
+        base_parsed = urlparse(self.base_url)
+
+        # Same host only
+        if parsed.netloc != base_parsed.netloc:
+            return False
+
+        # Must be a child of the base URL path
+        base_path = base_parsed.path.rstrip('/')
+        url_path = parsed.path.rstrip('/')
+        if base_path and not url_path.startswith(base_path):
+            return False
+
+        # No query strings
+        if parsed.query:
+            return False
+
+        # Check exclude patterns
+        path = parsed.path
+        return not any(re.search(pattern, path, re.IGNORECASE) for pattern in EXCLUDE_PATTERNS)
+
+    def normalize_url(self, url: str) -> str:
+        """Remove fragment and normalize URL."""
+        parsed = urlparse(url)
+        return urlunparse(parsed._replace(fragment=''))
+
+    async def fetch_sitemap(self, url: str) -> Optional[str]:
+        """Fetch a sitemap XML file."""
+        try:
+            async with self.session.get(url) as response:
+                if response.status == 200:
+                    return await response.text()
+        except Exception as e:
+            logger.debug(f"Failed to fetch sitemap {url}: {e}")
+        return None
+
+    async def parse_sitemap(self, sitemap_url: str) -> list[str]:
+        """Parse sitemap and extract URLs, handling nested sitemaps."""
+        urls = []
+        sitemap_content = await self.fetch_sitemap(sitemap_url)
+
+        if not sitemap_content:
+            return urls
+
+        try:
+            root = ET.fromstring(sitemap_content)
+
+            # Handle sitemap index (nested sitemaps)
+            sitemap_locs = root.findall('.//{http://www.sitemaps.org/schemas/sitemap/0.9}sitemap/{http://www.sitemaps.org/schemas/sitemap/0.9}loc')
+            if sitemap_locs:
+                logger.info(f"Found sitemap index with {len(sitemap_locs)} nested sitemaps")
+                for loc in sitemap_locs:
+                    nested_urls = await self.parse_sitemap(loc.text)
+                    urls.extend(nested_urls)
+
+            # Handle regular urlset
+            url_locs = root.findall('.//{http://www.sitemaps.org/schemas/sitemap/0.9}url/{http://www.sitemaps.org/schemas/sitemap/0.9}loc')
+            urls.extend([loc.text for loc in url_locs if loc.text])
+
+        except ET.ParseError as e:
+            logger.error(f"Failed to parse sitemap: {e}")
+
+        return urls
+
+    async def discover_urls_from_sitemap(self) -> list[str]:
+        """Try to discover URLs from sitemap."""
+        sitemap_url = urljoin(self.base_url, '/sitemap.xml')
+        logger.info(f"Checking for sitemap at: {sitemap_url}")
+
+        urls = await self.parse_sitemap(sitemap_url)
+
+        # Filter and normalize URLs
+        valid_urls = []
+        seen = set()
+
+        for url in urls:
+            if self.is_valid_url(url):
+                normalized = self.normalize_url(url)
+                if normalized not in seen:
+                    seen.add(normalized)
+                    valid_urls.append(normalized)
+
+        logger.info(f"Found {len(valid_urls)} valid URLs from sitemap")
+        return valid_urls
+
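+    # Fallback discovery (used when no sitemap is found): a breadth-first
+    # crawl from the base URL. crawl_page collects only same-site links that
+    # pass is_valid_url and haven't been visited; discover_urls_by_crawling
+    # drives the queue and stops once MAX_CRAWL_PAGES URLs have been seen.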
+    async def crawl_page(self, url: str, visited: set[str]) -> set[str]:
+        """Crawl a single page and extract links."""
+        new_urls = set()
+
+        try:
+            async with self.session.get(url) as response:
+                if response.status != 200:
+                    return new_urls
+
+                html = await response.text()
+                soup = BeautifulSoup(html, 'lxml')
+
+                # Extract all links
+                for tag in soup.find_all(['a', 'link']):
+                    href = tag.get('href')
+                    if href:
+                        absolute_url = urljoin(url, href)
+                        normalized = self.normalize_url(absolute_url)
+
+                        if (self.is_valid_url(normalized) and
+                                normalized not in visited and
+                                len(visited) + len(new_urls) < MAX_CRAWL_PAGES):
+                            new_urls.add(normalized)
+
+        except Exception as e:
+            logger.debug(f"Failed to crawl {url}: {e}")
+
+        return new_urls
+
+    async def discover_urls_by_crawling(self) -> list[str]:
+        """Fallback: discover URLs by crawling from base URL."""
+        logger.info("Falling back to crawling...")
+
+        visited = set()
+        queue = deque([self.base_url])
+        visited.add(self.base_url)
+
+        while queue and len(visited) < MAX_CRAWL_PAGES:
+            current_url = queue.popleft()
+            new_urls = await self.crawl_page(current_url, visited)
+
+            for url in new_urls:
+                if url not in visited:
+                    visited.add(url)
+                    queue.append(url)
+
+        logger.info(f"Discovered {len(visited)} pages by crawling")
+        return list(visited)
+
+    def sort_urls_logically(self, urls: list[str]) -> list[str]:
+        """Sort URLs in logical order for documentation."""
+        def sort_key(url: str) -> tuple:
+            parsed = urlparse(url)
+            path = parsed.path.strip('/')
+
+            # Special cases for root/home pages
+            if not path or path in ['docs', 'documentation']:
+                return (0, 0, path)
+
+            segments = path.split('/')
+            # Group, depth, main section (first segment), full path
+            main_section = segments[0] if segments else ''
+            return (1, len(segments), main_section, path)
+
+        return sorted(urls, key=sort_key)
+
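+    # Fetch phase: the shared semaphore caps in-flight requests at
+    # CONCURRENCY; each URL gets up to MAX_RETRIES attempts, with exponential
+    # backoff (2 ** attempt seconds) between failed attempts.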
+    async def fetch_page(self, url: str) -> dict:
+        """Fetch a single page with retries."""
+        async with self.semaphore:
+            for attempt in range(MAX_RETRIES):
+                try:
+                    async with self.session.get(url) as response:
+                        content_type = response.headers.get('Content-Type', '')
+
+                        if response.status == 200 and 'text/html' in content_type:
+                            html = await response.text()
+                            return {
+                                'url': url,
+                                'status': response.status,
+                                'content': html,
+                                'error': None
+                            }
+                        return {
+                            'url': url,
+                            'status': response.status,
+                            'content': None,
+                            'error': f"Status {response.status} or non-HTML content"
+                        }
+
+                except asyncio.TimeoutError:
+                    error = "Timeout"
+                except Exception as e:
+                    error = str(e)
+
+                if attempt < MAX_RETRIES - 1:
+                    await asyncio.sleep(2 ** attempt)  # Exponential backoff
+
+        return {
+            'url': url,
+            'status': None,
+            'content': None,
+            'error': error
+        }
+
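+    # Extraction cascade: (1) the first DOC_SELECTORS match with >100 chars
+    # of text, (2) the largest <div> with >200 chars whose class/id tokens
+    # don't look like navigation chrome, (3) <body> as a last resort.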
+    def extract_content(self, html: str, url: str) -> tuple[str, str]:
+        """Extract main content from HTML and convert to Markdown."""
+        soup = BeautifulSoup(html, 'lxml')
+
+        # Get title
+        title_tag = soup.find('title')
+        title = title_tag.get_text(strip=True) if title_tag else urlparse(url).path
+
+        # Remove boilerplate elements
+        for tag in soup.find_all(['script', 'style', 'noscript', 'nav',
+                                  'header', 'footer', 'aside', 'img', 'svg', 'iframe']):
+            tag.decompose()
+
+        # Try to find main content
+        main_content = None
+
+        # Strategy 1: Common doc selectors
+        for selector in DOC_SELECTORS:
+            element = soup.select_one(selector)
+            if element and len(element.get_text(strip=True)) > 100:
+                main_content = element
+                break
+
+        # Strategy 2: Largest div (substring match, so e.g. 'navbar' counts as nav chrome)
+        if not main_content:
+            all_divs = soup.find_all('div')
+            valid_divs = [
+                div for div in all_divs
+                if len(div.get_text(strip=True)) > 200 and
+                not any(kw in token for token in div.get('class', []) + [div.get('id', '')]
+                        for kw in ['nav', 'menu', 'sidebar', 'header', 'footer'])
+            ]
+            if valid_divs:
+                main_content = max(valid_divs, key=lambda d: len(d.get_text(strip=True)))
+
+        # Strategy 3: Fallback to body
+        if not main_content:
+            main_content = soup.find('body') or soup
+
+        # Convert to markdown
+        markdown = markdownify(str(main_content), heading_style="ATX", strip=['a'])
+
+        # Clean up markdown
+        markdown = re.sub(r'\n{3,}', '\n\n', markdown)  # Multiple newlines
+        markdown = re.sub(r'^---+\n', '', markdown)  # Leading separators
+        markdown = re.sub(r'^\.\.\.+\n', '', markdown)  # YAML-like markers
+
+        return title, markdown.strip()
+
+    async def fetch_all_pages(self) -> None:
+        """Fetch all discovered pages concurrently."""
+        logger.info(f"Fetching {len(self.discovered_urls)} pages...")
+
+        tasks = [self.fetch_page(url) for url in self.discovered_urls]
+
+        results = []
+        for f in tqdm.as_completed(tasks, total=len(tasks)):
+            result = await f
+            results.append(result)
+
+        # Store results in order
+        url_to_result = {r['url']: r for r in results}
+        for url in self.discovered_urls:
+            self.fetched_pages[url] = url_to_result.get(url, {})
+
+    def compile_markdown(self) -> str:
+        """Compile all pages into a single Markdown document."""
+        logger.info("Compiling markdown...")
+
+        # Document header
+        date_str = datetime.now(timezone.utc).strftime('%Y-%m-%d')
+        lines = [
+            f"# {self.domain} — Snapshot ({date_str})",
+            "",
+            "Clean documentation content extracted from sitemap.",
+            "",
+            "---",
+            ""
+        ]
+
+        # Process each page
+        for url in self.discovered_urls:
+            page_data = self.fetched_pages.get(url, {})
+
+            if page_data.get('content'):
+                title, markdown = self.extract_content(page_data['content'], url)
+
+                if not markdown or len(markdown.strip()) < 50:
+                    markdown = "*No content extracted.*"
+
+                lines.extend([
+                    f"**Source URL:** {url}",
+                    "",
+                    f"## {title}",
+                    "",
+                    markdown,
+                    "",
+                    "---",
+                    ""
+                ])
+            else:
+                error = page_data.get('error', 'Unknown error')
+                lines.extend([
+                    f"**Source URL:** {url}",
+                    "",
+                    f"## {url}",
+                    "",
+                    f"*Failed to fetch: {error}*",
+                    "",
+                    "---",
+                    ""
+                ])
+
+        return '\n'.join(lines)
+
+    async def run(self) -> None:
+        """Main execution flow."""
+        logger.info(f"Starting documentation crawler for: {self.base_url}")
+
+        # Discovery phase
+        self.discovered_urls = await self.discover_urls_from_sitemap()
+
+        if not self.discovered_urls:
+            self.discovered_urls = await self.discover_urls_by_crawling()
+            self.discovered_urls = self.sort_urls_logically(self.discovered_urls)
+
+        if not self.discovered_urls:
+            logger.error("No URLs discovered!")
+            return
+
+        logger.info(f"Processing {len(self.discovered_urls)} URLs in order:")
+        for i, url in enumerate(self.discovered_urls[:10], 1):
+            logger.info(f"  {i}. {url}")
+        if len(self.discovered_urls) > 10:
+            logger.info(f"  ... and {len(self.discovered_urls) - 10} more")
+
+        # Fetch phase
+        await self.fetch_all_pages()
+
+        # Compile and save
+        markdown_content = self.compile_markdown()
+
+        # Save markdown
+        markdown_path = OUT_FILE.with_suffix('.md')
+        logger.info(f"Saving markdown to: {markdown_path}")
+        markdown_path.write_text(markdown_content, encoding='utf-8')
+        logger.info(f"Markdown file size: {len(markdown_content):,} bytes")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Documentation Snapshot Crawler - Extract clean documentation content from websites",
+        epilog="""
+Examples:
+  uv run docs-crawler https://docs.example.com
+        """,
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    parser.add_argument('base_url', help='Base URL of the documentation site (e.g., https://docs.example.com)')
+
+    args = parser.parse_args()
+
+    async def run_crawler(base_url: str):
+        async with DocsCrawler(base_url) as crawler:
+            await crawler.run()
+
+    return asyncio.run(run_crawler(args.base_url))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/python/uv.lock b/python/uv.lock
index 1379efd3..07c2c45a 100644
--- a/python/uv.lock
+++ b/python/uv.lock
@@ -264,6 +264,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/a0/59/76ab57e3fe74484f48a53f8e337171b4a2349e506eabe136d7e01d059086/backports_asyncio_runner-1.2.0-py3-none-any.whl", hash = "sha256:0da0a936a8aeb554eccb426dc55af3ba63bcdc69fa1a600b5bb305413a4477b5", size = 12313, upload-time = "2025-07-02T02:27:14.263Z" },
 ]

+[[package]]
+name = "beautifulsoup4"
+version = "4.13.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "soupsieve" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d8/e4/0c4c39e18fd76d6a628d4dd8da40543d136ce2d1752bd6eeeab0791f4d6b/beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195", size = 621067, upload-time = "2025-04-15T17:05:13.836Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285, upload-time = "2025-04-15T17:05:12.221Z" },
+]
+
 [[package]]
 name = "black"
 version = "25.1.0"
@@ -321,15 +334,19 @@ name = "cairo-coder"
 version = "0.1.0"
 source = { editable = "." }
 dependencies = [
+    { name = "aiohttp" },
     { name = "anthropic" },
     { name = "asyncpg" },
+    { name = "beautifulsoup4" },
     { name = "dspy" },
     { name = "dspy-ai" },
     { name = "fastapi" },
     { name = "google-generativeai" },
     { name = "httpx" },
     { name = "langsmith" },
+    { name = "lxml" },
     { name = "marimo" },
+    { name = "markdownify" },
     { name = "mlflow" },
     { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
     { name = "numpy", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
@@ -346,6 +363,7 @@ dependencies = [
     { name = "python-multipart" },
     { name = "structlog" },
     { name = "tenacity" },
+    { name = "tqdm" },
     { name = "typer" },
     { name = "uvicorn", extra = ["standard"] },
     { name = "websockets" },
@@ -375,8 +393,10 @@ dev = [

 [package.metadata]
 requires-dist = [
+    { name = "aiohttp", specifier = ">=3.9.0" },
     { name = "anthropic", specifier = ">=0.39.0" },
     { name = "asyncpg", specifier = ">=0.30.0" },
+    { name = "beautifulsoup4", specifier = ">=4.12.0" },
     { name = "black", marker = "extra == 'dev'", specifier = ">=24.0.0" },
     { name = "dspy", specifier = ">=2.6.27" },
     { name = "dspy-ai", specifier = ">=2.5.0" },
@@ -384,7 +404,9 @@ requires-dist = [
     { name = "google-generativeai", specifier = ">=0.8.0" },
     { name = "httpx", specifier = ">=0.27.0" },
     { name = "langsmith", specifier = ">=0.4.6" },
+    { name = "lxml", specifier = ">=4.9.0" },
     { name = "marimo", specifier = ">=0.14.11" },
+    { name = "markdownify", specifier = ">=0.11.0" },
     { name = "mlflow", specifier = ">=2.20" },
     { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.0.0" },
     { name = "nest-asyncio", marker = "extra == 'dev'", specifier = ">=1.6.0" },
@@ -410,6 +432,7 @@
     { name = "structlog", specifier = ">=24.0.0" },
     { name = "tenacity", specifier = ">=8.0.0" },
     { name = "testcontainers", extras = ["postgres"], marker = "extra == 'dev'", specifier = ">=4.0.0" },
+    { name = "tqdm", specifier = ">=4.66.0" },
     { name = "typer", specifier = ">=0.15.0" },
     { name = "types-toml", marker = "extra == 'dev'", specifier = ">=0.10.0" },
     { name = "uvicorn", extras = ["standard"], specifier = ">=0.32.0" },
@@ -1913,6 +1936,80 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c6/cf/ce291b4473b75c8605e5ca6b8bf4a51783eb3b58339984d4fb3b6e1d3579/loro-1.5.2-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:fce298043f02d5714533dc2aaf653f1e455c817bff46837d3cf25753edb39564", size = 3409680, upload-time = "2025-06-23T10:16:04.174Z" },
 ]

+[[package]]
+name = "lxml"
+version = "6.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/c5/ed/60eb6fa2923602fba988d9ca7c5cdbd7cf25faa795162ed538b527a35411/lxml-6.0.0.tar.gz", hash = "sha256:032e65120339d44cdc3efc326c9f660f5f7205f3a535c1fdbf898b29ea01fb72", size = 4096938, upload-time = "2025-06-26T16:28:19.373Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4b/e9/9c3ca02fbbb7585116c2e274b354a2d92b5c70561687dd733ec7b2018490/lxml-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:35bc626eec405f745199200ccb5c6b36f202675d204aa29bb52e27ba2b71dea8", size = 8399057, upload-time = "2025-06-26T16:25:02.169Z" },
+    { url = "https://files.pythonhosted.org/packages/86/25/10a6e9001191854bf283515020f3633b1b1f96fd1b39aa30bf8fff7aa666/lxml-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:246b40f8a4aec341cbbf52617cad8ab7c888d944bfe12a6abd2b1f6cfb6f6082", size = 4569676, upload-time = "2025-06-26T16:25:05.431Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/a5/378033415ff61d9175c81de23e7ad20a3ffb614df4ffc2ffc86bc6746ffd/lxml-6.0.0-cp310-cp310-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:2793a627e95d119e9f1e19720730472f5543a6d84c50ea33313ce328d870f2dd", size = 5291361, upload-time = "2025-06-26T16:25:07.901Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/a6/19c87c4f3b9362b08dc5452a3c3bce528130ac9105fc8fff97ce895ce62e/lxml-6.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:46b9ed911f36bfeb6338e0b482e7fe7c27d362c52fde29f221fddbc9ee2227e7", size = 5008290, upload-time = "2025-06-28T18:47:13.196Z" },
+    { url = "https://files.pythonhosted.org/packages/09/d1/e9b7ad4b4164d359c4d87ed8c49cb69b443225cb495777e75be0478da5d5/lxml-6.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b4790b558bee331a933e08883c423f65bbcd07e278f91b2272489e31ab1e2b4", size = 5163192, upload-time = "2025-06-28T18:47:17.279Z" },
+    { url = "https://files.pythonhosted.org/packages/56/d6/b3eba234dc1584744b0b374a7f6c26ceee5dc2147369a7e7526e25a72332/lxml-6.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2030956cf4886b10be9a0285c6802e078ec2391e1dd7ff3eb509c2c95a69b76", size = 5076973, upload-time = "2025-06-26T16:25:10.936Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/47/897142dd9385dcc1925acec0c4afe14cc16d310ce02c41fcd9010ac5d15d/lxml-6.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d23854ecf381ab1facc8f353dcd9adeddef3652268ee75297c1164c987c11dc", size = 5297795, upload-time = "2025-06-26T16:25:14.282Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/db/551ad84515c6f415cea70193a0ff11d70210174dc0563219f4ce711655c6/lxml-6.0.0-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:43fe5af2d590bf4691531b1d9a2495d7aab2090547eaacd224a3afec95706d76", size = 4776547, upload-time = "2025-06-26T16:25:17.123Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/14/c4a77ab4f89aaf35037a03c472f1ccc54147191888626079bd05babd6808/lxml-6.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74e748012f8c19b47f7d6321ac929a9a94ee92ef12bc4298c47e8b7219b26541", size = 5124904, upload-time = "2025-06-26T16:25:19.485Z" },
+    { url = "https://files.pythonhosted.org/packages/70/b4/12ae6a51b8da106adec6a2e9c60f532350a24ce954622367f39269e509b1/lxml-6.0.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:43cfbb7db02b30ad3926e8fceaef260ba2fb7df787e38fa2df890c1ca7966c3b", size = 4805804, upload-time = "2025-06-26T16:25:21.949Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/b6/2e82d34d49f6219cdcb6e3e03837ca5fb8b7f86c2f35106fb8610ac7f5b8/lxml-6.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:34190a1ec4f1e84af256495436b2d196529c3f2094f0af80202947567fdbf2e7", size = 5323477, upload-time = "2025-06-26T16:25:24.475Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/e6/b83ddc903b05cd08a5723fefd528eee84b0edd07bdf87f6c53a1fda841fd/lxml-6.0.0-cp310-cp310-win32.whl", hash = "sha256:5967fe415b1920a3877a4195e9a2b779249630ee49ece22021c690320ff07452", size = 3613840, upload-time = "2025-06-26T16:25:27.345Z" },
+    { url = "https://files.pythonhosted.org/packages/40/af/874fb368dd0c663c030acb92612341005e52e281a102b72a4c96f42942e1/lxml-6.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:f3389924581d9a770c6caa4df4e74b606180869043b9073e2cec324bad6e306e", size = 3993584, upload-time = "2025-06-26T16:25:29.391Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/f4/d296bc22c17d5607653008f6dd7b46afdfda12efd31021705b507df652bb/lxml-6.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:522fe7abb41309e9543b0d9b8b434f2b630c5fdaf6482bee642b34c8c70079c8", size = 3681400, upload-time = "2025-06-26T16:25:31.421Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/23/828d4cc7da96c611ec0ce6147bbcea2fdbde023dc995a165afa512399bbf/lxml-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4ee56288d0df919e4aac43b539dd0e34bb55d6a12a6562038e8d6f3ed07f9e36", size = 8438217, upload-time = "2025-06-26T16:25:34.349Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/33/5ac521212c5bcb097d573145d54b2b4a3c9766cda88af5a0e91f66037c6e/lxml-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8dd6dd0e9c1992613ccda2bcb74fc9d49159dbe0f0ca4753f37527749885c25", size = 4590317, upload-time = "2025-06-26T16:25:38.103Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/2e/45b7ca8bee304c07f54933c37afe7dd4d39ff61ba2757f519dcc71bc5d44/lxml-6.0.0-cp311-cp311-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:d7ae472f74afcc47320238b5dbfd363aba111a525943c8a34a1b657c6be934c3", size = 5221628, upload-time = "2025-06-26T16:25:40.878Z" },
+    { url = "https://files.pythonhosted.org/packages/32/23/526d19f7eb2b85da1f62cffb2556f647b049ebe2a5aa8d4d41b1fb2c7d36/lxml-6.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5592401cdf3dc682194727c1ddaa8aa0f3ddc57ca64fd03226a430b955eab6f6", size = 4949429, upload-time = "2025-06-28T18:47:20.046Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/cc/f6be27a5c656a43a5344e064d9ae004d4dcb1d3c9d4f323c8189ddfe4d13/lxml-6.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:58ffd35bd5425c3c3b9692d078bf7ab851441434531a7e517c4984d5634cd65b", size = 5087909, upload-time = "2025-06-28T18:47:22.834Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/e6/8ec91b5bfbe6972458bc105aeb42088e50e4b23777170404aab5dfb0c62d/lxml-6.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f720a14aa102a38907c6d5030e3d66b3b680c3e6f6bc95473931ea3c00c59967", size = 5031713, upload-time = "2025-06-26T16:25:43.226Z" },
+    { url = "https://files.pythonhosted.org/packages/33/cf/05e78e613840a40e5be3e40d892c48ad3e475804db23d4bad751b8cadb9b/lxml-6.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2a5e8d207311a0170aca0eb6b160af91adc29ec121832e4ac151a57743a1e1e", size = 5232417, upload-time = "2025-06-26T16:25:46.111Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/8c/6b306b3e35c59d5f0b32e3b9b6b3b0739b32c0dc42a295415ba111e76495/lxml-6.0.0-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:2dd1cc3ea7e60bfb31ff32cafe07e24839df573a5e7c2d33304082a5019bcd58", size = 4681443, upload-time = "2025-06-26T16:25:48.837Z" },
+    { url = "https://files.pythonhosted.org/packages/59/43/0bd96bece5f7eea14b7220476835a60d2b27f8e9ca99c175f37c085cb154/lxml-6.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2cfcf84f1defed7e5798ef4f88aa25fcc52d279be731ce904789aa7ccfb7e8d2", size = 5074542, upload-time = "2025-06-26T16:25:51.65Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/3d/32103036287a8ca012d8518071f8852c68f2b3bfe048cef2a0202eb05910/lxml-6.0.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:a52a4704811e2623b0324a18d41ad4b9fabf43ce5ff99b14e40a520e2190c851", size = 4729471, upload-time = "2025-06-26T16:25:54.571Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/a8/7be5d17df12d637d81854bd8648cd329f29640a61e9a72a3f77add4a311b/lxml-6.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c16304bba98f48a28ae10e32a8e75c349dd742c45156f297e16eeb1ba9287a1f", size = 5256285, upload-time = "2025-06-26T16:25:56.997Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/d0/6cb96174c25e0d749932557c8d51d60c6e292c877b46fae616afa23ed31a/lxml-6.0.0-cp311-cp311-win32.whl", hash = "sha256:f8d19565ae3eb956d84da3ef367aa7def14a2735d05bd275cd54c0301f0d0d6c", size = 3612004, upload-time = "2025-06-26T16:25:59.11Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/77/6ad43b165dfc6dead001410adeb45e88597b25185f4479b7ca3b16a5808f/lxml-6.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b2d71cdefda9424adff9a3607ba5bbfc60ee972d73c21c7e3c19e71037574816", size = 4003470, upload-time = "2025-06-26T16:26:01.655Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/bc/4c50ec0eb14f932a18efc34fc86ee936a66c0eb5f2fe065744a2da8a68b2/lxml-6.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:8a2e76efbf8772add72d002d67a4c3d0958638696f541734304c7f28217a9cab", size = 3682477, upload-time = "2025-06-26T16:26:03.808Z" },
+    { url = "https://files.pythonhosted.org/packages/89/c3/d01d735c298d7e0ddcedf6f028bf556577e5ab4f4da45175ecd909c79378/lxml-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78718d8454a6e928470d511bf8ac93f469283a45c354995f7d19e77292f26108", size = 8429515, upload-time = "2025-06-26T16:26:06.776Z" },
+    { url = "https://files.pythonhosted.org/packages/06/37/0e3eae3043d366b73da55a86274a590bae76dc45aa004b7042e6f97803b1/lxml-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:84ef591495ffd3f9dcabffd6391db7bb70d7230b5c35ef5148354a134f56f2be", size = 4601387, upload-time = "2025-06-26T16:26:09.511Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/28/e1a9a881e6d6e29dda13d633885d13acb0058f65e95da67841c8dd02b4a8/lxml-6.0.0-cp312-cp312-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:2930aa001a3776c3e2601cb8e0a15d21b8270528d89cc308be4843ade546b9ab", size = 5228928, upload-time = "2025-06-26T16:26:12.337Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/55/2cb24ea48aa30c99f805921c1c7860c1f45c0e811e44ee4e6a155668de06/lxml-6.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:219e0431ea8006e15005767f0351e3f7f9143e793e58519dc97fe9e07fae5563", size = 4952289, upload-time = "2025-06-28T18:47:25.602Z" },
+    { url = "https://files.pythonhosted.org/packages/31/c0/b25d9528df296b9a3306ba21ff982fc5b698c45ab78b94d18c2d6ae71fd9/lxml-6.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bd5913b4972681ffc9718bc2d4c53cde39ef81415e1671ff93e9aa30b46595e7", size = 5111310, upload-time = "2025-06-28T18:47:28.136Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/af/681a8b3e4f668bea6e6514cbcb297beb6de2b641e70f09d3d78655f4f44c/lxml-6.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:390240baeb9f415a82eefc2e13285016f9c8b5ad71ec80574ae8fa9605093cd7", size = 5025457, upload-time = "2025-06-26T16:26:15.068Z" },
+    { url = "https://files.pythonhosted.org/packages/99/b6/3a7971aa05b7be7dfebc7ab57262ec527775c2c3c5b2f43675cac0458cad/lxml-6.0.0-cp312-cp312-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d6e200909a119626744dd81bae409fc44134389e03fbf1d68ed2a55a2fb10991", size = 5657016, upload-time = "2025-07-03T19:19:06.008Z" },
+    { url = "https://files.pythonhosted.org/packages/69/f8/693b1a10a891197143c0673fcce5b75fc69132afa81a36e4568c12c8faba/lxml-6.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ca50bd612438258a91b5b3788c6621c1f05c8c478e7951899f492be42defc0da", size = 5257565, upload-time = "2025-06-26T16:26:17.906Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/96/e08ff98f2c6426c98c8964513c5dab8d6eb81dadcd0af6f0c538ada78d33/lxml-6.0.0-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:c24b8efd9c0f62bad0439283c2c795ef916c5a6b75f03c17799775c7ae3c0c9e", size = 4713390, upload-time = "2025-06-26T16:26:20.292Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/83/6184aba6cc94d7413959f6f8f54807dc318fdcd4985c347fe3ea6937f772/lxml-6.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:afd27d8629ae94c5d863e32ab0e1d5590371d296b87dae0a751fb22bf3685741", size = 5066103, upload-time = "2025-06-26T16:26:22.765Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/01/8bf1f4035852d0ff2e36a4d9aacdbcc57e93a6cd35a54e05fa984cdf73ab/lxml-6.0.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:54c4855eabd9fc29707d30141be99e5cd1102e7d2258d2892314cf4c110726c3", size = 4791428, upload-time = "2025-06-26T16:26:26.461Z" },
+    { url = "https://files.pythonhosted.org/packages/29/31/c0267d03b16954a85ed6b065116b621d37f559553d9339c7dcc4943a76f1/lxml-6.0.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c907516d49f77f6cd8ead1322198bdfd902003c3c330c77a1c5f3cc32a0e4d16", size = 5678523, upload-time = "2025-07-03T19:19:09.837Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/f7/5495829a864bc5f8b0798d2b52a807c89966523140f3d6fa3a58ab6720ea/lxml-6.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36531f81c8214e293097cd2b7873f178997dae33d3667caaae8bdfb9666b76c0", size = 5281290, upload-time = "2025-06-26T16:26:29.406Z" },
+    { url = "https://files.pythonhosted.org/packages/79/56/6b8edb79d9ed294ccc4e881f4db1023af56ba451909b9ce79f2a2cd7c532/lxml-6.0.0-cp312-cp312-win32.whl", hash = "sha256:690b20e3388a7ec98e899fd54c924e50ba6693874aa65ef9cb53de7f7de9d64a", size = 3613495, upload-time = "2025-06-26T16:26:31.588Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/1e/cc32034b40ad6af80b6fd9b66301fc0f180f300002e5c3eb5a6110a93317/lxml-6.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:310b719b695b3dd442cdfbbe64936b2f2e231bb91d998e99e6f0daf991a3eba3", size = 4014711, upload-time = "2025-06-26T16:26:33.723Z" },
+    { url = "https://files.pythonhosted.org/packages/55/10/dc8e5290ae4c94bdc1a4c55865be7e1f31dfd857a88b21cbba68b5fea61b/lxml-6.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:8cb26f51c82d77483cdcd2b4a53cda55bbee29b3c2f3ddeb47182a2a9064e4eb", size = 3674431, upload-time = "2025-06-26T16:26:35.959Z" },
+    { url = "https://files.pythonhosted.org/packages/79/21/6e7c060822a3c954ff085e5e1b94b4a25757c06529eac91e550f3f5cd8b8/lxml-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6da7cd4f405fd7db56e51e96bff0865b9853ae70df0e6720624049da76bde2da", size = 8414372, upload-time = "2025-06-26T16:26:39.079Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/f6/051b1607a459db670fc3a244fa4f06f101a8adf86cda263d1a56b3a4f9d5/lxml-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b34339898bb556a2351a1830f88f751679f343eabf9cf05841c95b165152c9e7", size = 4593940, upload-time = "2025-06-26T16:26:41.891Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/74/dd595d92a40bda3c687d70d4487b2c7eff93fd63b568acd64fedd2ba00fe/lxml-6.0.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:51a5e4c61a4541bd1cd3ba74766d0c9b6c12d6a1a4964ef60026832aac8e79b3", size = 5214329, upload-time = "2025-06-26T16:26:44.669Z" },
+    { url = "https://files.pythonhosted.org/packages/52/46/3572761efc1bd45fcafb44a63b3b0feeb5b3f0066886821e94b0254f9253/lxml-6.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d18a25b19ca7307045581b18b3ec9ead2b1db5ccd8719c291f0cd0a5cec6cb81", size = 4947559, upload-time = "2025-06-28T18:47:31.091Z" },
+    { url = "https://files.pythonhosted.org/packages/94/8a/5e40de920e67c4f2eef9151097deb9b52d86c95762d8ee238134aff2125d/lxml-6.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d4f0c66df4386b75d2ab1e20a489f30dc7fd9a06a896d64980541506086be1f1", size = 5102143, upload-time = "2025-06-28T18:47:33.612Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/4b/20555bdd75d57945bdabfbc45fdb1a36a1a0ff9eae4653e951b2b79c9209/lxml-6.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f4b481b6cc3a897adb4279216695150bbe7a44c03daba3c894f49d2037e0a24", size = 5021931, upload-time = "2025-06-26T16:26:47.503Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/6e/cf03b412f3763d4ca23b25e70c96a74cfece64cec3addf1c4ec639586b13/lxml-6.0.0-cp313-cp313-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8a78d6c9168f5bcb20971bf3329c2b83078611fbe1f807baadc64afc70523b3a", size = 5645469, upload-time = "2025-07-03T19:19:13.32Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/dd/39c8507c16db6031f8c1ddf70ed95dbb0a6d466a40002a3522c128aba472/lxml-6.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ae06fbab4f1bb7db4f7c8ca9897dc8db4447d1a2b9bee78474ad403437bcc29", size = 5247467, upload-time = "2025-06-26T16:26:49.998Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/56/732d49def0631ad633844cfb2664563c830173a98d5efd9b172e89a4800d/lxml-6.0.0-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:1fa377b827ca2023244a06554c6e7dc6828a10aaf74ca41965c5d8a4925aebb4", size = 4720601, upload-time = "2025-06-26T16:26:52.564Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/7f/6b956fab95fa73462bca25d1ea7fc8274ddf68fb8e60b78d56c03b65278e/lxml-6.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1676b56d48048a62ef77a250428d1f31f610763636e0784ba67a9740823988ca", size = 5060227, upload-time = "2025-06-26T16:26:55.054Z" },
+    { url = "https://files.pythonhosted.org/packages/97/06/e851ac2924447e8b15a294855caf3d543424364a143c001014d22c8ca94c/lxml-6.0.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:0e32698462aacc5c1cf6bdfebc9c781821b7e74c79f13e5ffc8bfe27c42b1abf", size = 4790637, upload-time = "2025-06-26T16:26:57.384Z" },
+    { url = "https://files.pythonhosted.org/packages/06/d4/fd216f3cd6625022c25b336c7570d11f4a43adbaf0a56106d3d496f727a7/lxml-6.0.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4d6036c3a296707357efb375cfc24bb64cd955b9ec731abf11ebb1e40063949f", size = 5662049, upload-time = "2025-07-03T19:19:16.409Z" },
+    { url = "https://files.pythonhosted.org/packages/52/03/0e764ce00b95e008d76b99d432f1807f3574fb2945b496a17807a1645dbd/lxml-6.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7488a43033c958637b1a08cddc9188eb06d3ad36582cebc7d4815980b47e27ef", size = 5272430, upload-time = "2025-06-26T16:27:00.031Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/01/d48cc141bc47bc1644d20fe97bbd5e8afb30415ec94f146f2f76d0d9d098/lxml-6.0.0-cp313-cp313-win32.whl", hash = "sha256:5fcd7d3b1d8ecb91445bd71b9c88bdbeae528fefee4f379895becfc72298d181", size = 3612896, upload-time = "2025-06-26T16:27:04.251Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/87/6456b9541d186ee7d4cb53bf1b9a0d7f3b1068532676940fdd594ac90865/lxml-6.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:2f34687222b78fff795feeb799a7d44eca2477c3d9d3a46ce17d51a4f383e32e", size = 4013132, upload-time = "2025-06-26T16:27:06.415Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/42/85b3aa8f06ca0d24962f8100f001828e1f1f1a38c954c16e71154ed7d53a/lxml-6.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:21db1ec5525780fd07251636eb5f7acb84003e9382c72c18c542a87c416ade03", size = 3672642, upload-time = "2025-06-26T16:27:09.888Z" },
+    { url = "https://files.pythonhosted.org/packages/66/e1/2c22a3cff9e16e1d717014a1e6ec2bf671bf56ea8716bb64466fcf820247/lxml-6.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:dbdd7679a6f4f08152818043dbb39491d1af3332128b3752c3ec5cebc0011a72", size = 3898804, upload-time = "2025-06-26T16:27:59.751Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/3a/d68cbcb4393a2a0a867528741fafb7ce92dac5c9f4a1680df98e5e53e8f5/lxml-6.0.0-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:40442e2a4456e9910875ac12951476d36c0870dcb38a68719f8c4686609897c4", size = 4216406, upload-time = "2025-06-28T18:47:45.518Z" },
+    { url = "https://files.pythonhosted.org/packages/15/8f/d9bfb13dff715ee3b2a1ec2f4a021347ea3caf9aba93dea0cfe54c01969b/lxml-6.0.0-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:db0efd6bae1c4730b9c863fc4f5f3c0fa3e8f05cae2c44ae141cb9dfc7d091dc", size = 4326455, upload-time = "2025-06-28T18:47:48.411Z" },
+    { url = "https://files.pythonhosted.org/packages/01/8b/fde194529ee8a27e6f5966d7eef05fa16f0567e4a8e8abc3b855ef6b3400/lxml-6.0.0-pp310-pypy310_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ab542c91f5a47aaa58abdd8ea84b498e8e49fe4b883d67800017757a3eb78e8", size = 4268788, upload-time = "2025-06-26T16:28:02.776Z" },
+    { url = "https://files.pythonhosted.org/packages/99/a8/3b8e2581b4f8370fc9e8dc343af4abdfadd9b9229970fc71e67bd31c7df1/lxml-6.0.0-pp310-pypy310_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:013090383863b72c62a702d07678b658fa2567aa58d373d963cca245b017e065", size = 4411394, upload-time = "2025-06-26T16:28:05.179Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/a5/899a4719e02ff4383f3f96e5d1878f882f734377f10dfb69e73b5f223e44/lxml-6.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c86df1c9af35d903d2b52d22ea3e66db8058d21dc0f59842ca5deb0595921141", size = 3517946, upload-time = "2025-06-26T16:28:07.665Z" },
+]
+
 [[package]]
 name = "magicattr"
 version = "0.1.6"
@@ -1982,6 +2079,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528, upload-time = "2023-06-03T06:41:11.019Z" },
 ]

+[[package]]
+name = "markdownify"
+version = "1.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "beautifulsoup4" },
+    { name = "six" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/2f/78/c48fed23c7aebc2c16049062e72de1da3220c274de59d28c942acdc9ffb2/markdownify-1.1.0.tar.gz", hash = "sha256:449c0bbbf1401c5112379619524f33b63490a8fa479456d41de9dc9e37560ebd", size = 17127, upload-time = "2025-03-05T11:54:40.574Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/64/11/b751af7ad41b254a802cf52f7bc1fca7cabe2388132f2ce60a1a6b9b9622/markdownify-1.1.0-py3-none-any.whl", hash = "sha256:32a5a08e9af02c8a6528942224c91b933b4bd2c7d078f9012943776fc313eeef", size = 13901, upload-time = "2025-03-05T11:54:39.454Z" },
+]
+
 [[package]]
 name = "markupsafe"
 version = "3.0.2"
@@ -3947,6 +4057,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
 ]

+[[package]]
+name = "soupsieve"
+version = "2.7"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/3f/f4/4a80cd6ef364b2e8b65b15816a843c0980f7a5a2b4dc701fc574952aa19f/soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a", size = 103418, upload-time = "2025-04-20T18:50:08.518Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e7/9c/0e6afc12c269578be5c0c1c9f4b49a8d32770a080260c333ac04cc1c832d/soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4", size = 36677, upload-time = "2025-04-20T18:50:07.196Z" },
+]
+
 [[package]]
 name = "sqlalchemy"
 version = "2.0.41"