In [6]:
"""
Vietnamese URL Scraper for Common Crawl.

This script queries the Common Crawl Index API (CDXJ) to retrieve metadata
for URLs ending in '.vn'. It paginates through the index results,
deduplicates findings, and saves the URLs, WARC filenames, and offsets
to a CSV file for future processing.

Outputs:
    - vi_commoncrawl_urls.csv: A CSV containing unique Vietnamese URLs
      with their respective WARC storage details.
"""

import requests
import json
import pandas as pd
from tqdm import tqdm
import logging
import time

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")
logger = logging.getLogger(__name__)

OUTPUT_FILE = "vi_commoncrawl_urls.csv"
MAX_URLS = 50_000
INDEX_NAME = "CC-MAIN-2025-51-index"
BASE_URL = f"https://index.commoncrawl.org/{INDEX_NAME}"
urls = []

page = 0
while len(urls) < MAX_URLS:
    try:
        logger.info(f"Fetching page {page}…")
        params = {
            "url": "*.vn/*",
            "output": "json",
            "page": page
        }
        resp = requests.get(BASE_URL, params=params, timeout=30)
        if resp.status_code != 200 or not resp.text.strip():
            logger.info(f"No more results at page {page}")
            break

        lines = resp.text.strip().split("\n")
        count_before = len(urls)
        for line in lines:
            try:
                rec = json.loads(line)
                urls.append({
                    "url": rec["url"],
                    "warc_filename": rec["filename"],
                    "offset": rec["offset"],
                    "length": rec["length"]
                })
            except Exception:
                continue

        logger.info(f"Collected {len(urls)} URLs so far")
        if len(urls) == count_before:
            # No new URLs found means likely pagination ended
            break

        page += 1
        time.sleep(1)  # politeness delay between pages

    except Exception as e:
        logger.warning(f"[ERROR] Request failed on page {page}: {e}")
        time.sleep(5)
        continue

df = pd.DataFrame(urls).drop_duplicates(subset="url").head(MAX_URLS)
df.to_csv(OUTPUT_FILE, index=False)
logger.info(f"Done! Saved {len(df)} Vietnamese URLs to {OUTPUT_FILE}")



2026-01-24 14:51:46,499 - Fetching page 0…
2026-01-24 14:51:54,977 - Collected 12417 URLs so far
2026-01-24 14:51:55,982 - Fetching page 1…
2026-01-24 14:52:04,725 - Collected 27417 URLs so far
2026-01-24 14:52:05,730 - Fetching page 2…
2026-01-24 14:52:13,563 - Collected 42417 URLs so far
2026-01-24 14:52:14,567 - Fetching page 3…
2026-01-24 14:52:24,232 - Collected 57417 URLs so far
2026-01-24 14:52:25,780 - ✅ Done! Saved 50000 Vietnamese URLs to vi_commoncrawl_urls.csv


In [1]:
"""
Common Crawl WARC Downloader & Text Extractor.

This script processes a list of Common Crawl metadata to download raw HTML
and convert it into clean text. It uses multi-threading to handle high-latency
requests and I/O operations efficiently.

Workflow:
    1. Reads URL metadata (offset, length, filename) from a local CSV.
    2. Fetches specific WARC records using HTTP Range requests.
    3. Extracts raw HTML and saves it to 'vi_html/'.
    4. Parses HTML with BeautifulSoup (lxml) to remove scripts/styles.
    5. Saves cleaned text files to 'vi_text/'.

Configuration:
    - Threading: Controlled by MAX_THREADS (default: 20).
    - Batching: Processes URLs in chunks (BATCH_SIZE) to manage memory.

The code here has issues with URLs sharing the same first 60 characters and hence we used the code below for our data collection instead.
"""

import pandas as pd
import requests
import io
from pathlib import Path
from warcio.archiveiterator import ArchiveIterator
from bs4 import BeautifulSoup
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

# -------------------------------
# CONFIG
# -------------------------------
URL_CSV = "vi_commoncrawl_urls.csv"
HTML_DIR = Path("vi_html")
TEXT_DIR = Path("vi_text")
HTML_DIR.mkdir(exist_ok=True)
TEXT_DIR.mkdir(exist_ok=True)
MAX_THREADS = 20
MAX_RETRIES = 3
BATCH_SIZE = 1000  # split URLs into manageable chunks

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")
logger = logging.getLogger(__name__)

# -------------------------------
# HELPERS
# -------------------------------
def fetch_html(row):
    url = row["url"]
    warc_path = row["warc_filename"]
    offset = int(row["offset"])
    length = int(row["length"])
    if "crawldiagnostics" in warc_path.lower():
        logger.info(f"⏭️ Skipping diagnostic WARC for {url}")
        return None

    http_url = f"https://data.commoncrawl.org/{warc_path}"
    headers = {"Range": f"bytes={offset}-{offset + length - 1}"}
    safe_name = "".join(c if c.isalnum() else "_" for c in url[:60]) + ".html"
    output_path = HTML_DIR / safe_name

    if output_path.exists():
        return output_path

    for attempt in range(MAX_RETRIES):
        try:
            resp = requests.get(http_url, headers=headers, timeout=60)
            resp.raise_for_status()
            for record in ArchiveIterator(io.BytesIO(resp.content)):
                if record.rec_type == "response":
                    html = record.content_stream().read().decode("utf-8", errors="ignore")
                    output_path.write_text(html, encoding="utf-8")
                    logger.info(f"✅ Fetched {url}")
                    return output_path
        except Exception as e:
            logger.warning(f"⚠️ Attempt {attempt+1} failed for {url}: {e}")
            time.sleep(2)
    return None

def extract_text(html_file):
    try:
        html = html_file.read_text(encoding="utf-8", errors="ignore")
        soup = BeautifulSoup(html, "lxml")
        for script in soup(["script", "style"]):
            script.decompose()
        lines = [line.strip() for line in soup.get_text(separator="\n").splitlines() if line.strip()]
        clean_text = "\n".join(lines)
        txt_file = TEXT_DIR / (html_file.stem + ".txt")
        txt_file.write_text(clean_text, encoding="utf-8")
        logger.info(f"✅ Extracted {html_file.name}")
        return txt_file
    except Exception as e:
        logger.warning(f"⚠️ Failed to extract {html_file.name}: {e}")
        return None

# -------------------------------
# MAIN
# -------------------------------
df = pd.read_csv(URL_CSV)
logger.info(f"Loaded {len(df)} URLs from {URL_CSV}")

# Split into batches to avoid overloading threads
for start in range(0, len(df), BATCH_SIZE):
    batch = df.iloc[start:start+BATCH_SIZE]
    logger.info(f"Processing batch {start} to {start + len(batch)}")

    # ---- Fetch HTML ----
    html_files = []
    with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
        futures = [executor.submit(fetch_html, row) for _, row in batch.iterrows()]
        for future in as_completed(futures):
            result = future.result()
            if result:
                html_files.append(result)

    logger.info(f"✅ Downloaded {len(html_files)} HTML files in this batch")

    # ---- Extract text ----
    with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
        futures = [executor.submit(extract_text, html_file) for html_file in html_files]
        for future in as_completed(futures):
            _ = future.result()

logger.info(f"🎉 All done! Text files are in {TEXT_DIR}")


2026-01-24 16:34:22,398 - Loaded 50000 URLs from vi_commoncrawl_urls.csv
2026-01-24 16:34:22,401 - Processing batch 0 to 1000
2026-01-24 16:34:22,448 - ⏭️ Skipping diagnostic WARC for https://028.vn/bat-dong-san/chinh-chu-can-ban-ha-trng-tam-da-nang-35195.html
2026-01-24 16:34:22,449 - ⏭️ Skipping diagnostic WARC for https://028.vn/cac-loai-hinh-khac/bang-gia-kham-nam-khoa-tai-binh-duong-chi-phi-cu-the-cho-tung-hang-muc-34856.html
2026-01-24 16:34:22,461 - ⏭️ Skipping diagnostic WARC for https://028.vn/cac-loai-hinh-khac/bo-mat-na-phong-doc-vian-620p-34870.html
2026-01-24 16:34:22,472 - ⏭️ Skipping diagnostic WARC for https://028.vn/cac-loai-hinh-khac/cau-truot-tre-em-xich-du-tre-em-du-quay-tre-em-nhap-khau-34853.html
2026-01-24 16:34:22,483 - ⏭️ Skipping diagnostic WARC for https://028.vn/cac-loai-hinh-khac/cua-cach-nhiet-lua-chon-thong-minh-cho-nha-o-va-cong-trinh-xanh-34850.html
2026-01-24 16:34:22,484 - ⏭️ Skipping diagnostic WARC for https://028.vn/cac-loai-hinh-khac/cua-go-ho-chi

In [None]:
"""
Common Crawl Downloader & Extractor (Optimized for I/O).

This script handles large-scale data retrieval and processing. It is specifically
architected to bypass Python's Global Interpreter Lock (GIL) limitations.

Threading Strategy:
    - Network I/O: Threading is used for 'requests.get' because the GIL is
      released while waiting for the Common Crawl server to respond.
    - Disk I/O: The GIL is released during 'write_text' and 'read_text' calls,
      allowing multiple files to be saved/loaded concurrently.
    - Resume Logic: Identifies missing files via SHA-1 hashes before execution
       to ensure the script is idempotent.

Dependencies:
    - warcio: For stream-parsing WARC records.
    - BeautifulSoup: For HTML-to-text conversion.
"""

import pandas as pd
import requests
import io
from pathlib import Path
from warcio.archiveiterator import ArchiveIterator
from bs4 import BeautifulSoup
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import hashlib

# -------------------------------
# CONFIG
# -------------------------------
URL_CSV = "vi_commoncrawl_urls.csv"
HTML_DIR = Path("vi_html")
TEXT_DIR = Path("vi_text")
HTML_DIR.mkdir(exist_ok=True)
TEXT_DIR.mkdir(exist_ok=True)
MAX_THREADS = 20
MAX_RETRIES = 3
BATCH_SIZE = 1000  # split URLs into manageable chunks

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")
logger = logging.getLogger(__name__)


# -------------------------------
# HELPERS
# -------------------------------
def url_to_filename(url: str) -> str:
    """Generate a unique filename for a URL using a hash."""
    h = hashlib.sha1(url.encode()).hexdigest()[:12]  # 12 hex chars
    safe = "".join(c if c.isalnum() else "_" for c in url[:30])  # first 30 chars safe
    return f"{safe}_{h}.html"


def fetch_html(row):
    url = row["url"]
    warc_path = row["warc_filename"]
    offset = int(row["offset"])
    length = int(row["length"])
    if "crawldiagnostics" in warc_path.lower():
        logger.info(f"⏭️ Skipping diagnostic WARC for {url}")
        return None

    http_url = f"https://data.commoncrawl.org/{warc_path}"
    headers = {"Range": f"bytes={offset}-{offset + length - 1}"}
    safe_name = url_to_filename(url)
    output_path = HTML_DIR / safe_name

    if output_path.exists():
        return output_path

    for attempt in range(MAX_RETRIES):
        try:
            resp = requests.get(http_url, headers=headers, timeout=60)
            resp.raise_for_status()
            for record in ArchiveIterator(io.BytesIO(resp.content)):
                if record.rec_type == "response":
                    html = record.content_stream().read().decode("utf-8", errors="ignore")
                    output_path.write_text(html, encoding="utf-8")
                    logger.info(f"✅ Fetched {url}")
                    return output_path
        except Exception as e:
            logger.warning(f"⚠️ Attempt {attempt + 1} failed for {url}: {e}")
            time.sleep(2)
    return None


def extract_text(html_file):
    try:
        html = html_file.read_text(encoding="utf-8", errors="ignore")
        soup = BeautifulSoup(html, "lxml")
        for script in soup(["script", "style"]):
            script.decompose()
        lines = [line.strip() for line in soup.get_text(separator="\n").splitlines() if line.strip()]
        clean_text = "\n".join(lines)
        txt_file = TEXT_DIR / (html_file.stem + ".txt")
        txt_file.write_text(clean_text, encoding="utf-8")
        logger.info(f"✅ Extracted {html_file.name}")
        return txt_file
    except Exception as e:
        logger.warning(f"⚠️ Failed to extract {html_file.name}: {e}")
        return None


# -------------------------------
# MAIN
# -------------------------------
df = pd.read_csv(URL_CSV)
logger.info(f"Loaded {len(df)} URLs from {URL_CSV}")

# Determine which URLs are missing (resume-safe)
existing_files = set(p.name for p in HTML_DIR.glob("*.html"))
missing_df = df[df.apply(lambda row: url_to_filename(row["url"]) not in existing_files, axis=1)]
logger.info(f"{len(missing_df)} URLs remaining to fetch")

for start in range(0, len(missing_df), BATCH_SIZE):
    batch = missing_df.iloc[start:start + BATCH_SIZE]
    logger.info(f"Processing batch {start} to {start + len(batch)}")

    # ---- Fetch HTML ----
    html_files = []
    with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
        futures = [executor.submit(fetch_html, row) for _, row in batch.iterrows()]
        for future in as_completed(futures):
            result = future.result()
            if result:
                html_files.append(result)

    logger.info(f"✅ Downloaded {len(html_files)} HTML files in this batch")

    # ---- Extract text ----
    with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
        futures = [executor.submit(extract_text, html_file) for html_file in html_files]
        for future in as_completed(futures):
            _ = future.result()

logger.info(f"🎉 All done! Text files are in {TEXT_DIR}")


2026-01-25 17:28:20,140 - Loaded 50000 URLs from vi_commoncrawl_urls.csv
2026-01-25 17:28:31,912 - 50000 URLs remaining to fetch
2026-01-25 17:28:35,165 - Processing batch 0 to 1000
2026-01-25 17:28:39,309 - ✅ Fetched http://0101.vn/robots.txt
2026-01-25 17:28:39,336 - ✅ Fetched http://0101.vn/Downloads.aspx?cat=7
2026-01-25 17:28:39,353 - ✅ Fetched https://007studio.vn/index.php/2023/12/17/man-restaurant/
2026-01-25 17:28:39,354 - ✅ Fetched http://0101.vn/Downloads.aspx?cat=9
2026-01-25 17:28:39,355 - ✅ Fetched http://0101.vn/ShowArticle.aspx?ID=1025
2026-01-25 17:28:39,355 - ✅ Fetched http://0101.vn/
2026-01-25 17:28:39,356 - ✅ Fetched http://0101.vn/Downloads.aspx?cat=1
2026-01-25 17:28:39,356 - ✅ Fetched https://007studio.vn/index.php/2023/12/17/viet-duc-showroom/
2026-01-25 17:28:39,356 - ✅ Fetched http://0101.vn/Downloads.aspx?cat=8
2026-01-25 17:28:39,357 - ✅ Fetched http://0101.vn/Default.aspx
2026-01-25 17:28:39,357 - ✅ Fetched http://0101.vn/Downloads.aspx?cat=5
2026-01-25 17

In [1]:
from datasets import load_dataset
from urllib.parse import urlparse
import pandas as pd
from pathlib import Path
from tqdm import tqdm  # optional visual progress

# ────────────────────────────────────────────────
# Configuration
# ────────────────────────────────────────────────
TARGET_COUNT       = 200_000          # adjust as needed
OUTPUT_CSV         = Path("data/vi_mc4_urls.csv")
FILTER_NON_VN      = True             # exclude .vn domains for diversity
MIN_TEXT_LENGTH    = 200              # skip very short documents

# ────────────────────────────────────────────────
# Load streaming dataset (no full download)
# ────────────────────────────────────────────────
print("Loading streaming Vietnamese mC4 subset...")
ds = load_dataset(
    "allenai/c4",
    "vi",                     # or "multilingual" + manual lang filter if needed
    split="train",
    streaming=True,
    trust_remote_code=True    # sometimes required for older configs
)

# ────────────────────────────────────────────────
# Collect URLs with basic filtering
# ────────────────────────────────────────────────
collected = []
domain_counts = {}  # optional: track TLD diversity

print("Collecting URLs...")
for example in tqdm(ds, desc="Processing examples"):
    url = example["url"].strip()
    text = example["text"].strip()

    if len(text) < MIN_TEXT_LENGTH:
        continue

    domain = urlparse(url).netloc.lower()

    if FILTER_NON_VN and domain.endswith(".vn"):
        continue

    collected.append({
        "url": url,
        "domain": domain,
        "text_length": len(text),
        "timestamp": example.get("timestamp", "")
    })

    # Optional: track domain stats
    tld = "." + domain.split(".")[-1]
    domain_counts[tld] = domain_counts.get(tld, 0) + 1

    if len(collected) >= TARGET_COUNT:
        break

# ────────────────────────────────────────────────
# Save & summarize
# ────────────────────────────────────────────────
if collected:
    df = pd.DataFrame(collected)
    df = df.drop_duplicates(subset="url")
    df.to_csv(OUTPUT_CSV, index=False)

    print(f"\nDone! Collected and saved {len(df):,} unique URLs")
    print(f"Output → {OUTPUT_CSV.resolve()}")

    if domain_counts:
        print("\nTop TLD distribution (for diversity check):")
        print(pd.Series(domain_counts).sort_values(ascending=False).head(12))
else:
    print("No documents collected – check connectivity or filters.")

  from .autonotebook import tqdm as notebook_tqdm
`trust_remote_code` is not supported anymore.
Please check that the Hugging Face dataset 'allenai/c4' isn't based on a loading script and remove `trust_remote_code`.
If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet.


Loading streaming Vietnamese mC4 subset...
Collecting URLs...


Processing examples: 403841it [04:04, 1651.24it/s]



Done! Collected and saved 200,000 unique URLs
Output → /Users/restaurantalsheika/graph-aware-phishing-commoncrawl/notebooks/data/vi_mc4_urls.csv

Top TLD distribution (for diversity check):
.com       134041
.net        30193
.org        12200
.info        6044
.tv          1646
.cn          1128
.biz         1013
.ru           805
.co           700
.online       647
.xyz          642
.us           638
dtype: int64


In [2]:
import os
from pathlib import Path

csv_path = Path("/Users/restaurantalsheika/graph-aware-phishing-commoncrawl/notebooks/data/vi_mc4_urls.csv")

print("Does the file exist?", csv_path.exists())
print("Is it a file?", csv_path.is_file())
print("Absolute path:", csv_path.resolve())
print("Parent directory exists?", csv_path.parent.exists())
print("Files in parent directory:", list(csv_path.parent.glob("*")))

Does the file exist? True
Is it a file? True
Absolute path: /Users/restaurantalsheika/graph-aware-phishing-commoncrawl/notebooks/data/vi_mc4_urls.csv
Parent directory exists? True
Files in parent directory: [PosixPath('/Users/restaurantalsheika/graph-aware-phishing-commoncrawl/notebooks/data/safe'), PosixPath('/Users/restaurantalsheika/graph-aware-phishing-commoncrawl/notebooks/data/vi_non_vn_urls_summary.txt'), PosixPath('/Users/restaurantalsheika/graph-aware-phishing-commoncrawl/notebooks/data/vi_mc4_urls.csv')]


In [10]:
import pandas as pd
from pathlib import Path
from urllib.parse import urlparse
from datasets import load_dataset
from tqdm import tqdm

# ────────────────────────────────────────────────
# Configuration
# ────────────────────────────────────────────────
EXISTING_CSV     = Path("data/vi_mc4_urls.csv")
NEW_CSV          = Path("data/vi_mc4_additional_urls.csv")
TARGET_NEW_URLS  = 500_000          # aim for 500k new URLs → expect 45–100k successful HTML later
MIN_TEXT_LENGTH  = 400
FILTER_NON_VN    = True

# ────────────────────────────────────────────────
# Load existing URLs into a set for fast lookup
# ────────────────────────────────────────────────
print("Loading existing URLs for deduplication...")
if EXISTING_CSV.is_file():
    df_existing = pd.read_csv(EXISTING_CSV, usecols=["url"])
    existing_urls = set(df_existing["url"].dropna().str.strip())
    print(f"Loaded {len(existing_urls):,} unique URLs from existing file")
else:
    existing_urls = set()
    print("No existing file found → no deduplication will be applied")

# ────────────────────────────────────────────────
# Stream additional documents from mC4 Vietnamese subset
# ────────────────────────────────────────────────
print("\nStreaming additional documents from allenai/c4 'vi' subset...")
ds = load_dataset("allenai/c4", "vi", split="train", streaming=True)

collected = []
skipped_existing = 0
skipped_short    = 0
skipped_vn       = 0

print("Collecting new URLs (estimated time: 15–60 minutes depending on network and filtering)...")

for example in tqdm(ds, desc="Processing mC4 documents", unit="doc"):
    text = example.get("text", "").strip()
    if len(text) < MIN_TEXT_LENGTH:
        skipped_short += 1
        continue

    url = example.get("url", "").strip()
    if not url:
        continue

    # Skip if already in the existing collection
    if url in existing_urls:
        skipped_existing += 1
        continue

    domain = urlparse(url).netloc.lower()
    if FILTER_NON_VN and domain.endswith(".vn"):
        skipped_vn += 1
        continue

    collected.append({
        "url": url,
        "domain": domain,
        "text_length": len(text),
        "timestamp": example.get("timestamp", ""),
    })

    if len(collected) >= TARGET_NEW_URLS:
        break

# ────────────────────────────────────────────────
# Save the new collection
# ────────────────────────────────────────────────
if collected:
    df_new = pd.DataFrame(collected).drop_duplicates(subset="url")
    df_new.to_csv(NEW_CSV, index=False)

    print("\n" + "═" * 70)
    print("Collection complete")
    print(f"New unique URLs saved:          {len(df_new):,}")
    print(f"Skipped (already in existing):  {skipped_existing:,}")
    print(f"Skipped (text too short):       {skipped_short:,}")
    print(f"Skipped (.vn domains):          {skipped_vn:,}")
    print(f"File written → {NEW_CSV.resolve()}")

    print("\nTop 12 domains in new collection:")
    print(df_new["domain"].value_counts().head(12))
else:
    print("No new URLs collected after applying filters.")

Loading existing URLs for deduplication...
Loaded 200,000 unique URLs from existing file

Streaming additional documents from allenai/c4 'vi' subset...
Collecting new URLs (estimated time: 15–60 minutes depending on network and filtering)...



Processing mC4 documents: 0doc [00:00, ?doc/s][A
Processing mC4 documents: 1doc [00:04,  4.27s/doc][A
Processing mC4 documents: 906doc [00:04, 293.13doc/s][A
Processing mC4 documents: 1552doc [00:04, 533.84doc/s][A
Processing mC4 documents: 2980doc [00:04, 1306.39doc/s][A
Processing mC4 documents: 3792doc [00:05, 1089.51doc/s][A
Processing mC4 documents: 4598doc [00:06, 1255.80doc/s][A
Processing mC4 documents: 5442doc [00:06, 1733.58doc/s][A
Processing mC4 documents: 6002doc [00:06, 2020.54doc/s][A
Processing mC4 documents: 6524doc [00:07, 1195.96doc/s][A
Processing mC4 documents: 6899doc [00:07, 1110.10doc/s][A
Processing mC4 documents: 7374doc [00:07, 1387.61doc/s][A
Processing mC4 documents: 7715doc [00:08, 1582.51doc/s][A
Processing mC4 documents: 8053doc [00:08, 1136.93doc/s][A
Processing mC4 documents: 8556doc [00:08, 1507.75doc/s][A
Processing mC4 documents: 8869doc [00:08, 1605.81doc/s][A
Processing mC4 documents: 9308doc [00:09, 969.17doc/s] [A
Processing mC


══════════════════════════════════════════════════════════════════════
Collection complete
New unique URLs saved:          500,000
Skipped (already in existing):  183,238
Skipped (text too short):       220,674
Skipped (.vn domains):          602,484
File written → /Users/restaurantalsheika/graph-aware-phishing-commoncrawl/notebooks/data/vi_mc4_additional_urls.csv

Top 12 domains in new collection:
domain
123doc.org                3228
vnexpress.net             2655
bachhoa24.com             2195
www.baomoi.com            2069
vi.wikipedia.org          1827
vietnamese.alibaba.com    1801
timdat.net                1785
truyenyy.com              1640
vietgiaitri.com           1614
m.baomoi.com              1610
www.ivivu.com             1526
vatgia.com                1460
Name: count, dtype: int64


In [6]:
import asyncio
import aiohttp
import aiofiles
import hashlib
from pathlib import Path
from urllib.parse import urlparse
import pandas as pd
from tqdm.asyncio import tqdm_asyncio
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from aiohttp import ClientError, ClientConnectorError, ClientResponseError, ClientTimeout, ClientConnectorCertificateError

# ────────────────────────────────────────────────
# Configuration
# ────────────────────────────────────────────────
CSV_PATH         = Path("data/vi_mc4_urls.csv")
HTML_DIR         = Path("data/html_mc4_current")
HTML_DIR.mkdir(exist_ok=True, parents=True)

MAX_CONCURRENT   = 25                # conservative starting value
REQUEST_TIMEOUT  = aiohttp.ClientTimeout(total=45)
MAX_RETRIES      = 5                 # per URL
BATCH_SIZE       = 2000              # adjust as needed
POLITENESS_DELAY = 1.5               # seconds between batches

USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/122.0.0.0 Safari/537.36"
)

# ────────────────────────────────────────────────
# Retry decorator (certificate errors are included)
# ────────────────────────────────────────────────
@retry(
    stop=stop_after_attempt(MAX_RETRIES),
    wait=wait_exponential(multiplier=1.2, min=3, max=60),
    retry=retry_if_exception_type((
        ClientError,
        ClientConnectorError,
        ClientResponseError,
        ClientTimeout,
        ClientConnectorCertificateError
    )),
    reraise=True
)
async def fetch_single(session: aiohttp.ClientSession, url: str) -> tuple[Path | None, str]:
    headers = {"User-Agent": USER_AGENT}
    async with session.get(
        url,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
        allow_redirects=True
        # No ssl=False — full verification
    ) as resp:
        if resp.status != 200:
            return None, url
        html_bytes = await resp.read()
        domain_hash = hashlib.sha256(url.encode()).hexdigest()[:12]
        safe_name = f"{urlparse(url).netloc.replace('.', '_')}_{domain_hash}.html"
        path = HTML_DIR / safe_name
        async with aiofiles.open(path, "wb") as f:
            await f.write(html_bytes)
        return path, url

# ────────────────────────────────────────────────
# Batch processor — continues even if individual tasks fail
# ────────────────────────────────────────────────
async def process_batch(urls: list[str]):
    connector = aiohttp.TCPConnector(limit=MAX_CONCURRENT)
    async with aiohttp.ClientSession(connector=connector) as session:
        tasks = [fetch_single(session, url) for url in urls]
        results = []
        failures = []

        for coro in tqdm_asyncio.as_completed(tasks, desc=f"Fetching {len(urls)} URLs"):
            try:
                result = await coro
                results.append(result)
            except Exception as exc:
                # Capture the URL and exception for reporting
                # (We know which future failed by matching, but for simplicity we log here)
                failed_url = "unknown"  # improved tracking requires more bookkeeping
                failures.append((failed_url, exc))
                results.append((None, failed_url))

        success = sum(1 for r, _ in results if r is not None)
        if failures:
            print(f"Batch had {len(failures)} failures after retries:")
            for url, exc in failures[:5]:  # show first 5 only
                print(f"  • {url}: {type(exc).__name__} – {exc}")
            if len(failures) > 5:
                print(f"  … and {len(failures)-5} more")

        print(f"Batch complete → {success:,} successful / {len(urls):,} attempted")
        return success

# ────────────────────────────────────────────────
# Main orchestrator
# ────────────────────────────────────────────────
async def main_fetch():
    df = pd.read_csv(CSV_PATH)
    all_urls = df["url"].dropna().tolist()
    print(f"Total unique URLs to fetch: {len(all_urls):,}")

    total_success = 0
    for i in range(0, len(all_urls), BATCH_SIZE):
        batch_urls = all_urls[i:i + BATCH_SIZE]
        print(f"\nStarting batch {i // BATCH_SIZE + 1} ({len(batch_urls)} URLs)")
        success = await process_batch(batch_urls)
        total_success += success

        if i + BATCH_SIZE < len(all_urls):
            print(f"Pausing {POLITENESS_DELAY:.1f} seconds between batches...")
            await asyncio.sleep(POLITENESS_DELAY)

    print(f"\nFetching finished. Total successful HTML files: {total_success:,}")

# Execute in notebook context
await main_fetch()

Total unique URLs to fetch: 200,000

Starting batch 1 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<51:07,  1.53s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:02<34:06,  1.02s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:02<21:10,  1.57it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:02<17:51,  1.86it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:02<10:04,  3.30it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:03<03:41,  8.97it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:03<03:25,  9.66it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:03<03:00, 11.00it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:03<02:19, 14.20it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:03<02:10, 15.11it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:03<02:10, 15.11it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:04<03:51,  8.53it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:04<02:58, 11.03

Batch had 1684 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1679 more
Batch complete → 208 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 2 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:04<2:19:48,  4.20s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:04<1:04:12,  1.93s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:04<19:25,  1.71it/s]  [A
Fetching 2000 URLs:   0%|          | 7/2000 [00:04<12:13,  2.72it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:04<08:40,  3.83it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:05<06:36,  5.01it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:05<05:00,  6.62it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:05<04:24,  7.51it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:05<03:41,  8.95it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:05<02:42, 12.19it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:05<02:04, 15.87it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:06<02:11, 15.01it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:06<01:56,

Batch had 1643 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://ione.net/lynk-lee-mac-vay-quet-dat-lan-dau-lam-vedette-4132629.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1638 more
Batch complete → 214 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 3 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:20:55,  2.43s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<48:44,  1.46s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:03<32:21,  1.03it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:05<41:40,  1.25s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:05<28:20,  1.17it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:05<16:06,  2.06it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:05<03:55,  8.43it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:06<03:27,  9.56it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:06<03:25,  9.61it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:06<02:54, 11.32it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:06<02:45, 11.91it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:06<02:02, 16.10it/s][A
Fetching 2000 URLs:   2%|▏         | 34/2000 [00:07<02:53, 11

Batch had 1654 failures after retries:
  • unknown: ClientConnectorDNSError – Cannot connect to host acm-soict.orgdu-lich-mien-bac:80 ssl:default [nodename nor servname provided, or not known]
  • unknown: TooManyRedirects – 0, message='', url='http://www.tvvn.org/ha-mieng-mac-quai-suu-tam/'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1649 more
Batch complete → 210 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 4 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:00<22:24,  1.49it/s][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:01<12:02,  2.76it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:01<03:54,  8.48it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:01<03:18, 10.05it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:01<02:40, 12.34it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:01<02:44, 12.05it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:02<02:51, 11.53it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:02<02:50, 11.59it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:02<01:49, 18.08it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:02<01:52, 17.48it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:02<01:46, 18.54it/s][A
Fetching 2000 URLs:   2%|▏         | 36/2000 [00:03<02:11, 14.98it/s][A
Fetching 2000 URLs:   2%|▏         | 39/2000 [00:03<02:25, 13.

Batch had 1649 failures after retries:
  • unknown: ClientConnectorCertificateError – Cannot connect to host vietnamese.china.com:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1017)')]
  • unknown: ClientConnectorCertificateError – Cannot connect to host hoatuoitructuyen.net:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1017)')]
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1644 more
Batch complete → 217 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 5 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:00<16:51,  1.98it/s][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:00<12:31,  2.66it/s][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:00<09:33,  3.48it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:01<07:24,  4.49it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:01<04:23,  7.56it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:01<04:36,  7.22it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:01<02:59, 11.09it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:01<02:03, 16.13it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:01<01:29, 22.07it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:02<01:19, 24.76it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:02<01:28, 22.34it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:02<01:29, 21.89it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:02<02:24, 13.63i

Batch had 1665 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1660 more
Batch complete → 212 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 6 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:10:55,  2.13s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:02<33:21,  1.00s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:02<25:52,  1.29it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:03<17:42,  1.88it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:03<09:15,  3.59it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:03<06:14,  5.33it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:03<04:35,  7.21it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:03<03:11, 10.40it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:03<03:31,  9.39it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:03<02:39, 12.42it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:04<02:35, 12.72it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:04<02:21, 14.03it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:04<01:28, 22

Batch had 1675 failures after retries:
  • unknown: ClientConnectorError – Cannot connect to host www.kiemtienquamang.com:443 ssl:default [None]
  • unknown: TooManyRedirects – 0, message='', url='https://www.tvvn.org/jungle-book-nguyen-ngoc-chan/'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1670 more
Batch complete → 204 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 7 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:03<1:56:19,  3.49s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<49:55,  1.50s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:04<38:48,  1.17s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:04<25:17,  1.31it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:04<10:16,  3.23it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:04<08:17,  4.00it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:05<05:01,  6.60it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:05<03:30,  9.44it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:05<02:27, 13.43it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:05<02:52, 11.45it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:06<02:32, 12.90it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:06<02:37, 12.47it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:06<02:19, 14

Batch had 1671 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1666 more
Batch complete → 210 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 8 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:25:56,  2.58s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:02<38:53,  1.17s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:03<26:11,  1.27it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:03<20:43,  1.61it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:03<14:28,  2.30it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:03<12:01,  2.76it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:04<10:22,  3.20it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:04<08:32,  3.88it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:04<03:03, 10.82it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:04<02:55, 11.29it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:04<02:37, 12.57it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:04<02:46, 11.85it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:05<02:37, 12.5

Batch had 1705 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1700 more
Batch complete → 188 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 9 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<46:53,  1.41s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:01<14:40,  2.27it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:01<08:19,  3.99it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:01<07:28,  4.44it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:02<03:53,  8.54it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:02<03:28,  9.51it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:02<03:28,  9.54it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:02<03:10, 10.40it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:02<02:53, 11.43it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:02<02:48, 11.76it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:03<01:51, 17.65it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:03<01:42, 19.22it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:03<02:03, 15.9

Batch had 1706 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1701 more
Batch complete → 183 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 10 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:04<2:37:36,  4.73s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:05<1:26:06,  2.59s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:06<58:46,  1.77s/it]  [A
Fetching 2000 URLs:   0%|          | 4/2000 [00:08<53:49,  1.62s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:08<41:06,  1.24s/it][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:08<16:53,  1.97it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:10<24:52,  1.33it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:10<19:21,  1.71it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:11<07:43,  4.28it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:11<07:08,  4.63it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:11<03:35,  9.16it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:11<03:51,  8.51it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:12<04:05,  

Batch had 1686 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1681 more
Batch complete → 200 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 11 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:14:14,  2.23s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:02<39:33,  1.19s/it]  [A
Fetching 2000 URLs:   0%|          | 5/2000 [00:02<12:42,  2.62it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:02<08:18,  4.00it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:03<06:35,  5.04it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:03<03:38,  9.11it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:03<03:44,  8.83it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:03<03:34,  9.26it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:04<04:38,  7.10it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:04<04:29,  7.33it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:05<04:22,  7.53it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:05<03:46,  8.69it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:05<04:46,  

Batch had 1734 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://ione.net/tin-tuc/girls-boys/cac-loai-son-duong-moi-mem-min-tu-nhat-ban-3315624.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1729 more
Batch complete → 177 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 12 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:00<32:12,  1.03it/s][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:01<11:21,  2.93it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:01<07:25,  4.48it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:01<05:41,  5.83it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:01<06:10,  5.37it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:02<02:35, 12.76it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:02<02:05, 15.79it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:02<01:47, 18.39it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:02<02:01, 16.30it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:03<02:27, 13.38it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:03<02:38, 12.40it/s][A
Fetching 2000 URLs:   2%|▏         | 38/2000 [00:03<01:58, 16.49it/s][A
Fetching 2000 URLs:   2%|▏         | 41/2000 [00:03<01:48, 18.12

Batch had 1618 failures after retries:
  • unknown: ClientConnectorCertificateError – Cannot connect to host www.hitchhikersgui.de:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1017)')]
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1613 more
Batch complete → 225 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 13 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:12<7:04:48, 12.75s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:13<3:13:37,  5.81s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:14<1:53:12,  3.40s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:14<1:11:42,  2.16s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:14<48:35,  1.46s/it]  [A
Fetching 2000 URLs:   0%|          | 7/2000 [00:14<25:17,  1.31it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:15<27:55,  1.19it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:16<22:18,  1.49it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:16<20:32,  1.61it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:16<17:04,  1.94it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:18<18:14,  1.81it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:18<13:35,  2.43it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:18<08:12

Batch had 1811 failures after retries:
  • unknown: ClientConnectorCertificateError – Cannot connect to host maychuaogiarenhat.com:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1017)')]
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1806 more
Batch complete → 110 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 14 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:12<6:48:32, 12.26s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:14<3:26:01,  6.19s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:14<1:20:07,  2.41s/it][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:14<25:24,  1.31it/s]  [A
Fetching 2000 URLs:   1%|          | 11/2000 [00:15<20:19,  1.63it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:16<18:42,  1.77it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:16<10:41,  3.09it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:16<09:36,  3.43it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:16<06:48,  4.84it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:17<05:57,  5.52it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:17<05:02,  6.51it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:17<03:39,  8.97it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:17<03:

Batch had 1774 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1769 more
Batch complete → 130 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 15 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:11<6:25:03, 11.56s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:11<2:42:54,  4.89s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:12<1:34:19,  2.83s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:12<1:06:34,  2.00s/it][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:13<25:54,  1.28it/s]  [A
Fetching 2000 URLs:   0%|          | 8/2000 [00:13<26:21,  1.26it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:14<20:54,  1.59it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:14<19:32,  1.70it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:14<12:16,  2.70it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:15<12:36,  2.63it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:15<11:25,  2.90it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:15<07:47,  4.25it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:15<06:0

Batch had 1689 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1684 more
Batch complete → 204 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 16 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:35:53,  2.88s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<52:27,  1.58s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:04<38:45,  1.16s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:04<25:07,  1.32it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:04<18:14,  1.82it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:04<13:55,  2.39it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:05<13:24,  2.48it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:05<12:20,  2.69it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:05<11:56,  2.78it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:05<07:52,  4.21it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:06<12:18,  2.69it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:06<11:28,  2.89it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:07<05:51,  5.64

Batch had 1720 failures after retries:
  • unknown: ClientConnectorCertificateError – Cannot connect to host chaobuoisang.net:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1017)')]
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1715 more
Batch complete → 174 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 17 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<1:06:11,  1.99s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<54:34,  1.64s/it]  [A
Fetching 2000 URLs:   0%|          | 5/2000 [00:03<16:25,  2.02it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:04<15:16,  2.17it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:04<13:05,  2.54it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:04<05:10,  6.40it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:04<04:51,  6.80it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:05<05:02,  6.56it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:05<03:31,  9.34it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:05<03:43,  8.85it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:05<03:44,  8.79it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:06<02:59, 10.97it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:06<02:29, 1

Batch had 1671 failures after retries:
  • unknown: ClientConnectorCertificateError – Cannot connect to host www.ceoclubvietnam.com:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1017)')]
  • unknown: TooManyRedirects – 0, message='', url='https://www.nguyenkim.com/usb-sandisk-ixpand-64gb.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1666 more
Batch complete → 208 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 18 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:23:00,  2.49s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:02<36:59,  1.11s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:02<24:01,  1.38it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:03<16:27,  2.02it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:03<09:07,  3.64it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:03<09:55,  3.34it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:04<08:37,  3.85it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:04<07:20,  4.51it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:04<02:30, 13.17it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:04<02:51, 11.58it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:04<02:38, 12.49it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:05<02:48, 11.71it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:05<03:01, 10.

Batch had 1718 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='http://meyeucon.org/16903/tre-cung-co-the-bi-di-ung-khi-bu-sua-me/'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1713 more
Batch complete → 168 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 19 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:07<3:56:13,  7.09s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:07<1:41:24,  3.05s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:07<41:51,  1.26s/it]  [A
Fetching 2000 URLs:   0%|          | 5/2000 [00:07<30:18,  1.10it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:08<17:50,  1.86it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:09<17:21,  1.91it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:09<12:44,  2.60it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:09<14:03,  2.36it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:10<06:53,  4.80it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:10<07:16,  4.54it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:11<08:36,  3.83it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:11<08:49,  3.74it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:12<08:30, 

Batch had 1770 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://ione.vnexpress.net/tin-tuc/sao/nhung-guong-mat-vut-sang-thanh-sao-nam-2013-2924592.html'
  • unknown: ClientConnectorError – Cannot connect to host www.webdanang.com:443 ssl:default [None]
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1765 more
Batch complete → 138 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 20 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:11<6:27:26, 11.63s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:12<2:52:41,  5.19s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:13<1:45:18,  3.16s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:13<1:05:19,  1.96s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:13<44:33,  1.34s/it]  [A
Fetching 2000 URLs:   0%|          | 6/2000 [00:13<32:14,  1.03it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:14<29:50,  1.11it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:14<21:41,  1.53it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:14<06:52,  4.82it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:15<05:01,  6.58it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:15<05:07,  6.44it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:15<04:50,  6.81it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:15<05:24

Batch had 1815 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1810 more
Batch complete → 109 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 21 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:03<1:39:56,  3.00s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:05<1:35:43,  2.87s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:06<41:26,  1.25s/it]  [A
Fetching 2000 URLs:   0%|          | 5/2000 [00:06<29:47,  1.12it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:06<24:43,  1.34it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:06<18:30,  1.79it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:07<13:54,  2.39it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:07<12:39,  2.62it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:07<10:10,  3.26it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:08<09:37,  3.44it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:08<11:06,  2.98it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:10<08:23,  3.93it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:10<07:32,  4

Batch had 1824 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1819 more
Batch complete → 114 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 22 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:05<2:51:34,  5.15s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:05<1:13:05,  2.19s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:06<50:40,  1.52s/it]  [A
Fetching 2000 URLs:   0%|          | 4/2000 [00:08<1:02:30,  1.88s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:08<43:33,  1.31s/it]  [A
Fetching 2000 URLs:   0%|          | 6/2000 [00:09<33:20,  1.00s/it][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:09<30:06,  1.10it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:10<15:15,  2.17it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:10<13:23,  2.48it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:11<09:44,  3.40it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:11<05:31,  5.98it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:11<03:32,  9.28it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:11<03:1

Batch had 1706 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://www.nguyenkim.com/meizu-trinh-lang-pro-6-plus-do-ruc-quyen-ru-hop-hon-nguoi-ham-mo.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1701 more
Batch complete → 167 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 23 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:06<3:25:52,  6.18s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:06<1:28:50,  2.67s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:06<52:38,  1.58s/it]  [A
Fetching 2000 URLs:   0%|          | 4/2000 [00:07<38:37,  1.16s/it][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:07<19:13,  1.73it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:07<09:47,  3.39it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:07<06:38,  4.99it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:07<05:23,  6.14it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:07<03:25,  9.66it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:08<02:42, 12.22it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:08<02:28, 13.34it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:08<02:29, 13.24it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:08<02:22, 

Batch had 1662 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1657 more
Batch complete → 210 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 24 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:05<3:14:10,  5.83s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:05<1:22:13,  2.47s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:06<35:34,  1.07s/it]  [A
Fetching 2000 URLs:   0%|          | 6/2000 [00:06<19:44,  1.68it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:06<11:23,  2.91it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:07<10:20,  3.21it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:07<08:55,  3.71it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:07<06:31,  5.08it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:07<04:18,  7.66it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:07<03:34,  9.23it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:07<03:30,  9.42it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:07<02:59, 11.00it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:08<02:42,

Batch had 1740 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='http://maytinhxachtaymy.com/brands/Laptop-Dual%252dCore.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1735 more
Batch complete → 146 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 25 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:07<4:05:02,  7.35s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:07<1:48:48,  3.27s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:08<1:02:53,  1.89s/it][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:08<23:23,  1.42it/s]  [A
Fetching 2000 URLs:   0%|          | 7/2000 [00:08<18:43,  1.77it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:09<15:21,  2.16it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:09<04:10,  7.90it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:09<03:28,  9.47it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:09<03:46,  8.72it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:09<03:18,  9.94it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:10<02:56, 11.15it/s][A
Fetching 2000 URLs:   2%|▏         | 34/2000 [00:10<02:45, 11.88it/s][A
Fetching 2000 URLs:   2%|▏         | 37/2000 [00:10<02:28

Batch had 1608 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1603 more
Batch complete → 237 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 26 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:08:39,  2.06s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:02<23:03,  1.44it/s]  [A
Fetching 2000 URLs:   0%|          | 7/2000 [00:02<08:57,  3.71it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:02<05:16,  6.28it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:03<04:56,  6.71it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:03<04:54,  6.73it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:04<05:15,  6.29it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:04<05:28,  6.04it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:04<03:52,  8.52it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:04<02:33, 12.89it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:04<03:39,  8.96it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:05<02:54, 11.30it/s][A
Fetching 2000 URLs:   2%|▏         | 34/2000 [00:05<03:26,

Batch had 1673 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1668 more
Batch complete → 197 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 27 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:28:15,  2.65s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:02<19:47,  1.68it/s]  [A
Fetching 2000 URLs:   0%|          | 5/2000 [00:03<16:31,  2.01it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:03<10:19,  3.22it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:03<07:18,  4.54it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:03<07:02,  4.71it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:03<03:55,  8.42it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:04<03:14, 10.22it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:04<06:15,  5.28it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:05<04:51,  6.77it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:05<04:16,  7.70it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:06<03:46,  8.70it/s][A
Fetching 2000 URLs:   2%|▏         | 34/2000 [00:06<04:15,  

Batch had 1769 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1764 more
Batch complete → 139 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 28 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<58:27,  1.75s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:02<28:50,  1.15it/s][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:02<25:43,  1.29it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:02<19:33,  1.70it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:03<13:56,  2.38it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:03<05:29,  6.05it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:03<06:25,  5.15it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:04<05:41,  5.81it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:04<06:16,  5.27it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:04<06:54,  4.79it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:05<04:37,  7.13it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:05<04:42,  6.99it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:05<04:23,  7.51i

Batch had 1649 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://ione.vnexpress.net/tin-tuc/sao/chau-a/jennie-visual-so-mot-cua-black-pink-la-ji-soo-3766301.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1644 more
Batch complete → 227 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 29 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<1:05:42,  1.97s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:02<33:24,  1.00s/it]  [A
Fetching 2000 URLs:   0%|          | 4/2000 [00:02<14:36,  2.28it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:03<16:07,  2.06it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:03<11:08,  2.98it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:03<12:11,  2.72it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:04<06:33,  5.06it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:04<05:13,  6.33it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:04<04:53,  6.75it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:04<04:07,  8.00it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:04<03:43,  8.86it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:05<04:05,  8.05it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:05<03:25,  9

Batch had 1681 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1676 more
Batch complete → 204 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 30 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:25:34,  2.57s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<57:29,  1.73s/it]  [A
Fetching 2000 URLs:   0%|          | 6/2000 [00:03<13:58,  2.38it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:04<08:56,  3.71it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:04<07:56,  4.17it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:04<05:20,  6.20it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:04<04:41,  7.05it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:05<04:00,  8.25it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:05<05:03,  6.52it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:05<04:15,  7.73it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:05<02:55, 11.25it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:06<02:53, 11.36it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:06<03:01, 

Batch had 1650 failures after retries:
  • unknown: ClientConnectorError – Cannot connect to host 127.0.0.1:80 ssl:default [Connect call failed ('127.0.0.1', 80)]
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1645 more
Batch complete → 226 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 31 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:16<8:59:50, 16.20s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:18<4:30:40,  8.13s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:19<1:49:45,  3.30s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:21<1:32:58,  2.80s/it][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:21<1:12:07,  2.17s/it][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:22<51:39,  1.56s/it]  [A
Fetching 2000 URLs:   1%|          | 11/2000 [00:22<20:14,  1.64it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:22<19:34,  1.69it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:23<16:37,  1.99it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:23<07:21,  4.48it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:23<06:01,  5.48it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:23<04:28,  7.34it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:23<0

Batch had 1699 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1694 more
Batch complete → 182 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 32 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:17<9:40:07, 17.41s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:17<4:01:54,  7.26s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:17<1:31:51,  2.76s/it][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:17<50:11,  1.51s/it]  [A
Fetching 2000 URLs:   0%|          | 7/2000 [00:18<38:21,  1.15s/it][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:18<13:25,  2.47it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:18<07:16,  4.54it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:18<06:22,  5.18it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:19<05:43,  5.76it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:19<05:09,  6.37it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:19<04:25,  7.41it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:19<04:05,  8.03it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:19<03:2

Batch had 1687 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1682 more
Batch complete → 179 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 33 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:06<3:30:21,  6.31s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:07<1:08:57,  2.07s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:08<38:23,  1.15s/it]  [A
Fetching 2000 URLs:   0%|          | 8/2000 [00:08<19:57,  1.66it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:08<14:16,  2.32it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:08<14:00,  2.37it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:09<08:11,  4.04it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:09<06:22,  5.18it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:09<03:36,  9.14it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:10<05:28,  6.01it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:10<05:35,  5.88it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:10<03:59,  8.21it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:10<02:37

Batch had 1712 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1707 more
Batch complete → 174 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 34 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:20:32,  2.42s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<46:10,  1.39s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:03<27:40,  1.20it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:03<24:08,  1.38it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:04<19:34,  1.70it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:04<10:41,  3.11it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:04<09:17,  3.57it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:04<07:49,  4.24it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:04<06:23,  5.18it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:04<05:49,  5.69it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:05<01:52, 17.55it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:05<02:18, 14.22it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:05<02:56, 11.2

Batch had 1696 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://www.nguyenkim.com/zenfone-4-max-pro-sap-dap-canh-tai-viet-nam-voi-pin-khung-5000mah-co-gia-duoi-5-trieu-dong.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1691 more
Batch complete → 190 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 35 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<54:45,  1.64s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:01<25:11,  1.32it/s][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:02<21:04,  1.58it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:02<14:46,  2.25it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:02<09:30,  3.49it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:02<09:21,  3.55it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:03<09:59,  3.32it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:03<11:34,  2.87it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:04<09:19,  3.55it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:04<06:50,  4.84it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:04<05:44,  5.76it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:04<04:19,  7.63it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:05<04:33,  7.24it/

Batch had 1713 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://www.nguyenkim.com/quat-phun-suong-quat-hoi-nuoc/?subcats=Y&features_hash=V5958&utm_source=quat_phun_suong_kangaroo&utm_medium=gia_dung&utm_campaign=seo_tang'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1708 more
Batch complete → 171 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 36 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:19:50,  2.40s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:02<38:52,  1.17s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:02<25:00,  1.33it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:03<09:33,  3.48it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:03<06:53,  4.81it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:03<06:11,  5.36it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:03<05:48,  5.70it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:04<07:58,  4.15it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:04<07:55,  4.18it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:04<04:59,  6.62it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:05<04:52,  6.78it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:05<03:22,  9.76it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:05<04:02,  

Batch had 1706 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1701 more
Batch complete → 167 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 37 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:09<5:06:23,  9.20s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:09<2:16:24,  4.10s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:12<1:48:41,  3.27s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:12<1:10:11,  2.11s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:12<48:04,  1.45s/it]  [A
Fetching 2000 URLs:   0%|          | 6/2000 [00:13<36:01,  1.08s/it][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:13<27:10,  1.22it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:13<20:38,  1.61it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:13<15:58,  2.08it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:14<15:52,  2.09it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:14<02:52, 11.47it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:14<03:19,  9.91it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:15<04:03,

Batch had 1703 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1698 more
Batch complete → 187 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 38 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:03<1:57:56,  3.54s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<56:10,  1.69s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:04<32:23,  1.03it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:04<12:58,  2.56it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:04<09:19,  3.56it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:04<05:17,  6.26it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:04<04:28,  7.41it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:05<02:35, 12.69it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:05<02:45, 11.92it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:05<02:16, 14.43it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:05<02:03, 15.88it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:05<01:49, 17.96it/s][A
Fetching 2000 URLs:   2%|▏         | 38/2000 [00:06<02:09, 1

Batch had 1655 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1650 more
Batch complete → 228 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 39 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:03<1:43:54,  3.12s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<45:18,  1.36s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:03<26:50,  1.24it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:03<06:11,  5.36it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:03<05:16,  6.28it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:03<04:15,  7.77it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:04<03:49,  8.63it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:04<03:39,  9.02it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:04<03:02, 10.87it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:04<02:42, 12.17it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:04<02:11, 14.99it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:04<02:06, 15.58it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:04<01:48, 

Batch had 1665 failures after retries:
  • unknown: ClientResponseError – 400, message='Got more than 8190 bytes (8404) when reading Header value is too long.', url='https://twitter.com/hashtag/A7ii?src=hash'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1660 more
Batch complete → 222 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 40 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:00<28:18,  1.18it/s][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:01<17:31,  1.90it/s][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:01<14:44,  2.26it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:01<10:32,  3.16it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:01<04:03,  8.18it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:01<03:16, 10.12it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:01<02:22, 13.90it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:02<02:20, 14.14it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:02<01:36, 20.58it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:02<01:36, 20.54it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:02<01:23, 23.64it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:02<01:57, 16.73it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:03<02:21, 13.86

Batch had 1621 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://www.tvvn.org/nghe-nhac-va-nhin-van-hoa-cua-cong-san-dan-den/'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1616 more
Batch complete → 239 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 41 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<36:19,  1.09s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:01<12:16,  2.71it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:01<04:42,  7.06it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:01<04:58,  6.67it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:01<03:39,  9.06it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:02<03:07, 10.57it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:02<03:00, 10.99it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:02<03:07, 10.59it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:02<02:49, 11.70it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:03<02:07, 15.45it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:03<01:55, 17.09it/s][A
Fetching 2000 URLs:   2%|▏         | 36/2000 [00:03<01:49, 17.94it/s][A
Fetching 2000 URLs:   2%|▏         | 40/2000 [00:03<01:30, 21.5

Batch had 1699 failures after retries:
  • unknown: ClientConnectorDNSError – Cannot connect to host www.fontasian.com:443 ssl:default [nodename nor servname provided, or not known]
  • unknown: TooManyRedirects – 0, message='', url='http://nguoivietnamchau.com/forums/showthread.php?s=d30b64dcae9c4e723d34656ae251f208&t=12701'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1694 more
Batch complete → 179 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 42 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:00<24:54,  1.34it/s][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:01<20:04,  1.66it/s][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:01<12:34,  2.65it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:01<06:51,  4.85it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:01<03:28,  9.57it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:01<02:26, 13.54it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:02<02:18, 14.30it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:02<02:09, 15.28it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:02<02:19, 14.17it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:02<02:55, 11.26it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:02<03:04, 10.74it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:02<03:10, 10.35it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:03<02:25, 13.5

Batch had 1769 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1764 more
Batch complete → 133 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 43 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<1:00:58,  1.83s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:01<27:06,  1.23it/s]  [A
Fetching 2000 URLs:   0%|          | 5/2000 [00:02<08:41,  3.83it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:02<04:21,  7.60it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:02<02:48, 11.77it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:02<01:51, 17.70it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:02<02:04, 15.92it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:02<01:48, 18.23it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:03<02:13, 14.80it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:03<02:30, 13.05it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:03<02:18, 14.16it/s][A
Fetching 2000 URLs:   2%|▏         | 38/2000 [00:03<02:09, 15.13it/s][A
Fetching 2000 URLs:   2%|▏         | 40/2000 [00:04<03:20, 

Batch had 1698 failures after retries:
  • unknown: ClientResponseError – 400, message='Got more than 8190 bytes (8943) when reading Header value is too long.', url='https://x.com/expired_domai9/status/1999366216956223714?s=20'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1693 more
Batch complete → 186 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 44 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:03<1:44:44,  3.14s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<51:54,  1.56s/it]  [A
Fetching 2000 URLs:   0%|          | 6/2000 [00:03<12:42,  2.61it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:03<08:52,  3.74it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:03<05:43,  5.80it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:04<03:15, 10.15it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:04<02:41, 12.26it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:04<02:51, 11.51it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:04<02:23, 13.74it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:04<02:06, 15.57it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:04<01:51, 17.66it/s][A
Fetching 2000 URLs:   2%|▏         | 34/2000 [00:05<02:46, 11.82it/s][A
Fetching 2000 URLs:   2%|▏         | 36/2000 [00:05<03:13, 

Batch had 1655 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1650 more
Batch complete → 216 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 45 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:03<2:07:51,  3.84s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:04<1:04:00,  1.92s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:04<37:02,  1.11s/it]  [A
Fetching 2000 URLs:   0%|          | 6/2000 [00:04<13:45,  2.42it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:05<10:09,  3.27it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:05<05:45,  5.75it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:05<03:30,  9.43it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:05<02:56, 11.20it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:05<03:12, 10.25it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:05<02:33, 12.90it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:06<02:29, 13.16it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:06<02:47, 11.76it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:06<02:56,

Batch had 1693 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://www.tvvn.org/cuoc-vuot-nguc-dam-mau-trai-tu-gia-trung-nguyen-hoang-son/'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1688 more
Batch complete → 193 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 46 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<35:26,  1.06s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:01<18:53,  1.76it/s][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:01<12:53,  2.58it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:01<09:47,  3.39it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:02<11:11,  2.97it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:02<06:22,  5.22it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:02<06:42,  4.95it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:02<03:43,  8.90it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:02<03:35,  9.20it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:03<03:11, 10.34it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:03<02:57, 11.15it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:03<02:39, 12.44it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:03<02:47, 11.79it

Batch had 1706 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://ione.net/tin-tuc/nhip-song/co-giao-xinh-xan-day-tieng-anh-qua-minh-yeu-nhau-di-2960563.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1701 more
Batch complete → 191 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 47 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<1:02:00,  1.86s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:01<27:38,  1.20it/s]  [A
Fetching 2000 URLs:   0%|          | 4/2000 [00:02<11:40,  2.85it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:02<09:16,  3.59it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:02<05:42,  5.82it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:02<03:23,  9.76it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:02<03:31,  9.38it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:02<03:38,  9.07it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:03<03:19,  9.97it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:03<02:57, 11.14it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:03<02:02, 16.13it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:03<02:16, 14.51it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:03<02:35, 1

Batch had 1667 failures after retries:
  • unknown: ClientConnectorError – Cannot connect to host www.vncgarden.com:443 ssl:default [None]
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1662 more
Batch complete → 214 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 48 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:00<29:08,  1.14it/s][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:01<17:08,  1.94it/s][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:01<11:18,  2.94it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:01<08:21,  3.98it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:01<03:23,  9.78it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:01<02:32, 13.06it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:01<02:06, 15.71it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:02<01:48, 18.26it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:02<01:48, 18.15it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:02<01:47, 18.37it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:02<01:26, 22.65it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:02<01:09, 28.11it/s][A
Fetching 2000 URLs:   2%|▏         | 39/2000 [00:02<01:37, 20.03

Batch had 1647 failures after retries:
  • unknown: ClientConnectorCertificateError – Cannot connect to host frozenlabs.net:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1017)')]
  • unknown: ClientConnectorCertificateError – Cannot connect to host ym7.vishnu-vardhan.com:443 ssl:True [SSLCertVerificationError: (1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'ym7.vishnu-vardhan.com'. (_ssl.c:1017)")]
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1642 more
Batch complete → 236 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 49 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:03<2:05:34,  3.77s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:04<1:00:01,  1.80s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:04<24:23,  1.36it/s]  [A
Fetching 2000 URLs:   0%|          | 7/2000 [00:04<11:43,  2.83it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:04<07:20,  4.52it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:04<04:31,  7.32it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:05<03:54,  8.46it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:05<04:15,  7.76it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:05<03:12, 10.25it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:05<02:57, 11.13it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:05<02:45, 11.92it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:05<02:31, 13.01it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:06<02:36

Batch had 1707 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://ione.vnexpress.net/tin-tuc/phim/angelababy-lo-tao-hinh-xinh-dep-trong-van-trung-ca-2745182.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1702 more
Batch complete → 186 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 50 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:03<2:04:34,  3.74s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<55:54,  1.68s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:04<34:39,  1.04s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:04<26:13,  1.27it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:04<18:42,  1.78it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:05<08:43,  3.80it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:05<04:35,  7.21it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:05<04:41,  7.05it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:05<03:59,  8.29it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:06<04:19,  7.63it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:06<02:55, 11.23it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:06<02:25, 13.57it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:06<02:16, 14

Batch had 1768 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://www.nguyenkim.com/cong-dong-samfans-day-song-vi-uu-dai-khung-khi-mua-galaxy-j7.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1763 more
Batch complete → 146 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 51 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:06<3:27:34,  6.23s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:06<1:30:18,  2.71s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:06<34:48,  1.05s/it]  [A
Fetching 2000 URLs:   0%|          | 5/2000 [00:06<26:02,  1.28it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:06<08:55,  3.72it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:07<04:57,  6.66it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:07<04:21,  7.57it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:07<03:49,  8.63it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:07<03:26,  9.57it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:07<03:31,  9.36it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:08<02:36, 12.59it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:08<02:23, 13.71it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:08<03:01

Batch had 1742 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://ione.net/so-nguc-nhay-khieu-dam-quyen-tien-tu-thien-1974457.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1737 more
Batch complete → 151 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 52 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:06<3:34:17,  6.43s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:06<1:30:15,  2.71s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:07<1:05:35,  1.97s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:08<45:39,  1.37s/it]  [A
Fetching 2000 URLs:   0%|          | 6/2000 [00:08<25:28,  1.30it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:08<21:19,  1.56it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:09<21:24,  1.55it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:10<20:33,  1.61it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:10<15:38,  2.12it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:10<10:07,  3.27it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:11<13:45,  2.41it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:11<07:46,  4.25it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:11<07:56, 

Batch had 1784 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1779 more
Batch complete → 134 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 53 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<51:56,  1.56s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:01<11:09,  2.98it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:01<07:21,  4.52it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:02<03:41,  8.98it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:02<03:23,  9.76it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:02<03:48,  8.69it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:02<01:56, 17.00it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:02<02:01, 16.25it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:03<01:56, 16.90it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:03<01:47, 18.32it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:03<02:00, 16.33it/s][A
Fetching 2000 URLs:   2%|▏         | 37/2000 [00:03<01:58, 16.53it/s][A
Fetching 2000 URLs:   2%|▏         | 39/2000 [00:03<01:54, 17.

Batch had 1691 failures after retries:
  • unknown: ClientConnectorCertificateError – Cannot connect to host thetindung.com.vn:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1017)')]
  • unknown: TooManyRedirects – 0, message='', url='http://www.vienphauthuat.com/kham-pha-cac-phuong-phap-nang-nguc-tu-nhien-an-toan.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1686 more
Batch complete → 201 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 54 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:00<15:19,  2.17it/s][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:01<20:04,  1.66it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:01<04:52,  6.82it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:01<03:55,  8.44it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:01<02:39, 12.43it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:01<02:41, 12.29it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:01<02:21, 14.04it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:02<02:09, 15.33it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:02<02:36, 12.64it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:02<01:57, 16.84it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:02<01:45, 18.59it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:02<01:55, 17.01it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:02<02:01, 16.1

Batch had 1781 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://ione.net/tin-tuc/nhip-song/hong/quang-hai-khoe-anh-khoac-vai-ban-gai-moi-4098438.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1776 more
Batch complete → 141 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 55 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:00<27:17,  1.22it/s][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:01<22:48,  1.46it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:01<13:59,  2.38it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:02<06:48,  4.87it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:02<03:32,  9.37it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:02<02:54, 11.37it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:02<02:13, 14.78it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:02<02:07, 15.45it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:03<02:30, 13.12it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:03<02:20, 13.99it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:03<02:10, 15.02it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:03<02:26, 13.42it/s][A
Fetching 2000 URLs:   2%|▏         | 39/2000 [00:03<01:50, 17.6

Batch had 1727 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='http://vietmoz.net/cac-tieu-chi-xep-hang-cua-google-phan-5-noi-dung-trung-lap/?replytocom=2566'
  • unknown: TooManyRedirects – 0, message='', url='http://www.tvvn.org/baywatch-nguyen-ngoc-chan/'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1722 more
Batch complete → 171 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 56 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<48:29,  1.46s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:01<10:15,  3.24it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:01<05:25,  6.12it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:01<03:15, 10.18it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:01<02:33, 12.94it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:02<02:07, 15.57it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:02<01:56, 16.97it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:02<01:54, 17.31it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:02<01:40, 19.56it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:02<01:42, 19.23it/s][A
Fetching 2000 URLs:   2%|▏         | 34/2000 [00:02<01:20, 24.32it/s][A
Fetching 2000 URLs:   2%|▏         | 37/2000 [00:02<01:18, 25.02it/s][A
Fetching 2000 URLs:   2%|▏         | 40/2000 [00:03<01:21, 23.

Batch had 1699 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://www.tvvn.org/niem-vui-trong-thoi-dai-da-chung-toc-da-van-hoa-doan-thanh-liem/'
  • unknown: TooManyRedirects – 0, message='', url='https://www.nguyenkim.com/can-loi-voi-nhung-cach-nuong-thit-cua-nhung-dau-bep-tai-ba.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1694 more
Batch complete → 175 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 57 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:00<29:29,  1.13it/s][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:01<14:48,  2.25it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:01<05:13,  6.35it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:01<05:20,  6.23it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:01<03:30,  9.48it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:01<02:36, 12.72it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:01<02:24, 13.69it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:02<02:24, 13.68it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:02<02:11, 15.05it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:02<01:50, 17.86it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:02<01:48, 18.13it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:02<01:47, 18.35it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:03<05:30,  5.9

Batch had 1690 failures after retries:
  • unknown: ClientConnectorCertificateError – Cannot connect to host www.askives.com:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1017)')]
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1685 more
Batch complete → 193 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 58 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:03<2:03:18,  3.70s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:04<27:07,  1.23it/s]  [A
Fetching 2000 URLs:   0%|          | 8/2000 [00:04<11:19,  2.93it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:04<07:28,  4.43it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:04<06:29,  5.10it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:04<04:32,  7.27it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:04<04:03,  8.13it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:05<03:56,  8.37it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:05<03:35,  9.17it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:05<03:30,  9.39it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:05<03:11, 10.33it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:05<02:24, 13.62it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:06<02:53,

Batch had 1713 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='http://mangcapquangfpt.com/bai-viet/25-lap-dat-mang-fpt-quan-12-tai-dia-ban-tphcm.html?s=f70d7b103471e10066c9be2af9cb5f69&mode=hybrid'
  • unknown: TooManyRedirects – 0, message='', url='http://www.changevn.org/tin-tuc/140-hay-len-tieng-hay-bao-ve-te-giac-cung-cac-doanh-nhan-bac-si-chinh-tri-gia-viet-nam'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1708 more
Batch complete → 184 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 59 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:11<6:22:38, 11.48s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:12<2:51:19,  5.14s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:12<1:07:17,  2.02s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:12<47:32,  1.43s/it]  [A
Fetching 2000 URLs:   0%|          | 6/2000 [00:12<34:14,  1.03s/it][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:13<36:22,  1.09s/it][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:14<24:08,  1.37it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:14<15:11,  2.18it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:15<15:02,  2.20it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:16<16:36,  1.99it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:16<08:49,  3.74it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:17<08:24,  3.93it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:17<07:40,

Batch had 1718 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1713 more
Batch complete → 171 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 60 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:08<4:49:13,  8.68s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:09<2:08:12,  3.85s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:09<1:12:04,  2.17s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:09<45:52,  1.38s/it]  [A
Fetching 2000 URLs:   0%|          | 5/2000 [00:09<32:42,  1.02it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:09<17:09,  1.94it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:10<10:52,  3.05it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:10<08:58,  3.69it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:10<05:34,  5.93it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:11<06:39,  4.97it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:11<05:35,  5.91it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:11<04:39,  7.09it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:11<05:10,

Batch had 1705 failures after retries:
  • unknown: ClientConnectorCertificateError – Cannot connect to host timbenhvien.info:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate (_ssl.c:1017)')]
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1700 more
Batch complete → 183 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 61 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:11<6:25:09, 11.56s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:12<2:51:57,  5.16s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:12<1:37:39,  2.93s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:13<1:06:12,  1.99s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:14<55:20,  1.66s/it]  [A
Fetching 2000 URLs:   0%|          | 6/2000 [00:14<40:25,  1.22s/it][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:14<29:00,  1.14it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:14<17:11,  1.93it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:15<13:44,  2.41it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:15<10:19,  3.21it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:15<07:09,  4.62it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:15<08:02,  4.11it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:16<10:19

Batch had 1765 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1760 more
Batch complete → 150 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 62 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:22<12:25:56, 22.39s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:23<5:36:06, 10.09s/it] [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:24<3:10:01,  5.71s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:24<1:23:25,  2.51s/it][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:25<1:06:24,  2.00s/it][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:25<38:04,  1.15s/it]  [A
Fetching 2000 URLs:   0%|          | 9/2000 [00:26<37:45,  1.14s/it][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:26<23:15,  1.42it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:26<19:07,  1.73it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:26<15:29,  2.14it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:27<12:41,  2.61it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:27<12:02,  2.75it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:27<

Batch had 1809 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1804 more
Batch complete → 120 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 63 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:04<2:18:18,  4.15s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:04<31:18,  1.06it/s]  [A
Fetching 2000 URLs:   0%|          | 5/2000 [00:05<33:25,  1.01s/it][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:06<16:09,  2.05it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:06<13:44,  2.41it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:07<17:05,  1.94it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:07<03:54,  8.44it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:08<05:10,  6.35it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:10<09:51,  3.34it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:10<07:16,  4.51it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:10<06:59,  4.69it/s][A
Fetching 2000 URLs:   2%|▏         | 37/2000 [00:10<04:48,  6.79it/s][A
Fetching 2000 URLs:   2%|▏         | 39/2000 [00:10<04:11,  

Batch had 1774 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1769 more
Batch complete → 138 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 64 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:03<1:46:24,  3.19s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<57:32,  1.73s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:04<32:57,  1.01it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:04<28:42,  1.16it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:05<21:53,  1.52it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:05<18:08,  1.83it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:06<17:01,  1.95it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:06<15:37,  2.12it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:07<10:06,  3.28it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:07<08:05,  4.09it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:07<04:33,  7.25it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:08<04:22,  7.52it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:08<03:50,  8.5

Batch had 1767 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1762 more
Batch complete → 149 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 65 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:09<5:17:53,  9.54s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:09<2:15:29,  4.07s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:10<1:23:47,  2.52s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:10<54:34,  1.64s/it]  [A
Fetching 2000 URLs:   0%|          | 5/2000 [00:10<37:04,  1.11s/it][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:11<33:50,  1.02s/it][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:12<31:20,  1.06it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:12<23:01,  1.44it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:13<14:33,  2.28it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:13<06:38,  4.98it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:13<06:37,  4.99it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:13<05:48,  5.68it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:13<04:37, 

Batch had 1806 failures after retries:
  • unknown: ClientConnectorDNSError – Cannot connect to host cachvaow88.com:443 ssl:default [nodename nor servname provided, or not known]
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1801 more
Batch complete → 130 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 66 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:06<3:37:10,  6.52s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:06<1:32:00,  2.76s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:07<59:22,  1.78s/it]  [A
Fetching 2000 URLs:   0%|          | 4/2000 [00:07<40:57,  1.23s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:08<36:28,  1.10s/it][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:09<34:31,  1.04s/it][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:09<28:42,  1.16it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:10<30:17,  1.10it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:11<19:51,  1.67it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:11<06:11,  5.34it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:12<07:27,  4.42it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:13<08:07,  4.06it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:13<05:51,  5

Batch had 1738 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://ione.net/tin-tuc/nhip-song/bo-anh-follow-me-cua-doi-ban-tre-viet-gay-sot-2938335.html?ctr=related_news_click'
  • unknown: ClientConnectorError – Cannot connect to host www.vncgarden.com:443 ssl:default [None]
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1733 more
Batch complete → 175 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 67 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:20:08,  2.41s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:02<26:32,  1.25it/s]  [A
Fetching 2000 URLs:   0%|          | 4/2000 [00:03<19:23,  1.72it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:03<15:16,  2.17it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:04<17:08,  1.94it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:04<13:26,  2.47it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:05<11:16,  2.94it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:05<10:12,  3.25it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:05<08:07,  4.08it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:05<04:35,  7.19it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:06<04:05,  8.06it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:06<03:44,  8.79it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:06<03:03, 10

Batch had 1739 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1734 more
Batch complete → 167 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 68 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:16<8:57:59, 16.15s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:16<3:45:44,  6.78s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:16<1:28:28,  2.66s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:17<1:06:36,  2.00s/it][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:18<56:10,  1.69s/it]  [A
Fetching 2000 URLs:   0%|          | 7/2000 [00:18<42:18,  1.27s/it][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:19<27:34,  1.20it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:19<17:36,  1.88it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:19<09:23,  3.52it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:19<06:15,  5.27it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:20<05:11,  6.35it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:20<04:44,  6.95it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:20<03:5

Batch had 1771 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1766 more
Batch complete → 143 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 69 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:11<6:29:30, 11.69s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:12<2:46:46,  5.01s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:12<1:43:35,  3.11s/it][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:13<25:21,  1.31it/s]  [A
Fetching 2000 URLs:   0%|          | 10/2000 [00:13<18:29,  1.79it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:13<11:43,  2.82it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:13<10:01,  3.30it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:13<07:59,  4.14it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:14<06:48,  4.85it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:14<04:55,  6.69it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:14<03:58,  8.28it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:14<04:53,  6.72it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:15<03:

Batch had 1657 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1652 more
Batch complete → 216 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 70 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:09<5:04:58,  9.15s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:10<2:23:46,  4.32s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:10<57:49,  1.74s/it]  [A
Fetching 2000 URLs:   0%|          | 6/2000 [00:10<32:27,  1.02it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:10<20:24,  1.63it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:11<17:43,  1.87it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:11<14:20,  2.31it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:11<15:13,  2.18it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:12<10:48,  3.06it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:12<06:23,  5.18it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:12<07:13,  4.57it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:12<07:07,  4.63it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:13<09:03, 

Batch had 1702 failures after retries:
  • unknown: ClientConnectorDNSError – Cannot connect to host sangkienkinhnghiemhay.net$:443 ssl:default [nodename nor servname provided, or not known]
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1697 more
Batch complete → 188 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 71 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:04<2:46:28,  5.00s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:05<1:11:35,  2.15s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:05<32:13,  1.03it/s]  [A
Fetching 2000 URLs:   0%|          | 6/2000 [00:05<18:00,  1.85it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:06<17:42,  1.88it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:06<15:10,  2.19it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:06<08:56,  3.71it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:07<08:26,  3.93it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:07<08:36,  3.85it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:07<09:27,  3.50it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:08<11:15,  2.94it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:08<09:00,  3.67it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:09<08:18, 

Batch had 1692 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1687 more
Batch complete → 195 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 72 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:19:33,  2.39s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<48:16,  1.45s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:03<28:27,  1.17it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:03<24:03,  1.38it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:04<17:14,  1.93it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:04<08:40,  3.83it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:04<08:34,  3.87it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:04<07:27,  4.45it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:04<06:39,  4.97it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:05<07:03,  4.69it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:05<04:17,  7.68it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:05<04:35,  7.19it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:06<04:23,  7.

Batch had 1756 failures after retries:
  • unknown: ClientConnectorError – Cannot connect to host www.vncgarden.com:443 ssl:default [None]
  • unknown: TooManyRedirects – 0, message='', url='https://www.nguyenkim.com/may-lanh-panasonic-1-hp-cu-cs-n9ukh-8.html'
  • unknown: TooManyRedirects – 0, message='', url='https://www.tvvn.org/nguoi-viet-dau-tien-chet-vi-covid-19-o-duc-dan-chim-viet/'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1751 more
Batch complete → 166 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 73 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<48:24,  1.45s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:01<24:43,  1.35it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:02<17:53,  1.86it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:02<10:59,  3.02it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:02<07:18,  4.54it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:02<02:49, 11.73it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:03<03:09, 10.46it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:03<03:01, 10.88it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:03<02:59, 11.01it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:03<02:23, 13.72it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:04<03:12, 10.23it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:04<02:51, 11.47it/s][A
Fetching 2000 URLs:   2%|▏         | 34/2000 [00:04<02:51, 11.43

Batch had 1737 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1732 more
Batch complete → 166 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 74 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:07<4:19:14,  7.78s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:08<1:14:08,  2.23s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:08<36:56,  1.11s/it]  [A
Fetching 2000 URLs:   0%|          | 7/2000 [00:08<22:29,  1.48it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:08<10:41,  3.10it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:09<09:59,  3.32it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:09<07:53,  4.19it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:09<04:43,  6.99it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:09<03:56,  8.37it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:10<03:39,  8.99it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:10<03:25,  9.62it/s][A
Fetching 2000 URLs:   2%|▏         | 34/2000 [00:10<02:00, 16.37it/s][A
Fetching 2000 URLs:   2%|▏         | 37/2000 [00:10<02:26

Batch had 1622 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://ione.net/tin-tuc/thoi-trang/kstyle/phong-cach-quy-toc-dep-xuat-sac-cua-iu-trong-drama-dang-hot-3953758.html?ctr=related_news_click'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1617 more
Batch complete → 228 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 75 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:04<2:35:20,  4.66s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:04<41:36,  1.25s/it]  [A
Fetching 2000 URLs:   0%|          | 5/2000 [00:05<23:49,  1.40it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:05<15:13,  2.18it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:05<08:53,  3.73it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:05<04:51,  6.81it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:05<04:20,  7.61it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:06<03:52,  8.52it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:06<03:34,  9.21it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:06<03:11, 10.30it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:06<03:08, 10.49it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:07<03:12, 10.26it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:07<02:54, 

Batch had 1747 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://www.nguyenkim.com/loa-vi-tinh-fenda-f3800x.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1742 more
Batch complete → 165 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 76 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:04<2:13:36,  4.01s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:04<1:09:34,  2.09s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:05<31:33,  1.05it/s]  [A
Fetching 2000 URLs:   0%|          | 7/2000 [00:05<14:16,  2.33it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:05<10:01,  3.31it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:05<04:30,  7.33it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:05<04:02,  8.16it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:06<04:38,  7.10it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:07<06:54,  4.77it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:07<05:45,  5.71it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:07<05:30,  5.98it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:07<03:14, 10.13it/s][A
Fetching 2000 URLs:   2%|▏         | 36/2000 [00:08<03:12,

Batch had 1716 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1711 more
Batch complete → 175 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 77 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<56:03,  1.68s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:02<33:55,  1.02s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:02<24:06,  1.38it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:03<19:47,  1.68it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:03<19:39,  1.69it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:03<08:50,  3.76it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:03<06:39,  4.98it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:04<06:10,  5.36it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:04<05:16,  6.29it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:04<03:51,  8.57it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:04<03:27,  9.57it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:04<02:39, 12.39it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:04<02:27, 13.42i

Batch had 1759 failures after retries:
  • unknown: ClientResponseError – 400, message='Got more than 8190 bytes (8825) when reading Header value is too long.', url='https://twitter.com/hashtag/%EC%95%84%EB%A6%AC%EC%95%84%EC%A6%88?src=hash'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1754 more
Batch complete → 166 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 78 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<50:14,  1.51s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:01<25:19,  1.31it/s][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:01<16:06,  2.07it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:02<14:11,  2.34it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:02<04:28,  7.41it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:02<05:15,  6.31it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:03<04:07,  8.02it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:03<03:52,  8.52it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:03<02:57, 11.17it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:03<02:49, 11.65it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:03<03:22,  9.77it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:03<03:06, 10.57it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:04<02:00, 16.37

Batch had 1709 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1704 more
Batch complete → 181 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 79 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:11:36,  2.15s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:02<12:09,  2.74it/s]  [A
Fetching 2000 URLs:   0%|          | 7/2000 [00:02<08:26,  3.93it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:02<05:21,  6.19it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:02<04:58,  6.65it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:03<04:55,  6.71it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:03<03:18,  9.98it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:03<02:30, 13.12it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:03<01:56, 17.01it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:03<02:17, 14.37it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:04<02:19, 14.08it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:04<02:07, 15.42it/s][A
Fetching 2000 URLs:   2%|▏         | 37/2000 [00:04<02:01,

Batch had 1695 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://www.nguyenkim.com/sony-bat-ngo-ra-mat-smartphone-co-ten-goi-pikachu.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1690 more
Batch complete → 200 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 80 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [30:12<1006:26:48, 1812.51s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [30:12<414:16:35, 746.44s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [30:13<225:08:17, 405.86s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [30:13<136:14:55, 245.74s/it][A
Fetching 2000 URLs:   0%|          | 8/2000 [30:14<41:49:16, 75.58s/it]  [A
Fetching 2000 URLs:   1%|          | 11/2000 [30:14<23:58:46, 43.40s/it][A
Fetching 2000 URLs:   1%|          | 14/2000 [30:14<14:53:48, 27.00s/it][A
Fetching 2000 URLs:   1%|          | 16/2000 [30:14<10:57:24, 19.88s/it][A
Fetching 2000 URLs:   1%|          | 18/2000 [30:14<7:57:07, 14.44s/it] [A
Fetching 2000 URLs:   1%|          | 20/2000 [30:15<5:43:29, 10.41s/it][A
Fetching 2000 URLs:   1%|          | 23/2000 [30:15<3:34:43,  6.52s/it][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [30:15<2:20:03,  4.26s/it][A
Fetching 2000 URL

Batch had 1653 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1648 more
Batch complete → 226 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 81 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:05<3:11:55,  5.76s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:06<1:26:48,  2.61s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:06<53:45,  1.62s/it]  [A
Fetching 2000 URLs:   0%|          | 7/2000 [00:06<15:47,  2.10it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:06<11:13,  2.95it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:07<05:29,  6.03it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:07<04:57,  6.67it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:07<03:59,  8.27it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:07<03:07, 10.57it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:07<02:40, 12.30it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:07<02:15, 14.54it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:08<02:09, 15.23it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:08<02:09,

Batch had 1717 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1712 more
Batch complete → 186 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 82 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:14<7:50:45, 14.13s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:14<3:29:00,  6.28s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:15<2:07:12,  3.82s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:15<1:18:44,  2.37s/it][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:16<41:51,  1.26s/it]  [A
Fetching 2000 URLs:   0%|          | 7/2000 [00:17<38:40,  1.16s/it][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:17<30:33,  1.09it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:18<20:44,  1.60it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:18<13:19,  2.49it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:18<12:16,  2.70it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:19<15:35,  2.12it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:20<10:36,  3.11it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:20<08:3

Batch had 1724 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1719 more
Batch complete → 181 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 83 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:03<2:05:55,  3.78s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:04<1:04:18,  1.93s/it][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:04<19:09,  1.74it/s]  [A
Fetching 2000 URLs:   0%|          | 8/2000 [00:04<10:33,  3.15it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:05<08:37,  3.85it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:05<06:50,  4.85it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:05<06:05,  5.43it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:05<05:43,  5.77it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:05<05:39,  5.85it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:06<05:45,  5.73it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:06<05:45,  5.74it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:06<05:56,  5.55it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:07<06:04

Batch had 1697 failures after retries:
  • unknown: ClientConnectorCertificateError – Cannot connect to host 5tfoods.com:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1017)')]
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1692 more
Batch complete → 180 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 84 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:31:20,  2.74s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:02<42:04,  1.26s/it]  [A
Fetching 2000 URLs:   0%|          | 5/2000 [00:03<17:08,  1.94it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:04<13:15,  2.50it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:04<06:06,  5.42it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:04<06:49,  4.85it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:04<05:34,  5.93it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:05<06:26,  5.13it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:05<03:44,  8.82it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:06<04:35,  7.16it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:06<04:33,  7.22it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:06<03:52,  8.48it/s][A
Fetching 2000 URLs:   2%|▏         | 34/2000 [00:06<02:45, 

Batch had 1739 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='http://ione.vnexpress.net/tin-tuc/tu-che/lo-mo/gap-vay-hoa-tao-thiep-handmade-cuc-dang-iu-1947155.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1734 more
Batch complete → 150 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 85 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<1:03:41,  1.91s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:02<30:34,  1.09it/s]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:02<23:22,  1.42it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:02<12:11,  2.73it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:02<08:11,  4.05it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:03<07:25,  4.47it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:03<02:56, 11.24it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:03<01:59, 16.54it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:03<02:46, 11.90it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:04<02:56, 11.17it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:04<03:03, 10.77it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:04<03:19,  9.88it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:04<02:39, 12

Batch had 1759 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1754 more
Batch complete → 142 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 86 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:03<1:51:51,  3.36s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<48:38,  1.46s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:03<28:51,  1.15it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:04<25:45,  1.29it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:04<11:15,  2.95it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:04<09:25,  3.52it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:04<09:23,  3.53it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:05<04:13,  7.81it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:05<03:17, 10.01it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:05<02:59, 11.04it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:05<03:00, 10.97it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:06<03:44,  8.79it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:06<03:41,  8.

Batch had 1659 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1654 more
Batch complete → 230 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 87 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:05<3:02:59,  5.49s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:05<51:18,  1.54s/it]  [A
Fetching 2000 URLs:   0%|          | 4/2000 [00:06<38:58,  1.17s/it][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:06<17:26,  1.90it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:06<09:17,  3.57it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:06<07:17,  4.54it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:07<05:58,  5.54it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:07<05:22,  6.14it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:07<04:00,  8.23it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:07<03:38,  9.04it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:08<02:13, 14.71it/s][A
Fetching 2000 URLs:   2%|▏         | 34/2000 [00:08<02:41, 12.16it/s][A
Fetching 2000 URLs:   2%|▏         | 36/2000 [00:08<02:30, 

Batch had 1615 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1610 more
Batch complete → 244 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 88 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:04<2:32:15,  4.57s/it][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:04<19:25,  1.71it/s]  [A
Fetching 2000 URLs:   0%|          | 9/2000 [00:04<12:10,  2.73it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:05<07:25,  4.46it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:05<04:22,  7.53it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:05<04:02,  8.16it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:05<03:51,  8.53it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:06<03:26,  9.56it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:06<03:07, 10.52it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:06<02:57, 11.07it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:06<02:22, 13.82it/s][A
Fetching 2000 URLs:   2%|▏         | 37/2000 [00:06<02:23, 13.64it/s][A
Fetching 2000 URLs:   2%|▏         | 39/2000 [00:06<02:29,

Batch had 1642 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://www.nguyenkim.com/muon-bao-quan-thuc-pham-tuoi-ngon-hoc-ngay-9-bi-quyet-nay.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1637 more
Batch complete → 217 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 89 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<51:37,  1.55s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:01<25:59,  1.28it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:02<05:21,  6.20it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:02<05:17,  6.26it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:02<04:11,  7.90it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:02<03:26,  9.60it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:02<02:15, 14.55it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:03<02:24, 13.63it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:03<02:31, 13.00it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:03<01:59, 16.53it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:03<02:13, 14.72it/s][A
Fetching 2000 URLs:   2%|▏         | 36/2000 [00:03<02:05, 15.67it/s][A
Fetching 2000 URLs:   2%|▏         | 38/2000 [00:04<02:53, 11.

Batch had 1660 failures after retries:
  • unknown: ClientConnectorCertificateError – Cannot connect to host www.dietcontrungquocte.com:443 ssl:True [SSLCertVerificationError: (1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'www.dietcontrungquocte.com'. (_ssl.c:1017)")]
  • unknown: ClientConnectorCertificateError – Cannot connect to host canhquansanvuon24h.com:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1017)')]
  • unknown: ClientConnectorDNSError – Cannot connect to host mesuttransport.com:443 ssl:default [nodename nor servname provided, or not known]
  • unknown: TooManyRedirects – 0, message='', url='https://www.nguyenkim.com/may-lanh-daikin-fthf25rvmv-rhf25rvmv.html'
  • unknown: TimeoutError – 
  … and 1655 more
Batch complete → 220 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 90 (2000


Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:08:19,  2.05s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:02<39:43,  1.19s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:02<23:37,  1.41it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:03<06:08,  5.40it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:03<04:54,  6.75it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:03<04:22,  7.58it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:03<03:10, 10.43it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:03<02:28, 13.33it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:03<03:03, 10.77it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:04<02:55, 11.23it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:04<02:26, 13.45it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:04<03:17,  9.96it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:05<03:34, 

Batch had 1695 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1690 more
Batch complete → 202 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 91 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:07<4:13:01,  7.59s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:08<1:12:00,  2.16s/it][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:08<51:35,  1.55s/it]  [A
Fetching 2000 URLs:   0%|          | 6/2000 [00:08<27:57,  1.19it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:09<08:01,  4.13it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:09<06:41,  4.94it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:09<05:46,  5.73it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:09<04:27,  7.39it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:09<03:37,  9.06it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:09<03:29,  9.41it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:10<03:22,  9.72it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:10<03:27,  9.48it/s][A
Fetching 2000 URLs:   2%|▏         | 34/2000 [00:10<03:07

Batch had 1695 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1690 more
Batch complete → 190 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 92 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:39:18,  2.98s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<51:53,  1.56s/it]  [A
Fetching 2000 URLs:   0%|          | 4/2000 [00:03<22:59,  1.45it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:04<17:53,  1.86it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:04<10:58,  3.03it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:04<03:45,  8.79it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:04<03:12, 10.32it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:04<03:04, 10.74it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:04<02:58, 11.06it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:05<02:32, 12.96it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:05<02:09, 15.23it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:05<02:06, 15.51it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:05<02:02, 1

Batch had 1673 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='https://www.nguyenkim.com/lo-nuong-sanaky-vh-359n.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1668 more
Batch complete → 214 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 93 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:04<2:19:03,  4.17s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:04<1:01:57,  1.86s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:04<35:43,  1.07s/it]  [A
Fetching 2000 URLs:   0%|          | 5/2000 [00:04<18:20,  1.81it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:04<07:31,  4.41it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:05<07:13,  4.58it/s][A
Fetching 2000 URLs:   1%|          | 14/2000 [00:05<05:40,  5.84it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:05<04:55,  6.70it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:06<03:13, 10.25it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:06<02:52, 11.44it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:06<03:16, 10.03it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:06<03:01, 10.87it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:07<04:30,

Batch had 1705 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1700 more
Batch complete → 179 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 94 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<48:52,  1.47s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:02<39:52,  1.20s/it][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:02<07:05,  4.68it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:02<06:07,  5.41it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:03<05:24,  6.13it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:03<04:49,  6.85it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:03<04:00,  8.24it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:03<03:45,  8.79it/s][A
Fetching 2000 URLs:   1%|          | 23/2000 [00:03<03:00, 10.96it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:04<03:18,  9.96it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:04<03:01, 10.84it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:04<03:22,  9.72it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:04<03:26,  9.

Batch had 1639 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='http://thaidaiviet.com/index.php/package-list/tour-n-i-c-ngoai-i/item/52-tour-campuchia-laiao'
  • unknown: TooManyRedirects – 0, message='', url='https://ione.vnexpress.net/tin-tuc/lam-dep/makeup/nhung-man-hoa-trang-halloween-cuc-xin-cua-idol-han-3663136.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1634 more
Batch complete → 229 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 95 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<47:40,  1.43s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:01<23:41,  1.41it/s][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:01<15:25,  2.16it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:01<05:58,  5.56it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:02<05:22,  6.19it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:02<04:37,  7.16it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:02<03:56,  8.40it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:02<04:13,  7.83it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:03<05:55,  5.58it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:03<06:09,  5.37it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:04<04:01,  8.20it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:04<03:52,  8.48it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:04<03:07, 10.50

Batch had 1588 failures after retries:
  • unknown: ClientConnectorError – Cannot connect to host www.webdanang.com:443 ssl:default [None]
  • unknown: ClientConnectorCertificateError – Cannot connect to host chaobuoisang.net:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1017)')]
  • unknown: TooManyRedirects – 0, message='', url='https://www.tvvn.org/the-angry-birds-movie-2-nguyen-ngoc-chan/'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1583 more
Batch complete → 274 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 96 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:27:39,  2.63s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:02<39:40,  1.19s/it]  [A
Fetching 2000 URLs:   0%|          | 3/2000 [00:02<23:36,  1.41it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:03<13:57,  2.38it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:03<06:17,  5.28it/s][A
Fetching 2000 URLs:   1%|          | 11/2000 [00:03<04:57,  6.69it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:03<03:41,  8.97it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:04<02:55, 11.28it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:04<03:12, 10.28it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:04<02:20, 14.04it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:04<02:21, 13.91it/s][A
Fetching 2000 URLs:   2%|▏         | 32/2000 [00:04<01:40, 19.49it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:05<02:15, 1

Batch had 1711 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1706 more
Batch complete → 184 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 97 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:03<1:54:22,  3.43s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<52:53,  1.59s/it]  [A
Fetching 2000 URLs:   0%|          | 4/2000 [00:04<23:49,  1.40it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:04<20:37,  1.61it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:04<16:09,  2.06it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:04<12:49,  2.59it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:05<03:22,  9.79it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:05<03:08, 10.49it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:05<03:01, 10.89it/s][A
Fetching 2000 URLs:   1%|          | 21/2000 [00:05<03:01, 10.89it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:05<02:07, 15.54it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:06<03:01, 10.84it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:06<02:22, 13

Batch had 1686 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1681 more
Batch complete → 198 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 98 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:02<1:38:19,  2.95s/it][A
Fetching 2000 URLs:   0%|          | 2/2000 [00:03<42:50,  1.29s/it]  [A
Fetching 2000 URLs:   0%|          | 4/2000 [00:03<18:46,  1.77it/s][A
Fetching 2000 URLs:   0%|          | 6/2000 [00:03<11:12,  2.97it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:03<07:39,  4.33it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:03<05:35,  5.93it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:03<03:52,  8.56it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:04<02:44, 12.08it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:04<01:51, 17.78it/s][A
Fetching 2000 URLs:   1%|▏         | 27/2000 [00:04<01:48, 18.16it/s][A
Fetching 2000 URLs:   2%|▏         | 30/2000 [00:04<01:43, 18.98it/s][A
Fetching 2000 URLs:   2%|▏         | 33/2000 [00:04<02:06, 15.52it/s][A
Fetching 2000 URLs:   2%|▏         | 35/2000 [00:05<02:32, 1

Batch had 1783 failures after retries:
  • unknown: TooManyRedirects – 0, message='', url='http://meyeucon.org/9704/y-nghia-cua-ten-nguoi-van-m/'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1778 more
Batch complete → 127 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 99 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:01<43:29,  1.31s/it][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:01<13:28,  2.47it/s][A
Fetching 2000 URLs:   0%|          | 5/2000 [00:01<07:37,  4.36it/s][A
Fetching 2000 URLs:   0%|          | 7/2000 [00:01<05:51,  5.68it/s][A
Fetching 2000 URLs:   0%|          | 9/2000 [00:01<04:21,  7.62it/s][A
Fetching 2000 URLs:   1%|          | 13/2000 [00:02<02:59, 11.04it/s][A
Fetching 2000 URLs:   1%|          | 15/2000 [00:02<02:46, 11.95it/s][A
Fetching 2000 URLs:   1%|          | 17/2000 [00:02<02:52, 11.47it/s][A
Fetching 2000 URLs:   1%|          | 19/2000 [00:02<02:32, 13.02it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:02<02:01, 16.31it/s][A
Fetching 2000 URLs:   1%|          | 24/2000 [00:02<02:00, 16.40it/s][A
Fetching 2000 URLs:   1%|▏         | 26/2000 [00:03<02:24, 13.62it/s][A
Fetching 2000 URLs:   1%|▏         | 29/2000 [00:03<02:29, 13.18

Batch had 1703 failures after retries:
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1698 more
Batch complete → 167 successful / 2,000 attempted
Pausing 1.5 seconds between batches...

Starting batch 100 (2000 URLs)



Fetching 2000 URLs:   0%|          | 0/2000 [00:00<?, ?it/s][A
Fetching 2000 URLs:   0%|          | 1/2000 [00:00<32:12,  1.03it/s][A
Fetching 2000 URLs:   0%|          | 3/2000 [00:01<14:21,  2.32it/s][A
Fetching 2000 URLs:   0%|          | 4/2000 [00:01<11:03,  3.01it/s][A
Fetching 2000 URLs:   0%|          | 8/2000 [00:01<04:16,  7.77it/s][A
Fetching 2000 URLs:   0%|          | 10/2000 [00:02<04:31,  7.33it/s][A
Fetching 2000 URLs:   1%|          | 12/2000 [00:02<04:11,  7.90it/s][A
Fetching 2000 URLs:   1%|          | 16/2000 [00:02<02:40, 12.39it/s][A
Fetching 2000 URLs:   1%|          | 18/2000 [00:02<02:45, 11.95it/s][A
Fetching 2000 URLs:   1%|          | 20/2000 [00:02<02:35, 12.74it/s][A
Fetching 2000 URLs:   1%|          | 22/2000 [00:02<02:30, 13.11it/s][A
Fetching 2000 URLs:   1%|▏         | 25/2000 [00:02<02:08, 15.43it/s][A
Fetching 2000 URLs:   1%|▏         | 28/2000 [00:03<01:49, 17.98it/s][A
Fetching 2000 URLs:   2%|▏         | 31/2000 [00:03<01:59, 16.5

Batch had 1630 failures after retries:
  • unknown: ClientConnectorError – Cannot connect to host www.vncgarden.com:443 ssl:default [None]
  • unknown: ClientConnectorDNSError – Cannot connect to host winbetcasino.life:443 ssl:default [nodename nor servname provided, or not known]
  • unknown: TooManyRedirects – 0, message='', url='https://www.hanoisoundstuff.com/gia-vang-9999-o-thai-nguyen.html'
  • unknown: TimeoutError – 
  • unknown: TimeoutError – 
  … and 1625 more
Batch complete → 227 successful / 2,000 attempted

Fetching finished. Total successful HTML files: 18,464
