In [None]:
# @title #Status Code Checker { display-mode: "form" }
# @markdown > Steps:
# @markdown 1.   Upload a `urls.txt` file containing the URLs to check (one URL per line)
# @markdown 2.   Press play!

import csv
import aiohttp
import asyncio
import os
from tqdm import tqdm
import nest_asyncio
import pandas as pd
from IPython.display import display, HTML

# Patch asyncio so that asyncio.run() (used in main) can be called even when
# an event loop is already running — presumably the notebook kernel's loop,
# since this cell's form header and /content paths suggest Google Colab.
nest_asyncio.apply()

def remove_duplicates(urls):
    """Return the unique entries of *urls*, keeping first-occurrence order.

    Args:
        urls: Iterable of hashable items (URL strings in this script).

    Returns:
        A new list with duplicates dropped. Order follows the first time each
        item appears in the input, so downstream output is deterministic.
    """
    # dict keys are unique and remember insertion order, unlike set().
    return list(dict.fromkeys(urls))

async def fetch_status(session, url):
    """Request *url* once and report its HTTP status without following redirects.

    Args:
        session: Object whose ``get(...)`` returns an async context manager
            yielding a response with a ``status`` attribute (an
            ``aiohttp.ClientSession`` in this script).
        url: URL to request. Surrounding whitespace is stripped before the
            request is made.

    Returns:
        A ``(url, status)`` tuple. ``url`` is the argument exactly as passed
        in (not the stripped form). ``status`` is the status code as a string
        on success, otherwise a message starting with ``"Error: "``.
        Exceptions derived from ``Exception`` are converted to such messages
        rather than propagated.
    """
    url_trimmed = url.strip()

    # Nothing to request for a blank line.
    if not url_trimmed:
        return url, 'Error: Empty URL'

    try:
        async with session.get(
            url_trimmed,
            timeout=5,
            allow_redirects=False,  # report 3xx codes instead of the redirect target's code
            headers={"User-Agent": "Mozilla/5.0"},
        ) as response:
            return url, str(response.status)
    except asyncio.TimeoutError:
        return url, 'Error: Connection timeout'
    except Exception as e:
        # Deliberately broad: one bad URL must not abort the whole batch.
        message = str(e)
        if 'DNS' in message:
            return url, 'Error: Hostname not found (DNS error)'
        if 'Invalid URL' in message:
            return url, 'Error: Invalid URL'
        # Some exceptions carry no message; fall back to the class name so the
        # result is never a bare "Error: ".
        detail = message or type(e).__name__
        return url, f'Error: {detail[:100]}'

async def process_urls(file_path, output_path, concurrency):
    """Check every unique URL listed in *file_path* and write the results as CSV.

    Args:
        file_path: Text file with one URL per line.
        output_path: Destination CSV; overwritten if it exists. Columns are
            ``URL`` and ``Status Code``.
        concurrency: Maximum number of requests in flight at once. Values
            below 1 are treated as 1 so the run never silently does nothing.

    Lines are stripped and de-duplicated via ``remove_duplicates``; each
    remaining entry is passed to ``fetch_status``. Rows are written in the
    order of the de-duplicated list, not in completion order.
    """
    # utf-8-sig reads plain UTF-8 and also drops a leading BOM, which would
    # otherwise be glued to the first URL and make it invalid.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        urls = [line.strip() for line in f]

    urls = remove_duplicates(urls)

    # One slot per URL so results keep list order regardless of which worker
    # finishes first.
    results = [None] * len(urls)

    pending = asyncio.Queue()
    for indexed_url in enumerate(urls):
        pending.put_nowait(indexed_url)

    # No point starting more workers than there are URLs; always start one.
    worker_count = max(1, min(concurrency, len(urls)))

    async def worker(session):
        """Drain the shared queue until it is empty."""
        while True:
            try:
                index, url = pending.get_nowait()
            except asyncio.QueueEmpty:
                return
            results[index] = await fetch_status(session, url)
            progress_bar.update(1)

    # The context managers guarantee the bar and the session are closed even
    # if something unexpected propagates out of a worker.
    with tqdm(total=len(urls), desc="Processing URLs") as progress_bar:
        # A single session shares one connection pool; its limit is matched to
        # the worker count so `concurrency` stays the effective bound.
        connector = aiohttp.TCPConnector(limit=worker_count)
        async with aiohttp.ClientSession(connector=connector) as session:
            await asyncio.gather(*(worker(session) for _ in range(worker_count)))

    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["URL", "Status Code"])
        writer.writerows(results)

    print()
    print(f"Processing complete! Results saved to {output_path}")

def main(file_path="/content/urls.txt", output_path="/content/output.csv", concurrency=5):
    """Run the status checker end to end.

    Args:
        file_path: Input file with one URL per line. Defaults to the path
            this cell's instructions ask the user to upload to.
        output_path: Where the CSV report is written.
        concurrency: Number of concurrent requests passed to ``process_urls``.

    Returns:
        None. If *file_path* does not exist, an error is printed and nothing
        else happens.
    """
    # Bail out with a readable message instead of a traceback.
    if not os.path.exists(file_path):
        print(f"Error: {file_path} not found! Please make sure the file exists.")
        return

    # Drive the async pipeline to completion.
    asyncio.run(process_urls(file_path, output_path, concurrency))

# Run the checker only when this code is executed as the main module.
if __name__ == "__main__":
    main()

# CarricoSEO attribution banner: a static HTML snippet (logo plus links to the
# author's tools and resources pages) rendered in the cell output via
# IPython's display(). It sits outside the __main__ guard, so it is shown
# whenever this code is executed at module level.
html_content = """
<br><br>
<div style="border: 2px solid #C9A82D; background-color: #0C232A; padding: 15px; border-radius: 10px; width: 30%; margin-left: 0; text-align: left;">
  <div style="width: 100%; display: flex; align-items: center;">
    <span style="color: white; font-size: 16px; margin-right: 10px;">
      Built by
    </span>
    <a href="https://www.carricoseo.com/?utm_source=google_colab&utm_medium=referral&utm_campaign=colab_bannerad"
       target="_blank"
       style="text-decoration: none;">
      <img src="https://www.carricoseo.com/wp-content/uploads/2023/09/NEW-CarricoSEO-Logo-Light.svg"
           alt="CarricoSEO Logo"
           style="height: 25px; vertical-align: middle;">
    </a>
  </div>
  <br>
  <div style="margin-top: 10px; text-align: left; color: white; font-size: 14px;">
    This and many other one-click tools can be found at
    <a href="https://tools.carricoseo.com/?utm_source=google_colab&utm_medium=referral&utm_campaign=colab_tools"
       target="_blank"
       style="color: lightblue; text-decoration: none;">
      CS Tools.
    </a>
    I also have a blog full of other free resources, tools, and scripts
    <a href="https://www.carricoseo.com/resources/?utm_source=google_colab&utm_medium=referral&utm_campaign=colab_blog"
       target="_blank"
       style="color: lightblue; text-decoration: none;">
      found here!
    </a>
  </div>
</div>

"""
display(HTML(html_content))