In [None]:
# User-editable inputs for this notebook. The "# @param" / "# @markdown"
# annotations are Colab form markup (see the Colab forms documentation); the
# assigned values are plain module-level variables that main() reads.

# Several addresses may be given separated by whitespace: the string is
# split with str.split() before main() runs.
# @markdown #### Enter one or more internet addresses to check:
urls = "cloudflare.net" # @param {type:"string"}

# ISO dates (YYYY-MM-DD); main() parses them with datetime.fromisoformat()
# and requires start_date to be strictly before end_date.
# @markdown #### Enter to and from date:
start_date = "2014-05-29" # @param {type:"date"}
end_date = "2014-06-30" # @param {type:"date"}

# NOTE: whenever `frequency` is non-empty, main() derives its own limit from
# the frequency and date range, so this value is then not used.
# @markdown #### Maximum number of queries
limit = 10 # @param {type:"integer"}

# Used by main() as a key into COLLAPSE_OPTIONS.
# @markdown #### Frequency
frequency = "daily" # @param ["hourly", "daily", "monthly", "yearly"]

# Mount Google Drive under /content/drive so files stored there are reachable.
from google.colab import drive
drive.mount('/content/drive')

import sys

# Drive folder added to the import path; presumably it holds the
# wayback_google_analytics package imported below — confirm the location.
WGA_PACKAGE_DIR = '/content/drive/MyDrive/Colab Notebooks/Wayback Google Analytics'

# Only append once so re-running the cell does not pile up duplicate entries.
if WGA_PACKAGE_DIR not in sys.path:
    sys.path.append(WGA_PACKAGE_DIR)
from datetime import datetime
import asyncio
import aiohttp
from IPython.display import Image

from wayback_google_analytics.utils import (
    get_limit_from_frequency,
    get_14_digit_timestamp,
    validate_dates,
    COLLAPSE_OPTIONS
)

from wayback_google_analytics.scraper import (
    get_analytics_codes
)

from wayback_google_analytics.output import (
    init_output,
    write_output
)


async def main():
    """Fetch analytics codes for the configured URLs and write them to ``output.dot``.

    Takes no arguments. The inputs are read from the notebook-level variables
    ``urls``, ``start_date``, ``end_date``, ``limit``, ``frequency`` and
    ``skip_current``. Those variables are only read, never reassigned, so the
    coroutine can be awaited more than once without re-running the form cell.

    Steps:
        1. Parse and validate the ISO start/end dates and convert them to
           14-digit ``YYYYmmddHHMMSS`` timestamps.
        2. If a frequency is set, derive the query limit from it and map it to
           its ``COLLAPSE_OPTIONS`` value.
        3. Ask for confirmation when the request is large.
        4. Call ``get_analytics_codes()``, print the results and pass them to
           ``write_output('output.dot', 'dot', ...)``.

    Returns:
        None

    Raises:
        ValueError: If the start date is not strictly before the end date.
    """

    print("Entered main", start_date)

    start_dt = datetime.fromisoformat(start_date)
    end_dt = datetime.fromisoformat(end_date)

    # Check if start_date is before end_date
    if not start_dt < end_dt:
        raise ValueError("Start date must be before end date.")

    # Convert dates to the 14-digit timestamp format
    start_ts = start_dt.strftime("%Y%m%d%H%M%S")
    end_ts = end_dt.strftime("%Y%m%d%H%M%S")

    # Gets appropriate limit for given frequency & converts frequency to
    # collapse option. Local names are used so the notebook inputs stay intact.
    query_limit = limit
    collapse = frequency
    if frequency:
        query_limit = (
            get_limit_from_frequency(
                frequency=frequency,
                start_date=start_ts,
                end_date=end_ts,
            )
            + 1
        )
        collapse = COLLAPSE_OPTIONS[frequency]

    semaphore = asyncio.Semaphore(10)

    # Warn user if large request
    if abs(int(query_limit)) > 500 or len(urls) > 9:
        print("limit:", query_limit)
        # The prompt previously interpolated `args.limit`; there is no `args`
        # object here, so it raised NameError instead of asking the question.
        response = input(
            f"""Large requests can lead to being rate limited by archive.org.\n\n Current limit: {query_limit} (Recommended < 500) \n\n Current # of urls: {len(urls)} (Recommended < 10, unless limit < 50)

        Do you wish to proceed? (Yes/no)
                         """
        )
        if response.lower() not in ("yes", "y"):
            print("Request cancelled.")
            # Return instead of exit(): leaving the coroutine is enough to
            # cancel, and it does not depend on how the notebook kernel
            # implements exit().
            return

    try:
        async with semaphore:
            async with aiohttp.ClientSession() as session:
                results = await get_analytics_codes(
                    session=session,
                    urls=urls,
                    start_date=start_ts,
                    end_date=end_ts,
                    frequency=collapse,
                    limit=query_limit,
                    semaphore=semaphore,
                    skip_current=skip_current,
                )
                print(results)

        write_output('output.dot', 'dot', results)
    except aiohttp.ClientError as e:
        print(
            "Your request was rate limited. Wait 5 minutes and try again and consider reducing the limit and # of numbers."
        )
        # ClientError also covers other client-side failures, so show the
        # underlying error to make misdiagnosis less likely.
        print(f"Underlying error: {e!r}")

# Split the whitespace-separated form string into individual addresses;
# main() passes the result to get_analytics_codes() and uses its length for
# the large-request warning.
urls = urls.split()
# Read by main() and forwarded as get_analytics_codes(skip_current=...).
skip_current = False

# Schedule main() as a task on the already-running event loop and wait for it
# using top-level await (an IPython/Colab cell feature, not plain Python).
loop = asyncio.get_running_loop()
await loop.create_task(main())

# Last expression of the cell, so the notebook displays the image.
# NOTE(review): main() writes 'output.dot'; nothing visible in this cell
# creates 'output.png' — confirm that write_output() also renders it.
Image('output.png')


In [None]:
# Debugging aid (shell commands run via IPython "!"): print the working
# directory, then list it and drive/MyDrive — presumably the Drive folder
# mounted by the first cell.
!pwd
!ls -l
!ls -l drive/MyDrive