In [18]:
import requests
from IPython.display import Markdown, display
import time
import pandas as pd
from datetime import datetime

# base_urls = ['http://localhost:3050',"https://reverie-reader-487899942826.us-central1.run.app/"]
# base_urls = ['http://localhost:8072',"http://34.28.36.150","https://r.jina.ai"]
base_urls = ['http://localhost:8075']


# base_urls = ['https://reverie-reader-487899942826.us-central1.run.app/']
urls_to_scrape = [
    'https://en.wikipedia.org/wiki/Tensor_product',
    'https://en.wikipedia.org/wiki/Category_theory',
    'https://www.math3ma.com/blog/the-tensor-product-demystified',
    'https://stackoverflow.com/questions/51358235/is-there-a-difference-between-npm-start-and-npm-run-start',
    'https://en.wikipedia.org/wiki/Linear_algebra'
]

headers = {
    'X-With-Images-Summary': 'true',
    'X-With-Links-Summary': 'true',
    'X-Remove-Images-From-Markdown': 'true',
    'X-Remove-Links-From-Markdown': 'true',
}

# Create list to store results
results = []

# Test each base URL and measure response time
for base_url in base_urls:
    for url_to_scrape in urls_to_scrape:
        try:
            url = f'{base_url}/{url_to_scrape}'
            start_time = time.time()
            response = requests.get(url, headers=headers)
            end_time = time.time()

            result = {
                'timestamp': datetime.now(),
                'base_url': base_url,
                'url': url_to_scrape,
                'response_time': round(end_time - start_time, 2),
                'status_code': response.status_code,
                'success': True,
                'error': None
            }

            # Optional: display the response
            # display(Markdown(response.text))

        except requests.exceptions.RequestException as e:
            result = {
                'timestamp': datetime.now(),
                'base_url': base_url,
                'url': url_to_scrape,
                'response_time': None,
                'status_code': None,
                'success': False,
                'error': str(e)
            }

        results.append(result)

# Create DataFrame and display results
df = pd.DataFrame(results)
print("\nTiming Results:")
display(df)

# Calculate summary statistics
print("\nSummary Statistics by Base URL:")
summary = df[df['success']].groupby('base_url')['response_time'].agg(['mean', 'min', 'max', 'count'])
display(summary)


Timing Results:


Unnamed: 0,timestamp,base_url,url,response_time,status_code,success,error
0,2025-01-02 21:12:59.844151,http://localhost:8075,https://en.wikipedia.org/wiki/Tensor_product,0.21,500,True,
1,2025-01-02 21:12:59.863228,http://localhost:8075,https://en.wikipedia.org/wiki/Category_theory,0.02,500,True,
2,2025-01-02 21:12:59.879198,http://localhost:8075,https://www.math3ma.com/blog/the-tensor-produc...,0.02,500,True,
3,2025-01-02 21:12:59.912703,http://localhost:8075,https://stackoverflow.com/questions/51358235/i...,0.03,500,True,
4,2025-01-02 21:12:59.963699,http://localhost:8075,https://en.wikipedia.org/wiki/Linear_algebra,0.05,500,True,



Summary Statistics by Base URL:


Unnamed: 0_level_0,mean,min,max,count
base_url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
http://localhost:8075,0.066,0.02,0.21,5


In [16]:
import httpx
import asyncio
from IPython.display import Markdown, display
import pandas as pd
from datetime import datetime
import nest_asyncio

nest_asyncio.apply()

base_urls = ["http://34.28.36.150"]

urls_to_scrape = [
    'https://en.wikipedia.org/wiki/Tensor_product',
    'https://en.wikipedia.org/wiki/Category_theory',
    'https://www.math3ma.com/blog/the-tensor-product-demystified',
    'https://stackoverflow.com/questions/51358235/is-there-a-difference-between-npm-start-and-npm-run-start',
    'https://en.wikipedia.org/wiki/Linear_algebra'
]

headers = {
    'X-With-Images-Summary': 'true',
    'X-With-Links-Summary': 'true',
    'X-Remove-Images-From-Markdown': 'true',
    'X-Remove-Links-From-Markdown': 'true',
}

# Create list to store results
results = []

async def fetch(url, base_url):
    async with httpx.AsyncClient() as client:
        start_time = datetime.now()
        try:
            response = await client.get(url, headers=headers)
            response_time = (datetime.now() - start_time).total_seconds()
            return {
                'timestamp': datetime.now(),
                'base_url': base_url,
                'url': url,
                'response_time': round(response_time, 2),
                'status_code': response.status_code,
                'success': True,
                'error': None
            }
        except httpx.RequestError as e:
            return {
                'timestamp': datetime.now(),
                'base_url': base_url,
                'url': url,
                'response_time': None,
                'status_code': None,
                'success': False,
                'error': str(e)
            }

async def main():
    tasks = []
    for base_url in base_urls:
        for url_to_scrape in urls_to_scrape:
            url = f'{base_url}/{url_to_scrape}'
            tasks.append(fetch(url, base_url))

    results.extend(await asyncio.gather(*tasks, return_exceptions=True))

    print(results)

# Run the async main function
asyncio.run(main())

# Create DataFrame and display results
df = pd.DataFrame(results)
print("\nTiming Results:")
display(df)

# Calculate summary statistics
print("\nSummary Statistics by Base URL:")
summary = df[df['success']].groupby('base_url')['response_time'].agg(['mean', 'min', 'max', 'count'])
display(summary)

[{'timestamp': datetime.datetime(2025, 1, 2, 19, 49, 16, 796392), 'base_url': 'http://34.28.36.150', 'url': 'http://34.28.36.150/https://en.wikipedia.org/wiki/Tensor_product', 'response_time': None, 'status_code': None, 'success': False, 'error': ''}, {'timestamp': datetime.datetime(2025, 1, 2, 19, 49, 16, 794330), 'base_url': 'http://34.28.36.150', 'url': 'http://34.28.36.150/https://en.wikipedia.org/wiki/Category_theory', 'response_time': None, 'status_code': None, 'success': False, 'error': ''}, {'timestamp': datetime.datetime(2025, 1, 2, 19, 49, 16, 799035), 'base_url': 'http://34.28.36.150', 'url': 'http://34.28.36.150/https://www.math3ma.com/blog/the-tensor-product-demystified', 'response_time': None, 'status_code': None, 'success': False, 'error': ''}, {'timestamp': datetime.datetime(2025, 1, 2, 19, 49, 16, 798030), 'base_url': 'http://34.28.36.150', 'url': 'http://34.28.36.150/https://stackoverflow.com/questions/51358235/is-there-a-difference-between-npm-start-and-npm-run-start

Unnamed: 0,timestamp,base_url,url,response_time,status_code,success,error
0,2025-01-02 19:49:16.796392,http://34.28.36.150,http://34.28.36.150/https://en.wikipedia.org/w...,,,False,
1,2025-01-02 19:49:16.794330,http://34.28.36.150,http://34.28.36.150/https://en.wikipedia.org/w...,,,False,
2,2025-01-02 19:49:16.799035,http://34.28.36.150,http://34.28.36.150/https://www.math3ma.com/bl...,,,False,
3,2025-01-02 19:49:16.798030,http://34.28.36.150,http://34.28.36.150/https://stackoverflow.com/...,,,False,
4,2025-01-02 19:49:16.800003,http://34.28.36.150,http://34.28.36.150/https://en.wikipedia.org/w...,,,False,



Summary Statistics by Base URL:


Unnamed: 0_level_0,mean,min,max,count
base_url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
