In [26]:
import requests
from IPython.display import Markdown, display
import time
import pandas as pd
from datetime import datetime

# Reader services being compared; both run on localhost.
# Alternative target lists, kept for reference:
# base_urls = ['http://localhost:3050',"https://reverie-reader-487899942826.us-central1.run.app/"]
# base_urls = ['https://reverie-reader-487899942826.us-central1.run.app/']
base_urls = [
    "http://localhost:3050",
    "http://localhost:8072",
]

# Pages requested through each service; the page URL is appended to the
# service's base URL as the request path.
urls_to_scrape = [
    "https://en.wikipedia.org/wiki/Tensor_product",
    "https://en.wikipedia.org/wiki/Category_theory",
    "https://www.math3ma.com/blog/the-tensor-product-demystified",
    "https://stackoverflow.com/questions/51358235/is-there-a-difference-between-npm-start-and-npm-run-start",
    "https://en.wikipedia.org/wiki/Linear_algebra",
]

# Option headers sent with every request; each flag is switched on with the
# literal string 'true'.
headers = {
    option: "true"
    for option in (
        "X-With-Images-Summary",
        "X-With-Links-Summary",
        "X-Remove-Images-From-Markdown",
        "X-Remove-Links-From-Markdown",
    )
}

# Accumulates one record (dict) per request attempt.
results = list()

# Seconds to wait for the server to connect / send data before giving up.
# requests applies no timeout by default, so without this a single
# unresponsive endpoint would stall the whole benchmark. A timeout is raised
# as a RequestException subclass and is therefore recorded as a failed row.
REQUEST_TIMEOUT_SECONDS = 60

# Issue one GET per (base URL, page) pair and record how long it took.
# Every attempt appends exactly one dict to `results`, with the same keys in
# the same order, so the list converts cleanly into a DataFrame.
for base_url in base_urls:
    for url_to_scrape in urls_to_scrape:
        # The page URL is passed as the path of the service URL. Strip a
        # trailing slash from the base so "https://host/" does not yield "//".
        url = f"{base_url.rstrip('/')}/{url_to_scrape}"

        try:
            # perf_counter is monotonic, so the measured interval cannot be
            # skewed by system clock adjustments.
            start_time = time.perf_counter()
            response = requests.get(
                url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS
            )
            elapsed = time.perf_counter() - start_time
        except requests.exceptions.RequestException as e:
            # Transport-level failure (connection error, timeout, ...):
            # there is no response, hence no timing and no status code.
            result = {
                'timestamp': datetime.now(),
                'base_url': base_url,
                'url': url_to_scrape,
                'response_time': None,
                'status_code': None,
                'success': False,
                'error': str(e)
            }
        else:
            # A response arrived, but it only counts as a success when the
            # status is below 400; error replies must not be averaged into
            # the timing summary as if the page had been served.
            result = {
                'timestamp': datetime.now(),
                'base_url': base_url,
                'url': url_to_scrape,
                'response_time': round(elapsed, 2),
                'status_code': response.status_code,
                'success': response.ok,
                'error': None if response.ok else f'HTTP {response.status_code} {response.reason}'
            }

            # Optional: display the response
            # display(Markdown(response.text))

        results.append(result)

# Tabulate every recorded attempt (one row per request) and show it in full.
df = pd.DataFrame(results)
print("\nTiming Results:")
display(df)

# Per-base-URL response-time statistics, computed only over the rows whose
# `success` flag is set.
print("\nSummary Statistics by Base URL:")
summary = (
    df.loc[df['success']]
    .groupby('base_url')['response_time']
    .agg(['mean', 'min', 'max', 'count'])
)
display(summary)


Timing Results:


Unnamed: 0,timestamp,base_url,url,response_time,status_code,success,error
0,2025-01-02 01:49:55.824931,http://localhost:3050,https://en.wikipedia.org/wiki/Tensor_product,2.65,200,True,
1,2025-01-02 01:49:57.358459,http://localhost:3050,https://en.wikipedia.org/wiki/Category_theory,1.53,200,True,
2,2025-01-02 01:50:04.115342,http://localhost:3050,https://www.math3ma.com/blog/the-tensor-produc...,6.76,200,True,
3,2025-01-02 01:50:21.027971,http://localhost:3050,https://stackoverflow.com/questions/51358235/i...,16.91,200,True,
4,2025-01-02 01:50:23.019044,http://localhost:3050,https://en.wikipedia.org/wiki/Linear_algebra,1.99,200,True,
5,2025-01-02 01:50:34.861946,http://localhost:8072,https://en.wikipedia.org/wiki/Tensor_product,11.84,200,True,
6,2025-01-02 01:50:39.195872,http://localhost:8072,https://en.wikipedia.org/wiki/Category_theory,4.33,200,True,
7,2025-01-02 01:50:41.225964,http://localhost:8072,https://www.math3ma.com/blog/the-tensor-produc...,2.03,200,True,
8,2025-01-02 01:50:43.645151,http://localhost:8072,https://stackoverflow.com/questions/51358235/i...,2.42,200,True,
9,2025-01-02 01:50:49.799750,http://localhost:8072,https://en.wikipedia.org/wiki/Linear_algebra,6.15,200,True,



Summary Statistics by Base URL:


Unnamed: 0_level_0,mean,min,max,count
base_url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
http://localhost:3050,5.968,1.53,16.91,5
http://localhost:8072,5.354,2.03,11.84,5
