In [17]:
import requests
from IPython.display import Markdown, display
import time
import pandas as pd
from datetime import datetime

base_urls = ['http://localhost:3050',"https://reverie-reader-487899942826.us-central1.run.app/"]

urls_to_scrape = [
    'https://en.wikipedia.org/wiki/Tensor_product',
    'https://en.wikipedia.org/wiki/Category_theory',
    'https://www.math3ma.com/blog/the-tensor-product-demystified',
    'https://stackoverflow.com/questions/51358235/is-there-a-difference-between-npm-start-and-npm-run-start',
    'https://en.wikipedia.org/wiki/Linear_algebra'
]

headers = {
    'X-With-Images-Summary': 'true',
    'X-With-Links-Summary': 'true',
    'X-Remove-Images-From-Markdown': 'true',
    'X-Remove-Links-From-Markdown': 'true',
}

# Create list to store results
results = []

# Test each base URL and measure response time
for base_url in base_urls:
    for url_to_scrape in urls_to_scrape:
        try:
            url = f'{base_url}/{url_to_scrape}'
            start_time = time.time()
            response = requests.get(url, headers=headers)
            end_time = time.time()

            result = {
                'timestamp': datetime.now(),
                'base_url': base_url,
                'url': url_to_scrape,
                'response_time': round(end_time - start_time, 2),
                'status_code': response.status_code,
                'success': True,
                'error': None
            }

            # Optional: display the response
            # display(Markdown(response.text))

        except requests.exceptions.RequestException as e:
            result = {
                'timestamp': datetime.now(),
                'base_url': base_url,
                'url': url_to_scrape,
                'response_time': None,
                'status_code': None,
                'success': False,
                'error': str(e)
            }

        results.append(result)

# Create DataFrame and display results
df = pd.DataFrame(results)
print("\nTiming Results:")
display(df)

# Calculate summary statistics
print("\nSummary Statistics by Base URL:")
summary = df[df['success']].groupby('base_url')['response_time'].agg(['mean', 'min', 'max', 'count'])
display(summary)


Timing Results:


Unnamed: 0,timestamp,base_url,url,response_time,status_code,success,error
0,2025-01-01 22:57:01.742222,http://localhost:3050,https://en.wikipedia.org/wiki/Tensor_product,2.31,200,True,
1,2025-01-01 22:57:02.915074,http://localhost:3050,https://en.wikipedia.org/wiki/Category_theory,1.17,200,True,
2,2025-01-01 22:57:03.590292,http://localhost:3050,https://www.math3ma.com/blog/the-tensor-produc...,0.68,200,True,
3,2025-01-01 22:57:04.755840,http://localhost:3050,https://stackoverflow.com/questions/51358235/i...,1.17,200,True,
4,2025-01-01 22:57:06.356228,http://localhost:3050,https://en.wikipedia.org/wiki/Linear_algebra,1.6,200,True,
5,2025-01-01 22:57:21.602964,https://reverie-reader-487899942826.us-central...,https://en.wikipedia.org/wiki/Tensor_product,15.25,200,True,
6,2025-01-01 22:57:27.306414,https://reverie-reader-487899942826.us-central...,https://en.wikipedia.org/wiki/Category_theory,5.7,200,True,
7,2025-01-01 22:57:30.895962,https://reverie-reader-487899942826.us-central...,https://www.math3ma.com/blog/the-tensor-produc...,3.59,200,True,
8,2025-01-01 22:57:39.952144,https://reverie-reader-487899942826.us-central...,https://stackoverflow.com/questions/51358235/i...,9.06,200,True,
9,2025-01-01 22:57:43.430108,https://reverie-reader-487899942826.us-central...,https://en.wikipedia.org/wiki/Linear_algebra,3.48,500,True,



Summary Statistics by Base URL:


Unnamed: 0_level_0,mean,min,max,count
base_url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
http://localhost:3050,1.386,0.68,2.31,5
https://reverie-reader-487899942826.us-central1.run.app/,7.416,3.48,15.25,5
