In [13]:
import requests
from IPython.display import Markdown, display
import time
import pandas as pd
from datetime import datetime

base_urls = ['http://localhost:8072','http://localhost:3050']

urls_to_scrape = [
    'https://en.wikipedia.org/wiki/Tensor_product',
    'https://en.wikipedia.org/wiki/Category_theory',
    'https://www.math3ma.com/blog/the-tensor-product-demystified',
    'https://stackoverflow.com/questions/51358235/is-there-a-difference-between-npm-start-and-npm-run-start',
    'https://en.wikipedia.org/wiki/Linear_algebra'
]

headers = {
    'X-With-Images-Summary': 'true',
    'X-With-Links-Summary': 'true',
    'X-Remove-Images-From-Markdown': 'true',
    'X-Remove-Links-From-Markdown': 'true',
}

# Create list to store results
results = []

# Test each base URL and measure response time
for base_url in base_urls:
    for url_to_scrape in urls_to_scrape:
        try:
            url = f'{base_url}/{url_to_scrape}'
            start_time = time.time()
            response = requests.get(url, headers=headers)
            end_time = time.time()

            result = {
                'timestamp': datetime.now(),
                'base_url': base_url,
                'url': url_to_scrape,
                'response_time': round(end_time - start_time, 2),
                'status_code': response.status_code,
                'success': True,
                'error': None
            }

            # Optional: display the response
            # display(Markdown(response.text))

        except requests.exceptions.RequestException as e:
            result = {
                'timestamp': datetime.now(),
                'base_url': base_url,
                'url': url_to_scrape,
                'response_time': None,
                'status_code': None,
                'success': False,
                'error': str(e)
            }

        results.append(result)

# Create DataFrame and display results
df = pd.DataFrame(results)
print("\nTiming Results:")
display(df)

# Calculate summary statistics
print("\nSummary Statistics by Base URL:")
summary = df[df['success']].groupby('base_url')['response_time'].agg(['mean', 'min', 'max', 'count'])
display(summary)


Timing Results:


Unnamed: 0,timestamp,base_url,url,response_time,status_code,success,error
0,2025-01-01 18:13:30.380697,http://localhost:8072,https://en.wikipedia.org/wiki/Tensor_product,0.2,500,True,
1,2025-01-01 18:13:30.397523,http://localhost:8072,https://en.wikipedia.org/wiki/Category_theory,0.02,500,True,
2,2025-01-01 18:13:30.415325,http://localhost:8072,https://www.math3ma.com/blog/the-tensor-produc...,0.02,500,True,
3,2025-01-01 18:13:30.426722,http://localhost:8072,https://stackoverflow.com/questions/51358235/i...,0.01,500,True,
4,2025-01-01 18:13:30.438196,http://localhost:8072,https://en.wikipedia.org/wiki/Linear_algebra,0.01,500,True,
5,2025-01-01 18:13:32.710091,http://localhost:3050,https://en.wikipedia.org/wiki/Tensor_product,2.27,200,True,
6,2025-01-01 18:13:34.010897,http://localhost:3050,https://en.wikipedia.org/wiki/Category_theory,1.3,200,True,
7,2025-01-01 18:13:34.559056,http://localhost:3050,https://www.math3ma.com/blog/the-tensor-produc...,0.55,200,True,
8,2025-01-01 18:13:35.861227,http://localhost:3050,https://stackoverflow.com/questions/51358235/i...,1.3,200,True,
9,2025-01-01 18:13:37.416510,http://localhost:3050,https://en.wikipedia.org/wiki/Linear_algebra,1.56,200,True,



Summary Statistics by Base URL:


Unnamed: 0_level_0,mean,min,max,count
base_url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
http://localhost:3050,1.396,0.55,2.27,5
http://localhost:8072,0.052,0.01,0.2,5
