In [9]:
import requests
from IPython.display import Markdown, display
import time
import pandas as pd
from datetime import datetime

# Reader endpoints being compared: two localhost ports and r.jina.ai.
base_urls = [
    'http://localhost:3000',
    'http://localhost:8072',
    'https://r.jina.ai',
]

# Pages fetched through every endpoint above.
urls_to_scrape = [
    'https://en.wikipedia.org/wiki/Tensor_product',
    'https://en.wikipedia.org/wiki/Category_theory',
    'https://www.math3ma.com/blog/the-tensor-product-demystified',
    'https://stackoverflow.com/questions/51358235/is-there-a-difference-between-npm-start-and-npm-run-start',
    'https://en.wikipedia.org/wiki/Linear_algebra',
]

# Request headers sent with every call; all four options carry the same value.
# NOTE(review): exact effect of each option is defined by the reader service,
# not by this notebook - confirm against the reader's documentation.
headers = dict.fromkeys(
    (
        'X-With-Images-Summary',
        'X-With-Links-Summary',
        'X-Remove-Images-From-Markdown',
        'X-Remove-Links-From-Markdown',
    ),
    'true',
)

# Upper bound, in seconds, on how long a single request may take. Without a
# timeout, requests.get can block indefinitely on an unresponsive reader and
# stall the whole benchmark; a timed-out request is recorded as a failure.
REQUEST_TIMEOUT_S = 60

# Create list to store results. It is rebuilt from scratch on every run, so
# re-executing the cell does not accumulate rows from earlier executions.
results = []

# Test each base URL and measure response time
for base_url in base_urls:
    for url_to_scrape in urls_to_scrape:
        try:
            # The page to scrape is appended to the reader's base URL as its path.
            url = f'{base_url}/{url_to_scrape}'

            # perf_counter is monotonic, so the measured interval cannot be
            # distorted by system clock adjustments the way time.time() can.
            start_time = time.perf_counter()
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_S)
            elapsed = time.perf_counter() - start_time

            # A reply with a 4xx/5xx status is not a successful scrape; marking
            # it as such would let error responses skew the latency summary.
            succeeded = response.ok

            result = {
                'timestamp': datetime.now(),
                'base_url': base_url,
                'url': url_to_scrape,
                'response_time': round(elapsed, 2),
                'status_code': response.status_code,
                'success': succeeded,
                'error': None if succeeded else f'HTTP {response.status_code}'
            }

            # Optional: display the response
            # display(Markdown(response.text))

        except requests.exceptions.RequestException as e:
            # Connection errors and timeouts end up here; keep the row so the
            # failure is visible in the results table, but without timing data.
            result = {
                'timestamp': datetime.now(),
                'base_url': base_url,
                'url': url_to_scrape,
                'response_time': None,
                'status_code': None,
                'success': False,
                'error': str(e)
            }

        results.append(result)

# One row per (base URL, scraped URL) request, in the order they were made.
df = pd.DataFrame(results)
print("\nTiming Results:")
display(df)

# Per-endpoint latency statistics, computed over successful requests only.
print("\nSummary Statistics by Base URL:")
summary_stats = ['mean', 'min', 'max', 'count']
successful_requests = df.loc[df['success']]
response_times_by_base = successful_requests.groupby('base_url')['response_time']
summary = response_times_by_base.agg(summary_stats)
display(summary)


Timing Results:


Unnamed: 0,timestamp,base_url,url,response_time,status_code,success,error
0,2024-12-31 12:14:48.336062,http://localhost:3000,https://en.wikipedia.org/wiki/Tensor_product,2.31,200,True,
1,2024-12-31 12:14:49.621913,http://localhost:3000,https://en.wikipedia.org/wiki/Category_theory,1.29,200,True,
2,2024-12-31 12:14:50.203331,http://localhost:3000,https://www.math3ma.com/blog/the-tensor-produc...,0.58,200,True,
3,2024-12-31 12:14:51.387699,http://localhost:3000,https://stackoverflow.com/questions/51358235/i...,1.18,200,True,
4,2024-12-31 12:14:52.988459,http://localhost:3000,https://en.wikipedia.org/wiki/Linear_algebra,1.6,200,True,
5,2024-12-31 12:15:04.451105,http://localhost:8072,https://en.wikipedia.org/wiki/Tensor_product,11.46,200,True,
6,2024-12-31 12:15:08.761900,http://localhost:8072,https://en.wikipedia.org/wiki/Category_theory,4.31,200,True,
7,2024-12-31 12:15:10.622340,http://localhost:8072,https://www.math3ma.com/blog/the-tensor-produc...,1.86,200,True,
8,2024-12-31 12:15:12.743692,http://localhost:8072,https://stackoverflow.com/questions/51358235/i...,2.12,200,True,
9,2024-12-31 12:15:16.529890,http://localhost:8072,https://en.wikipedia.org/wiki/Linear_algebra,3.79,200,True,



Summary Statistics by Base URL:


Unnamed: 0_level_0,mean,min,max,count
base_url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
http://localhost:3000,1.392,0.58,2.31,5
http://localhost:8072,4.708,1.86,11.46,5
https://r.jina.ai,2.948,0.8,5.03,5
