In [3]:
import pandas as pd
import subprocess
from tqdm.auto import tqdm

In [4]:
# CSV that supplies the `url` column to probe.
# NOTE(review): this is the HTTP/3 *results* file — presumably reused so both
# runs cover the same hosts; confirm that is intentional.
INPUT_FILE = "./curl_http3_results.csv"
# Presumably where the per-URL HTTP/2 metrics are saved; no cell visible here
# writes it — TODO confirm.
RESULTS_FILE = "./curl_http2_results.csv"
# Presumably where failed URLs and their curl exit codes are saved; no cell
# visible here writes it — TODO confirm.
ERRORS_FILE = "./curl_http2_errors.csv"

In [10]:
# Upper bound on how many rows (URLs) of INPUT_FILE are used in this run.
MAX_URLS = 1000

# `nrows` stops parsing after MAX_URLS data rows, instead of loading the whole
# CSV and then throwing away everything past the first rows with .head().
df_input = pd.read_csv(INPUT_FILE, nrows=MAX_URLS)
df_input

Unnamed: 0,url,time_appconnect,time_connect,time_namelookup,time_pretransfer,time_redirect,time_starttransfer,time_total,remote_ip,remote_port
0,www.google.com,0.0,0.022662,0.008878,0.022730,0.0,0.081263,0.093462,142.251.209.132,80
1,www.microsoft.com,0.0,0.014212,0.009477,0.014251,0.0,0.103980,0.121731,104.107.161.181,80
2,www.facebook.com,0.0,0.018751,0.005586,0.018789,0.0,0.033634,0.033684,157.240.251.35,80
3,www.amazonaws.com,0.0,0.114871,0.008822,0.114981,0.0,0.222334,0.222431,72.21.206.80,80
4,www.apple.com,0.0,0.009239,0.005936,0.009273,0.0,0.014353,0.014406,104.108.144.225,80
...,...,...,...,...,...,...,...,...,...,...
995,www.wetter.com,0.0,0.028378,0.013029,0.028508,0.0,0.044327,0.044527,143.204.215.25,80
996,www.sport.es,0.0,0.147898,0.130532,0.148017,0.0,0.166151,0.166223,146.75.118.133,80
997,www.tvn24.pl,0.0,0.025205,0.011640,0.025289,0.0,0.039386,0.039511,18.245.31.87,80
998,www.wisc.edu,0.0,0.156825,0.143415,0.156914,0.0,0.170707,0.170857,18.66.122.54,80


In [6]:
# curl --write-out variables collected for every request. The write-out format
# below emits one line per entry, in this order.
metrics_keys = [
    'time_appconnect',
    'time_connect',
    'time_namelookup',
    'time_pretransfer',
    'time_redirect',
    'time_starttransfer',
    'time_total',
    'remote_ip',
    'remote_port'
]

# One "name: %{name}" line per metric. "\\n" is a literal backslash followed by
# "n" (not a newline character): curl expands that escape itself when it
# processes the --write-out format.
write_out_format = "\\n".join([f"{key}: %{{{key}}}" for key in metrics_keys]) + "\\n"

# Fixed part of the curl invocation; the URL is appended as the final argument.
base_command = [
    'curl', '--http2',            # Request HTTP/2 (negotiated; curl can fall back to HTTP/1.1)
    '--proto-default', 'https',   # Scheme-less URLs (e.g. "www.example.com") would otherwise be
                                  # fetched as http:// on port 80, i.e. without TLS
    '-4',                         # Use IPv4 only
    '-o', '/dev/null',            # Discard the response body, only the metrics are wanted
    '-s',                         # Silent mode: no progress meter or error messages
    '--max-time', '5',            # Limit the whole transfer (not just the connect phase) to 5 seconds
    '-w', write_out_format        # Print the selected metrics after the transfer
]

In [None]:
def _parse_write_out(stdout):
    """Collect curl's "name: value" write-out lines into a dict.

    Lines that do not contain ": " are skipped; names and values are
    whitespace-stripped and values stay strings.
    """
    fields = {}
    for raw_line in stdout.splitlines():
        name, separator, value = raw_line.partition(': ')
        if separator:
            fields[name.strip()] = value.strip()
    return fields


results = []   # one dict per successful request: 'url' plus the parsed metrics
errors = []    # one dict per failed request: 'url' and curl's exit code

# Only the `url` column is needed, so iterate it directly.
for url in tqdm(df_input['url'], total=len(df_input), desc="Processing URLs", leave=True):
    try:
        # check=True makes a non-zero curl exit status raise CalledProcessError.
        completed = subprocess.run(base_command + [url], capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as failure:
        errors.append({
            'url': url,
            'errorCode': failure.returncode,
        })
        continue

    results.append({'url': url, **_parse_write_out(completed.stdout)})