In [2]:
import json
import csv
import polars as pl
import seaborn as sns
from datetime import datetime
from dateutil import parser
import os

price_file = "../../../benchmark/prices.json"
with open(price_file) as f:
    prices = json.load(f)

vcpu_price = 0.004
mem_gb_price = 0.001
    
def get_price_map():
  price_map = {}
  for gpu_obj in prices["items"]:
      gpu_name = gpu_obj["name"].lower()
      price_map[gpu_name] = {}
      for price_obj in gpu_obj["prices"]:
          price_map[gpu_name][price_obj["priority"]] = float(price_obj["price"])
  return price_map

price_map = get_price_map()
gpt4o_mini_price_per_million = {
  "input": .15,
  "output": .6
}
gpt4o_mini_token_multiplier = 2.0

def get_df(gpu):
    datafile = f"{gpu}.jsonl"
    node_counts = f"{gpu}-node-count.csv"
    test_config = f"{gpu}-test-config.json"

    df_file = f"{gpu}-df.csv"
    
    with open(test_config) as f:
        test_config = json.load(f)
    
    gpu_uuid = test_config["container"]["resources"]["gpu_classes"][0]
    gpu_obj = next((x for x in prices["items"] if x["id"] == gpu_uuid), None)
    gpu_name = gpu_obj["name"].lower()

    cost = {}
    for priority in price_map[gpu_name]:
        cost[priority] = price_map[gpu_name][priority] + (vcpu_price * test_config["container"]["resources"]["cpu"]) + (mem_gb_price * test_config["container"]["resources"]["memory"] // 1024)


    if os.path.exists(df_file):
        return pl.read_csv(df_file), cost

    all_results = []

    with open(datafile) as f:
        for line in f:
            if not line:
                continue
            data = json.loads(line)
            if "data" in data and "time" in data["data"]:
                data["data"]["time"] = parser.isoparse(data["data"]["time"])
                all_results.append(data)

    tz = all_results[0]["data"]["time"].tzinfo

    with open(node_counts) as f:
        reader = csv.reader(f)
        for row in reader:
            if not row:
                continue
            time, count = row
            if not time or not count:
                continue
            all_results.append({
                "type": "Point",
                "metric": "node_count",
                "data": {
                    "time": datetime.fromtimestamp(int(time), tz=tz),
                    "value": int(count)
                }
            })

    metrics = ["http_req_duration", "http_req_failed",
               "vus", "node_count", "inputTokens", "outputTokens"]

    all_results = sorted(all_results, key=lambda x: x["data"]["time"])

   
    first_time = all_results[0]["data"]["time"]
    all_results = [x for x in all_results if x["type"] == "Point" and x["metric"] in metrics]
    results = []
    for result in all_results:
        time_from_start = (result["data"]["time"] - first_time).total_seconds()
        value = result["data"]["value"]
        metric = result["metric"]
        results.append({
            "time_from_start": time_from_start,
            "value": value,
            "metric": metric,
            "gpu": gpu_name,
            "cpu": test_config["container"]["resources"]["cpu"],
            "memory": test_config["container"]["resources"]["memory"],
        })
    
    
    df = pl.DataFrame(results)
    df.sort("time_from_start", multithreaded=True)
    df.write_csv(df_file)
    return df, cost


def get_requests_per_second_by_vu(df):

    # First, let's filter for completed HTTP requests
    http_requests = df.filter(pl.col("metric") == "http_req_duration")

    # Get the VU counts at each time point
    vu_counts = df.filter(pl.col("metric") == "vus")

    # Function to count requests per second at each VU level
    http_requests_windowed = http_requests.with_columns(
        pl.col("time_from_start").floor().alias("time_window")
    )
    
    vu_counts_windowed = vu_counts.with_columns(
        pl.col("time_from_start").floor().alias("time_window")
    )
    
    # Get the latest VU count in each window
    vu_by_window = vu_counts_windowed.group_by("time_window").agg(
        pl.col("value").last().alias("vu_count")
    )
    
    # Count requests in each window
    requests_by_window = http_requests_windowed.group_by("time_window").agg(
        pl.count().alias("request_count")
    )
    
    # Join the datasets on time window
    result = requests_by_window.join(
        vu_by_window, on="time_window", how="inner"
    )
    
    # Calculate requests per second per VU
    result = result.with_columns(
        pl.col("request_count").alias("requests_per_second")
    )
    
    # Group by VU count to get average RPS at each VU level
    final_result = result.group_by("vu_count").agg(
        pl.col("requests_per_second").mean().alias("avg_requests_per_second"),
        pl.col("requests_per_second").count().alias("sample_count")
    ).sort("vu_count")
    return final_result


def get_gpt4o_mini_price(df):
  total_input_tokens = df.filter(pl.col("metric") == "inputTokens").select(pl.col("value")).sum().item()
  total_output_tokens = df.filter(pl.col("metric") == "outputTokens").select(pl.col("value")).sum().item()
  gpt4o_mini_price = gpt4o_mini_price_per_million["input"] * total_input_tokens * gpt4o_mini_token_multiplier / 1e6 + gpt4o_mini_price_per_million["output"] * total_output_tokens / 1e6
  return gpt4o_mini_price


def get_throughput_stats(rps_by_vu):
  max_throughput_row = rps_by_vu.sort("avg_requests_per_second", descending=True).head(1)

  max_throughput = max_throughput_row["avg_requests_per_second"][0]
  vus_at_max_throughput = max_throughput_row["vu_count"][0]
  return max_throughput, vus_at_max_throughput


def process_gpu(gpu):
    df, cost = get_df(gpu)
    rps_by_vu = get_requests_per_second_by_vu(df)
    max_throughput, vus_at_max_throughput = get_throughput_stats(rps_by_vu)
    gpt4o_mini_price = get_gpt4o_mini_price(df)
    num_images = df.filter(pl.col("metric") == "http_req_duration").select(pl.col("value")).count().item()
    avg_input_tokens_per_image = df.filter(pl.col("metric") == "inputTokens").select(pl.col("value")).mean().item()
    test_duration = df.filter(pl.col("metric") == "http_req_duration").select(pl.col("time_from_start")).max().item()
    max_nodes = df.filter(pl.col("metric") == "node_count").select(pl.col("value")).max().item()
    cost_of_cluster_per_s = max_nodes * cost["batch"] / 3600
    cost_per_image_at_peak_throughput = cost_of_cluster_per_s / max_throughput
    cost_of_whole_batch_at_peak_throughput = cost_per_image_at_peak_throughput * num_images
    savings_vs_openai = gpt4o_mini_price / cost_of_whole_batch_at_peak_throughput
    return {
        "gpu": gpu,
        "cost": cost,
        "max_throughput": max_throughput,
        "vus_at_max_throughput": vus_at_max_throughput,
        "avg_input_tokens_per_image": avg_input_tokens_per_image,
        "gpt4o_mini_price": gpt4o_mini_price,
        "cost_per_image_at_peak_throughput": cost_per_image_at_peak_throughput,
        "images_per_dollar_at_peak_throughput": 1 / cost_per_image_at_peak_throughput,
        "cost_of_whole_batch_at_peak_throughput": cost_of_whole_batch_at_peak_throughput,
        "savings_vs_openai": savings_vs_openai,
        "test_duration": test_duration,
        "max_nodes": max_nodes,
        "num_images": num_images
    }

gpus = ["3090", "4080", "4090"]
for gpu in gpus:
    print(json.dumps(process_gpu(gpu), indent=2))

  pl.count().alias("request_count")


{
  "gpu": "3090",
  "cost": {
    "high": 0.266,
    "medium": 0.21600000000000003,
    "low": 0.16599999999999998,
    "batch": 0.116
  },
  "max_throughput": 33.0,
  "vus_at_max_throughput": 237.0,
  "avg_input_tokens_per_image": 1438.3238708242393,
  "gpt4o_mini_price": 37.0655607,
  "cost_per_image_at_peak_throughput": 9.764309764309767e-06,
  "images_per_dollar_at_peak_throughput": 102413.79310344825,
  "cost_of_whole_batch_at_peak_throughput": 0.7025323232323234,
  "savings_vs_openai": 52.75993641041713,
  "test_duration": 3416.096383,
  "max_nodes": 10.0,
  "num_images": 71949
}
{
  "gpu": "4080",
  "cost": {
    "high": 0.29600000000000004,
    "medium": 0.246,
    "low": 0.196,
    "batch": 0.14600000000000002
  },
  "max_throughput": 51.36363636363637,
  "vus_at_max_throughput": 280.0,
  "avg_input_tokens_per_image": 1438.1196772041656,
  "gpt4o_mini_price": 55.4497893,
  "cost_per_image_at_peak_throughput": 7.895771878072763e-06,
  "images_per_dollar_at_peak_throughput": 12

In [11]:
def process_captions(gpu):
  caption_file = f"{gpu}-console.txt"
  with open(caption_file) as f:
    for line in f:
      content_start = line.find("] ") + 2
      content = line[content_start:]
      parts = content.split("|", 1)
      if len(parts) == 2:
        url = parts[0].strip()
        caption = parts[1].strip().strip('"')
        if len(url) > 0 and len(caption) > 0:
          yield url, caption
        
def captions_to_csv(gpu):
  with open(f"{gpu}-captions.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerow(["url", "caption"])
    for url, caption in process_captions(gpu):
      writer.writerow([url, caption])
      
for gpu in gpus:
    captions_to_csv(gpu)