In [1]:
%pip install boto3 pandas plotly requests pillow tabulate kaleido nbformat ipython

Note: you may need to restart the kernel to use updated packages.


In [2]:
import json
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import requests
import os

from utils import get_df_for_benchmark, performance_score, shorten_gpu_name, get_gpu_classes

In [3]:
data_cache_dir = "./data"
os.makedirs(data_cache_dir, exist_ok=True)
full_df_path = os.path.join(data_cache_dir, "all-data.csv")

def load_benchmark(benchmark_id: str):
    cache_location = os.path.join(data_cache_dir, f"{benchmark_id}.csv")
    df = None
    try:
        print(f"Loading data from cached CSV: {cache_location}")
        df = pd.read_csv(cache_location)
        df["timestamp"] = pd.to_datetime(df["timestamp"])
    except FileNotFoundError:
        # otherwise, get the data from the API
        print("Cached CSV not found. Loading data from DynamoDB")
        df = get_df_for_benchmark(benchmark_id)
        if df is not None:
            df["gpu"] = df["gpu"].apply(shorten_gpu_name)
            df["gpu_performance_score"] = df["gpu"].apply(performance_score)
            df.to_csv(cache_location, index=False)
    except Exception as e:
        print("Error loading data from CSV.")
        print(e)
    if df is not None:
        print(f"Loaded {len(df)} rows of data for benchmark {benchmark_id}")
    else:
        print(f"No data found for benchmark {benchmark_id}")
    return df


with open("benchmark_ids.txt", "r") as f:
    benchmark_ids = f.read().splitlines()

try:
    df = pd.read_csv(full_df_path)
    df["timestamp"] = pd.to_datetime(df["timestamp"])
    print(f"Loaded {len(df)} rows of data from {full_df_path}")
except FileNotFoundError:
    print(f"Couldn't find cached CSV at {full_df_path}. Loading data from DynamoDB.")
    chunks = []

    for benchmark_id in benchmark_ids:
        chunk = load_benchmark(benchmark_id)
        if chunk is not None:
            chunks.append(chunk)
    df = pd.concat(chunks)
    df.to_csv(full_df_path, index=False)

Loaded 191032 rows of data from ./data/all-data.csv


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 191032 entries, 0 to 191031
Data columns (total 15 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   timestamp              191032 non-null  datetime64[ns]
 1   batch_size             191032 non-null  int64         
 2   image_size             191032 non-null  int64         
 3   backend                191032 non-null  object        
 4   salad_machine_id       191032 non-null  object        
 5   images                 191032 non-null  object        
 6   qr_data                191032 non-null  object        
 7   prompt                 191032 non-null  object        
 8   steps                  191032 non-null  int64         
 9   qrGenTime              191032 non-null  float64       
 10  imageGenTime           191032 non-null  float64       
 11  gpu                    191032 non-null  object        
 12  vram                   191032 non-null  floa

In [5]:
df.head()

Unnamed: 0,timestamp,batch_size,image_size,backend,salad_machine_id,images,qr_data,prompt,steps,qrGenTime,imageGenTime,gpu,vram,totalTime,gpu_performance_score
0,2023-11-13 21:17:34.592,1,512,stable-fast-qr-code,67c6c009-1018-8a5f-8658-dd8162c753bd,['https://salad-benchmark-assets.download/sd-1...,https://salad.com,leafy green salad,15,0.502758,111.598356,GTX 1650,4.0,112.106,1650.1
1,2023-11-13 21:17:34.592,1,512,stable-fast-qr-code,67c6c009-1018-8a5f-8658-dd8162c753bd,['https://salad-benchmark-assets.download/sd-1...,https://salad.com,leafy green salad,15,0.509855,100.297959,GTX 1650,4.0,100.813,1650.1
2,2023-11-13 21:19:45.664,1,512,stable-fast-qr-code,67c6c009-1018-8a5f-8658-dd8162c753bd,['https://salad-benchmark-assets.download/sd-1...,https://salad.com,leafy green salad,15,0.513511,101.023498,GTX 1650,4.0,101.543,1650.1
3,2023-11-13 21:21:56.736,1,512,stable-fast-qr-code,67c6c009-1018-8a5f-8658-dd8162c753bd,['https://salad-benchmark-assets.download/sd-1...,https://salad.com,leafy green salad,15,0.510402,100.517272,GTX 1650,4.0,101.033,1650.1
4,2023-11-13 21:24:07.808,1,512,stable-fast-qr-code,67c6c009-1018-8a5f-8658-dd8162c753bd,['https://salad-benchmark-assets.download/sd-1...,https://salad.com,leafy green salad,15,0.502009,100.377556,GTX 1650,4.0,100.885,1650.1


In [6]:
def get_data_for_gpu(gpu: str, df: pd.DataFrame = df):
    gpu_df = df[df["gpu"] == gpu]
    return gpu_df

gpu_classes = get_gpu_classes()
print(f"{len(gpu_classes)} GPU classes found")

23 GPU classes found


In [7]:
# Get the total number of images generated. Can be determined by summing batch_size in all rows
total_images = df["batch_size"].sum()

# total with size 512
total_images_512 = df[df["image_size"] == 512]["batch_size"].sum()

# total with size 768
total_images_768 = df[df["image_size"] == 768]["batch_size"].sum()

print(f"Total images generated: {total_images}")
print(f"Total images generated with size 512: {total_images_512}")
print(f"Total images generated with size 768: {total_images_768}")

Total images generated: 458473
Total images generated with size 512: 238193
Total images generated with size 768: 220280


In [8]:
gpu_short_names = list(map(lambda item: shorten_gpu_name(item["name"]), gpu_classes))
def sort_gpu_list(gpu_list):
    # Helper function to extract numerical value and Ti status from GPU model name
    def extract_gpu_info(model):
        # Extract numerical value
        num_value = int(''.join(filter(str.isdigit, model)))
        # Check for 'Ti' version
        is_ti = 1 if model.endswith('Ti') else 0
        return num_value + is_ti

    # Sort the list using the helper function
    gpu_list.sort(key=lambda model: extract_gpu_info(model))

    return gpu_list

gpu_short_names = sort_gpu_list(gpu_short_names)
for name in gpu_short_names:
    num_results = len(get_data_for_gpu(name))
    print(f"{name}: {num_results} results")

GTX 1050 Ti: 1189 results
GTX 1060: 2579 results
GTX 1070: 5700 results
GTX 1650: 2617 results
GTX 1660: 5920 results
GTX 1660: 5920 results
RTX 2060: 9962 results
RTX 2070: 9827 results
RTX 2080: 9620 results
RTX 2080 Ti: 9637 results
RTX 3050: 13348 results
RTX 3060: 9638 results
RTX 3060 Ti: 9675 results
RTX 3070: 9372 results
RTX 3070 Ti: 9256 results
RTX 3080: 9591 results
RTX 3080 Ti: 9397 results
RTX 3090: 9564 results
RTX 3090 Ti: 9600 results
RTX 4070: 9635 results
RTX 4070 Ti: 9608 results
RTX 4080: 9600 results
RTX 4090: 9601 results
RTX 4080: 9600 results
RTX 4090: 9601 results


In [9]:
def summarize_gpu(gpu: str):
    gpu_df = get_data_for_gpu(gpu)
    total_rows_in_group = len(gpu_df)
    sum_of_batch_sizes = gpu_df["batch_size"].sum()
    print("##################################################")
    print(f"\n{gpu}: {total_rows_in_group} rows, {sum_of_batch_sizes} images\n")

    # Group by backend and show how many were completed for each backend
    backend_counts = gpu_df.groupby("backend")


    for backend in backend_counts.groups.keys():
        total_rows_in_group = len(backend_counts.groups[backend])
        sum_of_batch_sizes = backend_counts.get_group(backend)["batch_size"].sum()
        print(f"{backend}: {total_rows_in_group} rows, {sum_of_batch_sizes} images")


    print("\nBy Image Size")

    # Group by image size and show how many were completed for each image size, for each backend
    image_size_counts = gpu_df.groupby("image_size")

    for image_size in image_size_counts.groups.keys():
        for backend in backend_counts.groups.keys():
            try:
              total_rows_for_backend_and_image_size = len(image_size_counts.get_group(image_size).groupby("backend").groups[backend])
              sum_of_batch_sizes = image_size_counts.get_group(image_size).groupby("backend").get_group(backend)["batch_size"].sum()
              print(f"{image_size}x{image_size} {backend}: {total_rows_for_backend_and_image_size} rows, {sum_of_batch_sizes} images")
            except KeyError:
              print(f"{image_size}x{image_size} {backend}: 0 rows, 0 images")

    print("\nBy Batch Size")

    # Group by batch size and show how many were completed for each batch size, for each backend
    batch_size_counts = gpu_df.groupby("batch_size")

    for batch_size in batch_size_counts.groups.keys():
        print()
        for image_size in image_size_counts.groups.keys():
          print()
          for backend in backend_counts.groups.keys():
              selection = gpu_df[gpu_df["batch_size"] == batch_size]
              selection = selection[selection["image_size"] == image_size]
              selection = selection[selection["backend"] == backend]
              total_rows_for_backend_and_image_size = len(selection)
              sum_of_batch_sizes = selection["batch_size"].sum()
              avg_total_time = selection["totalTime"].mean()
              print(f"{batch_size} {image_size}x{image_size} {backend}: {total_rows_for_backend_and_image_size} rows, {sum_of_batch_sizes} images, {avg_total_time} seconds")

# for gpu in gpu_short_names:
#     summarize_gpu(gpu)

In [15]:
from plotly.subplots import make_subplots

chart_dir = "./charts"
os.makedirs(chart_dir, exist_ok=True)

backend_colors = {
    "a1111": "#FF0000",
    "comfy": "#00FF00",
    "sdnext": "#0000FF",
    "stable-fast-qr-code": "#FF00FF",
}


def draw_bar_chart_for_gpu(
    gpu: str,
    df: pd.DataFrame = df,
    metric: str = "totalTime",
    metric_title: str = "Total Request Time (seconds)",
    prefix: str = "",
):
    gpu_df = get_data_for_gpu(gpu, df)
    if prefix.endswith("/"):
        os.makedirs(f"{chart_dir}/{prefix}", exist_ok=True)

    # Initialize the traces for each backend
    traces = {
        backend: {
            "x": [],
            "y": [],
            "type": "bar",
            "name": backend,
            "text": [],
            "textposition": "outside",
        }
        for backend in ["a1111", "comfy", "sdnext", "stable-fast-qr-code"]
    }

    # Create a flag to add legend only for the first trace
    first_trace_flag = True

    for image_size in [512, 768]:
        for batch_size in [1, 2, 4]:
            selection = gpu_df[
                (gpu_df["image_size"] == image_size)
                & (gpu_df["batch_size"] == batch_size)
            ]

            for backend in ["a1111", "comfy", "sdnext", "stable-fast-qr-code"]:
                unit = selection[selection["backend"] == backend]
                avg_total_time = unit[metric].mean()

                # Append the x and y values to the corresponding trace
                traces[backend]["x"].append(f"{batch_size} @ {image_size}")
                traces[backend]["y"].append(avg_total_time)
                traces[backend]["text"].append(f"{avg_total_time:.2f}")

                # Add the legend only for the first trace of each backend
                if first_trace_flag:
                    traces[backend]["showlegend"] = True
                    first_trace_flag = False

    # Reset the flag for the next backend
    first_trace_flag = True

    # Convert trace dictionaries to plotly graph objects and add to the figure
    fig = make_subplots(rows=1, cols=1)
    for backend, trace in traces.items():
        fig.add_trace(go.Bar(trace), row=1, col=1)

    # Update the layout
    title = f"{gpu} Average {metric_title} (Lower is Better)"
    fig.update_layout(
        barmode="group",
        title=title,
        xaxis_title="Batch Size @ Image Size",
        yaxis_title=f"Average {metric_title} (seconds)",
        legend=dict(
            orientation="h",  # Horizontal orientation
            yanchor="bottom",
            y=1.02,  # Position the legend just above the top of the chart
            xanchor="center",
            x=0.5,
        ),
    )

    return fig
    


for gpu in gpu_short_names:
    fig = draw_bar_chart_for_gpu(gpu, df, "totalTime", "Total Request Time (seconds)", "raw/")
    fig.show()
    fig.write_image(f"{chart_dir}/raw/{gpu}-totalTime.png")

In [13]:
def draw_bar_chart_for_per_machine_performance(gpu:str, image_size:int, batch_size:int, backend:str):
    gpu_df = get_data_for_gpu(gpu)
    selection = gpu_df[(gpu_df["image_size"] == image_size) & (gpu_df["batch_size"] == batch_size) & (gpu_df["backend"] == backend)]
    by_machine = selection.groupby("salad_machine_id")
    
    traces = [{
        "x": [],
        "y": [],
        "type": "bar",
        "name": machine_id,
        "text": [],
        "textposition": "inside",
    } for machine_id in by_machine.groups.keys()]

    for i, machine_id in enumerate(by_machine.groups.keys()):
        unit = by_machine.get_group(machine_id)
        avg_total_time = unit["totalTime"].mean()
        traces[i]["x"].append(machine_id)
        traces[i]["y"].append(avg_total_time)
        traces[i]["text"].append(f"{avg_total_time:.2f}")

    overall_average = selection["totalTime"].mean()

    bad_nodes = []
    
    # Color any bars that are more than a standard deviation above the mean
    for i, trace in enumerate(traces):
        if trace["y"][0] > overall_average + selection["totalTime"].std():
            traces[i]["marker"] = {"color": "#FF0000"}
            bad_nodes.append(trace["x"][0])
        else:
            traces[i]["marker"] = {"color": "#0000FF"}
    
    fig = make_subplots(rows=1, cols=1)
    for trace in traces:
        fig.add_trace(go.Bar(trace), row=1, col=1)

    fig.update_layout(
        barmode="group",
        title=f"{gpu} - {batch_size} @ {image_size} - {backend} - Average Total Request Time (Lower is Better)",
        xaxis_title="Machine ID",
        yaxis_title="Average Total Request Time (seconds)",
        
    )
    return bad_nodes, fig


all_bad_nodes = []
os.makedirs(f"{chart_dir}/per-machine", exist_ok=True)

for gpu in gpu_short_names:
    for image_size in [512, 768]:
        for batch_size in [1, 2, 4]:
            for backend in ["a1111", "comfy", "sdnext", "stable-fast-qr-code"]:
                bad_nodes, fig = draw_bar_chart_for_per_machine_performance(gpu, image_size, batch_size, backend)
                fig.write_image(f"{chart_dir}/per-machine/{gpu}-{batch_size}-{image_size}-{backend}.png", width=1000, height=500)
                all_bad_nodes.extend(bad_nodes)

all_bad_nodes = list(set(all_bad_nodes))
print(f"Bad nodes: {len(all_bad_nodes)}")

Bad nodes: 211


In [14]:
total_nodes = df["salad_machine_id"].nunique()
print(f"Total nodes: {total_nodes}")


data_without_bad_nodes = df[~df["salad_machine_id"].isin(all_bad_nodes)]

os.makedirs(f"{chart_dir}/without-bad-nodes", exist_ok=True)

for gpu in gpu_short_names:
    fig = draw_bar_chart_for_gpu(gpu, data_without_bad_nodes, "totalTime", "Total Request Time (seconds)")
    fig.show()
    fig.write_image(f"{chart_dir}/without-bad-nodes/{gpu}-totalTime.png")

Total nodes: 829
Top of function: GTX 1050 Ti
GTX 1050 Ti Average Total Request Time (seconds) (Lower is Better)


Top of function: GTX 1060
GTX 1060 Average Total Request Time (seconds) (Lower is Better)


Top of function: GTX 1070
GTX 1070 Average Total Request Time (seconds) (Lower is Better)


Top of function: GTX 1650
GTX 1650 Average Total Request Time (seconds) (Lower is Better)


Top of function: GTX 1660
GTX 1660 Average Total Request Time (seconds) (Lower is Better)


Top of function: GTX 1660
GTX 1660 Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 2060
RTX 2060 Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 2070
RTX 2070 Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 2080
RTX 2080 Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 2080 Ti
RTX 2080 Ti Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 3050
RTX 3050 Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 3060
RTX 3060 Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 3060 Ti
RTX 3060 Ti Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 3070
RTX 3070 Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 3070 Ti
RTX 3070 Ti Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 3080
RTX 3080 Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 3080 Ti
RTX 3080 Ti Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 3090
RTX 3090 Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 3090 Ti
RTX 3090 Ti Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 4070
RTX 4070 Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 4070 Ti
RTX 4070 Ti Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 4080
RTX 4080 Average Total Request Time (seconds) (Lower is Better)


Top of function: RTX 4090
RTX 4090 Average Total Request Time (seconds) (Lower is Better)
