In [1]:
%pip install boto3 pandas plotly requests pillow tabulate kaleido nbformat ipython

Note: you may need to restart the kernel to use updated packages.


In [1]:
import json
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import requests
import os

from utils import get_df_for_benchmark, performance_score, shorten_gpu_name

In [None]:
data_cache_dir = "./data"
os.makedirs(data_cache_dir, exist_ok=True)

def load_benchmark(benchmark_id: str):
    cache_location = os.path.join(data_cache_dir, f"{benchmark_id}.csv")
    df = None
    try:
        print(f"Loading data from cached CSV: {cache_location}")
        df = pd.read_csv(cache_location)
        df["timestamp"] = pd.to_datetime(df["timestamp"])
    except FileNotFoundError:
        # otherwise, get the data from the API
        print("Cached CSV not found. Loading data from DynamoDB")
        df = get_df_for_benchmark(benchmark_id)
        if df is not None:
            df["gpu"] = df["gpu"].apply(shorten_gpu_name)
            df["gpu_performance_score"] = df["gpu"].apply(performance_score)
            df.to_csv(cache_location, index=False)
    except Exception as e:
        print("Error loading data from CSV.")
        print(e)
    if df is not None:
        print(f"Loaded {len(df)} rows of data for benchmark {benchmark_id}")
    else:
        print(f"No data found for benchmark {benchmark_id}")
    return df


with open("benchmark_ids.txt", "r") as f:
    benchmark_ids = f.read().splitlines()

chunks = []

for benchmark_id in benchmark_ids:
    chunk = load_benchmark(benchmark_id)
    if chunk is not None:
       chunks.append(chunk)

df = pd.concat(chunks)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 103577 entries, 0 to 599
Data columns (total 15 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   timestamp              103577 non-null  datetime64[ns]
 1   batch_size             103577 non-null  int64         
 2   image_size             103577 non-null  int64         
 3   backend                103577 non-null  object        
 4   salad_machine_id       103577 non-null  object        
 5   images                 103577 non-null  object        
 6   qr_data                103577 non-null  object        
 7   prompt                 103577 non-null  object        
 8   steps                  103577 non-null  int64         
 9   qrGenTime              103577 non-null  float64       
 10  imageGenTime           103577 non-null  float64       
 11  gpu                    103577 non-null  object        
 12  vram                   103577 non-null  float64     

In [8]:
df.head()

Unnamed: 0,timestamp,batch_size,image_size,backend,salad_machine_id,images,qr_data,prompt,steps,qrGenTime,imageGenTime,gpu,vram,totalTime,gpu_performance_score
0,2023-11-13 21:17:34.592,1,512,stable-fast-qr-code,67c6c009-1018-8a5f-8658-dd8162c753bd,['https://salad-benchmark-assets.download/sd-1...,https://salad.com,leafy green salad,15,0.502758,111.598356,GTX 1650,4.0,112.106,1650.1
1,2023-11-13 21:17:34.592,1,512,stable-fast-qr-code,67c6c009-1018-8a5f-8658-dd8162c753bd,['https://salad-benchmark-assets.download/sd-1...,https://salad.com,leafy green salad,15,0.509855,100.297959,GTX 1650,4.0,100.813,1650.1
2,2023-11-13 21:19:45.664,1,512,stable-fast-qr-code,67c6c009-1018-8a5f-8658-dd8162c753bd,['https://salad-benchmark-assets.download/sd-1...,https://salad.com,leafy green salad,15,0.513511,101.023498,GTX 1650,4.0,101.543,1650.1
3,2023-11-13 21:21:56.736,1,512,stable-fast-qr-code,67c6c009-1018-8a5f-8658-dd8162c753bd,['https://salad-benchmark-assets.download/sd-1...,https://salad.com,leafy green salad,15,0.510402,100.517272,GTX 1650,4.0,101.033,1650.1
4,2023-11-13 21:24:07.808,1,512,stable-fast-qr-code,67c6c009-1018-8a5f-8658-dd8162c753bd,['https://salad-benchmark-assets.download/sd-1...,https://salad.com,leafy green salad,15,0.502009,100.377556,GTX 1650,4.0,100.885,1650.1


In [10]:
def get_data_for_gpu(gpu: str):
    gpu_df = df[df["gpu"] == gpu]
    return gpu_df

gtx1650 = get_data_for_gpu("GTX 1650")
gtx1650.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2617 entries, 0 to 683
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   timestamp              2617 non-null   datetime64[ns]
 1   batch_size             2617 non-null   int64         
 2   image_size             2617 non-null   int64         
 3   backend                2617 non-null   object        
 4   salad_machine_id       2617 non-null   object        
 5   images                 2617 non-null   object        
 6   qr_data                2617 non-null   object        
 7   prompt                 2617 non-null   object        
 8   steps                  2617 non-null   int64         
 9   qrGenTime              2617 non-null   float64       
 10  imageGenTime           2617 non-null   float64       
 11  gpu                    2617 non-null   object        
 12  vram                   2617 non-null   float64       
 13  totalTime

In [11]:
# Sort by imageGenTime, ascending
gtx1650.sort_values(by=["imageGenTime"], inplace=True)
gtx1650.head(20)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gtx1650.sort_values(by=["imageGenTime"], inplace=True)


Unnamed: 0,timestamp,batch_size,image_size,backend,salad_machine_id,images,qr_data,prompt,steps,qrGenTime,imageGenTime,gpu,vram,totalTime,gpu_performance_score
293,2023-11-14 09:49:03.360,1,512,comfy,bbd7772d-427d-2b53-9431-34f76143ca53,[https://salad-benchmark-assets.download/sd-1-...,https://salad.com/pricing,leafy green salad,15,0.03,31.327,GTX 1650,4.0,31.383,1650.1
394,2023-11-14 11:09:53.024,1,512,comfy,bbd7772d-427d-2b53-9431-34f76143ca53,[https://salad-benchmark-assets.download/sd-1-...,https://salad.com/download,fire,15,0.026,31.329,GTX 1650,4.0,31.381,1650.1
295,2023-11-14 09:49:03.360,1,512,comfy,bbd7772d-427d-2b53-9431-34f76143ca53,[https://salad-benchmark-assets.download/sd-1-...,https://salad.com/pricing,leafy green salad,15,0.028,31.361,GTX 1650,4.0,31.414,1650.1
386,2023-11-14 11:09:53.024,1,512,comfy,bbd7772d-427d-2b53-9431-34f76143ca53,[https://salad-benchmark-assets.download/sd-1-...,https://salad.com/download,fire,15,0.029,31.514,GTX 1650,4.0,31.569,1650.1
291,2023-11-14 09:46:52.288,1,512,comfy,bbd7772d-427d-2b53-9431-34f76143ca53,[https://salad-benchmark-assets.download/sd-1-...,https://salad.com/pricing,leafy green salad,15,0.025,31.551,GTX 1650,4.0,31.601,1650.1
297,2023-11-14 09:49:03.360,1,512,comfy,bbd7772d-427d-2b53-9431-34f76143ca53,[https://salad-benchmark-assets.download/sd-1-...,https://salad.com/pricing,leafy green salad,15,0.026,31.77,GTX 1650,4.0,31.821,1650.1
289,2023-11-14 09:46:52.288,1,512,comfy,bbd7772d-427d-2b53-9431-34f76143ca53,[https://salad-benchmark-assets.download/sd-1-...,https://salad.com/pricing,leafy green salad,15,0.029,31.807,GTX 1650,4.0,31.866,1650.1
500,2023-11-14 12:19:47.328,1,512,comfy,bbd7772d-427d-2b53-9431-34f76143ca53,[https://salad-benchmark-assets.download/sd-1-...,https://salad.com/download,gold coins,15,0.027,31.839,GTX 1650,4.0,31.89,1650.1
391,2023-11-14 11:09:53.024,1,512,comfy,bbd7772d-427d-2b53-9431-34f76143ca53,[https://salad-benchmark-assets.download/sd-1-...,https://salad.com/download,fire,15,0.028,31.857,GTX 1650,4.0,31.914,1650.1
503,2023-11-14 12:19:47.328,1,512,comfy,bbd7772d-427d-2b53-9431-34f76143ca53,[https://salad-benchmark-assets.download/sd-1-...,https://salad.com/download,gold coins,15,0.024,31.873,GTX 1650,4.0,31.921,1650.1
