In [177]:
from asyncio import new_event_loop

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [276]:
def process(device, data_dir="./data"):
    """Load one device's benchmark CSV and add derived per-prompt / per-token columns.

    Parameters
    ----------
    device : str
        Device label; the file read is ``{data_dir}/{device}_outputs.csv``.
    data_dir : str, optional
        Directory that holds the CSV files. Defaults to ``"./data"``, the
        location that used to be hard-coded, so existing calls are unaffected.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with these changes:

        * ``input_tokens_per_prompt`` and ``output_tokens_per_prompt`` added;
        * ``prompts`` converted to ``str``;
        * ``load_time``, ``prompt_eval_time``, ``output_eval_time`` and
          ``total_time`` divided by 1000;
        * per-token ratios of those scaled times added.

    Notes
    -----
    The CSV must provide the columns ``prompts``, ``input_tokens``,
    ``output_tokens``, ``load_time``, ``prompt_eval_time``,
    ``output_eval_time`` and ``total_time``. Zero denominators are not
    special-cased: pandas division yields ``inf``/``NaN`` for such rows.
    """
    df = pd.read_csv(f"{data_dir}/{device}_outputs.csv")

    # The per-prompt averages need "prompts" while it is still numeric, so they
    # are computed before the column is turned into strings.
    df["input_tokens_per_prompt"] = df["input_tokens"] / df["prompts"]
    df["output_tokens_per_prompt"] = df["output_tokens"] / df["prompts"]
    # presumably cast to str so plotly treats it as a discrete category — TODO confirm
    df["prompts"] = df["prompts"].astype(str)

    # Scale the raw timings (presumably milliseconds -> seconds; confirm against
    # whatever produced the CSV). This must happen before the ratios below.
    for time_col in ("load_time", "prompt_eval_time", "output_eval_time", "total_time"):
        df[time_col] = df[time_col] / 1000

    # (new column, numerator, denominator) — listed in the order the columns
    # are appended to the frame.
    ratio_specs = (
        ("load_time_per_input_token", "load_time", "input_tokens"),
        ("prompt_eval_time_per_input_token", "prompt_eval_time", "input_tokens"),
        ("output_eval_time_per_output_token", "output_eval_time", "output_tokens"),
        ("total_time_per_input_token", "total_time", "input_tokens"),
        ("total_time_per_output_token", "total_time", "output_tokens"),
    )
    for new_col, numerator, denominator in ratio_specs:
        df[new_col] = df[numerator] / df[denominator]

    return df

# Frame used by the single-device exploration plots in the cells that follow.
data_df = process(device="RTX4060")

In [179]:
# Author note kept from the original cell: remove color from the fig below and run again.
# Every figure here plots a metric against the row position in data_df.
row_positions = range(len(data_df))
scatter_specs = [
    dict(y="load_time", marginal_y="histogram"),
    dict(y="total_time", color="prompts", marginal_y="violin"),
    dict(y="load_time_per_input_token", color="prompts", size="input_tokens", marginal_y="box"),
    dict(y="total_time_per_output_token", color="prompts", marginal_y="violin"),
]
for spec in scatter_specs:
    fig = px.scatter(data_df, x=row_positions, **spec)
    fig.show()

In [180]:
# Row-position scatters coloured by the prompt_length / output_length columns.
row_positions = range(len(data_df))
scatter_specs = [
    dict(y="total_time_per_output_token", color="prompt_length", marginal_y="violin"),
    dict(y="output_tokens_per_prompt", color="prompt_length", marginal_y="violin"),
    dict(y="output_tokens_per_prompt", color="output_length", marginal_y="box"),
]
for spec in scatter_specs:
    fig = px.scatter(data_df, x=row_positions, **spec)
    fig.show()

In [181]:
# The four timing columns drawn together in the two trendline figures.
timing_columns = ["total_time", "load_time", "prompt_eval_time", "output_eval_time"]
figure_specs = [
    # total_time per row, marker size following output_tokens
    dict(
        x=range(len(data_df)),
        y="total_time",
        size="output_tokens",
        color="output_length",
        marginal_y="violin",
    ),
    # all timings against the output token count, with a lowess trendline
    dict(x="output_tokens", y=list(timing_columns), trendline="lowess"),
    # all timings against the input token count, with a lowess trendline
    dict(x="input_tokens", y=list(timing_columns), trendline="lowess"),
]
for spec in figure_specs:
    fig = px.scatter(data_df, **spec)
    fig.show()

In [182]:
# Per-output-token eval time against the output token count.
# Trendline values noted by the author: ['lowess', 'rolling', 'ewm', 'expanding', 'ols']
scatter_kwargs = dict(
    x="output_tokens",
    y="output_eval_time_per_output_token",
    trendline="lowess",
    marginal_y="histogram",
)
fig = px.scatter(data_df, **scatter_kwargs)
fig.show()

In [183]:
# Row-position scatters coloured by prompt_complexity.
row_positions = range(len(data_df))
scatter_specs = [
    dict(y="total_time_per_input_token", size="input_tokens"),
    dict(y="total_time_per_output_token", size="output_tokens"),
    dict(y="output_tokens"),
]
for spec in scatter_specs:
    fig = px.scatter(
        data_df,
        x=row_positions,
        color="prompt_complexity",
        marginal_y="violin",
        **spec,
    )
    fig.show()
# Author's observation: prompt complexity has no effect on latency, ignore the outliers

In [184]:
# Load every device's results; the order fixes the row order of all_df.
rtx_df, u9_df, m3_df = (process(name) for name in ("RTX4060", "Ultra9-185H", "M3"))

all_df = pd.concat([rtx_df, u9_df, m3_df], ignore_index=True)
# Note: despite its name this is a GroupBy object keyed on "device", not a DataFrame.
device_df = all_df.groupby("device")

# One row per device; each keyword is the output column, built from (source column, statistic).
merged = device_df.agg(
    load_time_mean=("load_time", "mean"),
    load_time_var=("load_time", "var"),
    load_time_per_input_token_mean=("load_time_per_input_token", "mean"),
    prompt_eval_time_per_token_mean=("prompt_eval_time_per_input_token", "mean"),
    output_eval_time_per_token_mean=("output_eval_time_per_output_token", "mean"),
)

# NOTE(review): a variance is in squared units of load_time, so these two bars
# are not directly comparable on one axis.
load_time_stats = ["load_time_mean", "load_time_var"]
fig = px.bar(merged, y=load_time_stats)
fig.show()

per_token_means = [
    "load_time_per_input_token_mean",
    "prompt_eval_time_per_token_mean",
    "output_eval_time_per_token_mean",
]
fig = px.bar(merged, y=per_token_means)
fig.show()

Index(['prompts', 'input_tokens', 'output_tokens', 'total_time', 'load_time',
       'prompt_eval_time', 'output_eval_time', 'prompt_complexity',
       'prompt_length', 'output_length', 'device', 'input_tokens_per_prompt',
       'output_tokens_per_prompt', 'load_time_per_input_token',
       'prompt_eval_time_per_input_token', 'output_eval_time_per_output_token',
       'total_time_per_input_token', 'total_time_per_output_token'],
      dtype='object')
