# 0. Sequential run 

## Config

In [10]:
# Settings for this sequential run.
# NOTE(review): `steps` is not referenced by any cell visible in this section —
# confirm whether it is still needed.
steps = "500"
# NOTE(review): `input` shadows the Python built-in of the same name. The name is
# kept because the measurement cell interpolates it into the output filename.
input = "llama_3.bin"


## Generate measurements

In [11]:
import subprocess
import datetime
from pathlib import Path

# The results directory has to exist before the output file can be opened.
measurements_dir = Path("measurements")
measurements_dir.mkdir(exist_ok=True)

# A timestamp in the filename keeps successive runs from overwriting each other.
date = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
output_path = f"measurements/output_{input}_seq_{date}.json"

# Bring the `cliftinstr` make target up to date; check=True aborts the cell if
# the build fails.
build_command = ["make", "cliftinstr"]
subprocess.run(build_command, check=True)

# NOTE(review): the model path and the "-n" value are hard-coded here instead of
# coming from `input` / `steps` in the config cell — confirm which is intended.
prompt = "The son applies for a credit card with an identity already stolen. He takes the card and spends almost all"
run_command = ["./cliftinstr", "-m", "llama3.2_1b.bin", "-n", "20", "-p", prompt]

# Whatever the binary writes to stdout is captured in the output file, which the
# later cells parse as JSON.
with open(output_path, "w") as f:
    subprocess.run(run_command, stdout=f, check=True)

# Name under which the analysis cells open the measurement file.
file = output_path

make: 'cliftinstr' is up to date.


transformer_t configuration:
- embedding_dim:  2048
- hidden_dim:     8192
- layer_count:    16
- q_head_count:   32
- kv_head_count:  8
- vocabulary_len: 128256
- context_len:    2048
Prompt processing (prefill): 25 tokens in  4.064 s (6.151575 tok/s)
Token generation  (decode):  19 tokens in  3.985 s (4.767880 tok/s)


In [12]:
import json

def print_json_schema(data, indent=0):
    """Print an indented outline of a decoded JSON value.

    For a dict, each key is printed together with the type name of its value,
    and the value is then described one level deeper. For a list, its length is
    printed and only the first element (if any) is described. Any other value
    is a leaf and prints nothing.

    Args:
        data: The decoded JSON value to describe.
        indent: Current nesting depth; each level adds two spaces.
    """
    pad = "  " * indent

    if isinstance(data, list):
        print(pad + "- list of " + str(len(data)) + " items")
        if len(data) > 0:
            # The first element is taken as representative of the whole list.
            print_json_schema(data[0], indent + 1)
        return

    if not isinstance(data, dict):
        # Leaf value: nothing to print.
        return

    for name, child in data.items():
        print("{}{}: {}".format(pad, name, type(child).__name__))
        print_json_schema(child, indent + 1)

# Parse the measurement file written by the run above and outline its structure.
with open(file) as measurements_file:
    data = json.loads(measurements_file.read())

print_json_schema(data, indent=0)

JSON_1_COMPT: dict
  ENCODE_TIME: float
  FORWARD_TIME_GENERATION: float
JSON_1: dict
  ENCODE_TIME: float
  FORWARD_TIME_GENERATION: float
  FORWARD_TIME_PROMPT: float
  SAMPLE_TIME: float
JSON_2: dict
  FFN_RMSNORM: list
    - list of 40 items
  FINAL_RMSNORM: list
    - list of 40 items
  MAMTUL_LOGITS: list
    - list of 40 items
  MATMUL_FFN: list
    - list of 40 items
  MATMUL_OUTPUT_ATTENTION: list
    - list of 40 items
  MATMUL_OUTPUT_FFN: list
    - list of 40 items
  MATMUL_QKV: list
    - list of 40 items
  MHA: list
    - list of 40 items
  RMSNORM_INIT: list
    - list of 40 items
  ROPE: list
    - list of 40 items
  SwiGLU: list
    - list of 40 items
JSON_3: dict


In [15]:
import json
import pandas as pd
import plotly.express as px

# Read the measurement file back in.
with open(file) as f:
    data = json.load(f)

# Pull out the JSON_1 sub-dictionary.
json1 = data["JSON_1"]

# Build the DataFrame dynamically: one (key, value) row per JSON_1 entry.
phase_rows = list(json1.items())
df = pd.DataFrame(phase_rows, columns=["phase", "time (ms)"])

# Show the result as a bar chart.
bar_options = dict(
    x="phase",
    y="time (ms)",
    title="TIming of different sections",
    width=600,
)
fig = px.bar(df, **bar_options)
fig.show()


In [28]:
import json
import pandas as pd
import plotly.express as px

with open(file) as f:
    data = json.load(f)

json2 = data["JSON_2"]

# First sample of every JSON_2 component. Entries that are not a non-empty list
# become None so that dropna() can discard them.
first_samples = [
    samples[0] if (isinstance(samples, list) and samples) else None
    for samples in json2.values()
]

df = pd.DataFrame({"component": list(json2), "time (ms)": first_samples}).dropna()

# Log scale on y so that components of very different magnitude stay visible.
fig = px.bar(
    df,
    title="Component time for PROMPT PROCESSING",
    x="component",
    y="time (ms)",
    width=800,
    log_y=True,
    text_auto=".2f",
)
fig.show()


In [29]:
# Second sample of every JSON_2 component (`json2` comes from the cell above).
# Entries that are not a list with at least two samples become None and are
# dropped, instead of raising IndexError on a one-element list.
second_samples = [
    values[1] if isinstance(values, list) and len(values) > 1 else None
    for values in json2.values()
]

df_2 = pd.DataFrame({
    "component": list(json2.keys()),
    "time (ms)": second_samples,
}).dropna()

# Plot df_2. The original cell plotted `df`, which still held the first-sample
# data from the previous cell, so this chart repeated the prompt-processing one.
fig = px.bar(
    df_2,
    x="component",
    y="time (ms)",
    title="Component time for CACHE LOADING",
    log_y=True,
    width=800,
    text_auto=".2f"
)
fig.show()

In [19]:
import json
import pandas as pd
import plotly.express as px

with open(file) as f:
    data = json.load(f)

json2 = data["JSON_2"]

# Mean of every sample after the first two, per component. Components without
# at least three samples get None and are removed by dropna().
decode_means = []
for samples in json2.values():
    if isinstance(samples, list) and len(samples) > 2:
        decode_samples = samples[2:]
        decode_means.append(sum(decode_samples) / len(decode_samples))
    else:
        decode_means.append(None)

df = pd.DataFrame({"component": list(json2), "time (ms)": decode_means}).dropna()

fig = px.bar(
    df,
    title="Average component time for TOKEN GENERATION",
    x="component",
    y="time (ms)",
    width=800,
    log_y=True,
    text_auto='.2f',
)
fig.show()


In [40]:
# Add up the first sample of every JSON_2 component that has at least one.
first_samples = [
    samples[0]
    for samples in json2.values()
    if isinstance(samples, list) and samples
]
total = sum(first_samples)
print(total)

4562.1753


In [None]:
import json
import pandas as pd
import plotly.express as px

with open(file) as f:
    data = json.load(f)

json2 = data["JSON_2"]

# One row per (component, step). The first two samples of each list are skipped
# and zero samples are left out. The step index is taken BEFORE the zeros are
# filtered, so a zero in the middle of a series no longer shifts every later
# sample onto the wrong step (which misaligned components against each other).
rows = [
    {"step": step, "time": value, "component": component}
    for component, values in json2.items()
    if isinstance(values, list)
    for step, value in enumerate(values[2:])
    if value != 0
]

# Explicit columns keep the frame plottable even when no row survived.
df = pd.DataFrame(rows, columns=["step", "time", "component"])

fig = px.line(
    df,
    x="step",
    y="time",
    color="component",
    markers=True,
    labels={"time": "time (ms)"},
    title="Per-step time per component during token generation"
)

fig.show()


In [82]:
import json
import numpy as np
import pandas as pd
import plotly.express as px

with open(file) as f:
    data = json.load(f)

# The measurement file read here has no "forward_instr" entry (its top-level
# keys are JSON_1_COMPT, JSON_1, JSON_2 and JSON_3), so the original lookup
# raised KeyError. The per-component sample lists are under "JSON_2".
json2 = data["JSON_2"]

# Same order as the key list this cell used before, spelled as in JSON_2.
# NOTE(review): presumably these are the renamed equivalents of the old
# "*_time" keys — confirm against the instrumentation code.
# "MAMTUL_LOGITS" is spelled exactly as it appears in the file.
phase_order = [
    "RMSNORM_INIT",
    "MATMUL_QKV",
    "ROPE",
    "MHA",
    "MATMUL_OUTPUT_ATTENTION",
    "FFN_RMSNORM",
    "MATMUL_FFN",
    "SwiGLU",
    "MATMUL_OUTPUT_FFN",
    "FINAL_RMSNORM",
    "MAMTUL_LOGITS"
]
# Known phases first, then any component the list above does not mention.
keys = [k for k in phase_order if k in json2]
keys += [k for k in json2 if k not in phase_order]

rows = []
for k in keys:
    samples = json2[k]
    if not isinstance(samples, list):
        continue
    # Zero samples are left out of the distribution, as before.
    rows += [(k, v) for v in samples if v != 0]

df = pd.DataFrame(rows, columns=["phase",  "time (in ms)"])

px.box(df, x="phase", y="time (in ms)",log_y = False,title = "Boxplot of difference phase per execution")


In [None]:
import json
import numpy as np
import pandas as pd
import plotly.express as px

def _as_percentages(times, total):
    """Express each value of ``times`` as a percentage of ``total``.

    When ``total`` is 0 every share is reported as 0.0 instead of raising
    ZeroDivisionError.
    """
    if total == 0:
        return {name: 0.0 for name in times}
    return {name: value / total * 100 for name, value in times.items()}


def _percentage_frame(percentages):
    """Build a component/percentage DataFrame sorted from largest to smallest share."""
    return pd.DataFrame({
        "component": list(percentages.keys()),
        "percentage": list(percentages.values())
    }).sort_values("percentage", ascending=False)


def _percentage_bar(frame, title):
    """Create the bar chart of a frame produced by ``_percentage_frame``."""
    return px.bar(
        frame,
        x="component",
        y="percentage",
        title=title,
        text_auto=".2f"
    )


with open(file) as f:
    data = json.load(f)

json2 = data["JSON_2"]

# Share of each component in the first sample of every list.
first_values = {
    k: v[0] for k, v in json2.items()
    if isinstance(v, list) and len(v) > 0
}
total_first = sum(first_values.values())
percent_first = _as_percentages(first_values, total_first)

df_first = _percentage_frame(percent_first)

fig1 = _percentage_bar(df_first, "Percentage of time for PROMPT_PROCESSING")
fig1.show()

# Share of each component in the summed samples after the first two.
main_values = {
    k: sum(v[2:]) for k, v in json2.items()
    if isinstance(v, list) and len(v) > 2
}
total_main = sum(main_values.values())
percent_main = _as_percentages(main_values, total_main)

df_main = _percentage_frame(percent_main)

fig2 = _percentage_bar(df_main, "Percentage of time for TOKEN_GENERATION")
fig2.show()


# 1. Multi thread comparison