# 0. Sequential run 

## Config

In [None]:
# File name of the model checkpoint; it is also embedded in the measurement
# file name built in the "Generate measurements" cell.
# NOTE(review): this name shadows Python's builtin `input()` for the whole notebook.
input = "stories15M.bin"
# Intended step count for the run, stored as a string.
# NOTE(review): presumably meant to be forwarded to ./run on the command line — confirm.
steps = "500"


In [None]:
### Running this Notebook

To run this notebook, make sure you've installed the dependencies listed in `requirements.txt`:

```bash
pip install -r requirements.txt
```

You also need to download the model binary into the notebook's working directory (the cells below load `stories15M.bin` by relative path):

```bash
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
```


## Generate measurements

In [2]:
import subprocess
import datetime
from pathlib import Path

# Directory that collects one JSON measurement file per run.
Path("measurements").mkdir(exist_ok=True)

# Timestamp the file name so repeated runs never overwrite each other.
date = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
output_path = f"measurements/output_{input}_seq_{date}.json"

# Rebuild the binary first; check=True aborts the cell if the build fails.
subprocess.run(["make", "run"], check=True)

# Run the binary with the model and step count from the Config cell instead of
# hard-coded literals, so the measurement file name always matches what was run.
# stdout is redirected into the measurement file, which later cells parse as JSON.
# NOTE(review): upstream llama2.c uses `-n` for the number of steps and `-s` for
# the random seed; confirm which meaning `-s` has in this modified run.c.
with open(output_path, "w") as f:
    subprocess.run(["./run", input, "-s", str(steps)], stdout=f, check=True)

# Later cells read the measurements through this name.
file = output_path

gcc -O3 -o run run.c -lm


run.c: In function ‘main’:
 1119 |         else if (argv[i][1] == 'g') { global_instr = argv[i + 1]; }
      |                                                    ^


gcc -O3 -o runq runq.c -lm


achieved tok/s: 147.033534


In [3]:
import json

def print_json_schema(data, indent=0):
    """Print an indented outline of the structure of decoded JSON data.

    For a dict, one line per key is printed as ``key: <type name of value>``
    and the value is then described one level deeper. For a list, its length
    is printed and only its first element (if any) is described, one level
    deeper. Any other value prints nothing.

    Args:
        data: The decoded JSON value to describe.
        indent: Current nesting depth; each level adds two spaces.
    """
    pad = '  ' * indent

    if isinstance(data, dict):
        for name, child in data.items():
            print(f"{pad}{name}: {type(child).__name__}")
            print_json_schema(child, indent + 1)
        return

    if not isinstance(data, list):
        # Scalars carry no nested structure, so there is nothing to print.
        return

    print(f"{pad}- list of {len(data)} items")
    if data:
        # Only the first element is inspected as a representative of the list.
        print_json_schema(data[0], indent + 1)

# Parse the measurement file written by the previous cell and outline its structure.
with open(file) as fh:
    data = json.loads(fh.read())

print_json_schema(data)

n_steps: int
n_generation_steps: int
encode_time: float
sampling: list
  - list of 256 items
decode_time: list
  - list of 256 items
forward_instr: dict
  n_steps: int
  global_time: list
    - list of 256 items
  rmsnorm_first_time: list
    - list of 256 items
  matmul_qkv_time: list
    - list of 256 items
  rope_time: list
    - list of 256 items
  multihead_time: list
    - list of 256 items
  matmul_output_attention_time: list
    - list of 256 items
  FFN_rmsnorm_time: list
    - list of 256 items
  matmul_FFN_time: list
    - list of 256 items
  swiGLU_time: list
    - list of 256 items
  matmul_output_FFN_time: list
    - list of 256 items
  rmsnorm_final_time: list
    - list of 256 items
  matmul_logits_time: list
    - list of 256 items
config: dict
  dim: int
  hidden_dim: int
  n_layers: int
  n_heads: int
  n_kv_heads: int
  vocab_size: int
  seq_len: int


In [4]:
import json
import numpy as np
import pandas as pd
import plotly.express as px

# Reload the measurements so this cell does not rely on another cell's `data`.
with open(file) as fh:
    data = json.loads(fh.read())

# encode_time is a single scalar; the other phases are per-step lists that are
# reduced to one total each.
encode = data["encode_time"]
forward_global = np.sum(data["forward_instr"]["global_time"])
sampling = np.sum(data["sampling"])
decode = np.sum(data["decode_time"])

# (label, total) pairs in the order the bars are displayed.
phase_totals = [
    ("encode", encode),
    ("forward", forward_global),
    ("sampling", sampling),
    ("decode", decode),
]
df = pd.DataFrame({
    "phase": [label for label, _ in phase_totals],
    "time in ms": [amount for _, amount in phase_totals],
})

# Log scale on y so phases of very different magnitude stay visible together.
px.bar(
    df,
    x="phase",
    y="time in ms",
    title="Comparaison between the different LLM phase",
    width=500,
    log_y=True,
)


In [5]:
import json
import pandas as pd
import plotly.express as px

# Reload the measurements so this cell can be run independently of the others.
with open(file) as f:
    data = json.load(f)

# Per-step time of the whole forward pass. The series is labelled
# "forward_global" and the title says "Forward global", so it must come from
# "global_time" (it previously read "multihead_time", plotting only one sub-phase).
global_time = data["forward_instr"]["global_time"]
sampling = data["sampling"]

# Long-format frame: both series are stacked and told apart by the "type" column,
# each with its own 0..n-1 step index.
df = pd.DataFrame({
    "step": list(range(len(global_time))) + list(range(len(sampling))),
    "time": global_time + sampling,
    "type": ["forward_global"] * len(global_time) + ["sampling"] * len(sampling)
})

px.line(df, x="step", y="time", color="type",log_y=False, markers=True, labels={"time": "time (ms)"}, title="Forward global vs Sampling time per step")


In [6]:
import json
import numpy as np
import pandas as pd
import plotly.express as px

# Reload the measurements so this cell can be run independently of the others.
with open(file) as fh:
    data = json.loads(fh.read())

forward = data["forward_instr"]

# Sub-phases of the forward pass, in the order they are shown on the x axis.
keys = [
    "rmsnorm_first_time",
    "matmul_qkv_time",
    "rope_time",
    "multihead_time",
    "matmul_output_attention_time",
    "FFN_rmsnorm_time",
    "matmul_FFN_time",
    "swiGLU_time",
    "matmul_output_FFN_time",
    "rmsnorm_final_time",
    "matmul_logits_time",
]

# One (phase, sample) row per recorded value; samples equal to zero are dropped.
rows = [
    (phase, sample)
    for phase in keys
    for sample in forward[phase]
    if sample != 0
]

df = pd.DataFrame(rows, columns=["phase", "time (in ms)"])

px.box(
    df,
    x="phase",
    y="time (in ms)",
    log_y=False,
    title="Boxplot of difference phase per execution",
)


In [7]:
import json
import numpy as np
import pandas as pd
import plotly.express as px

# Reload the measurements so this cell can be run independently of the others.
with open(file) as fh:
    data = json.loads(fh.read())

forward = data["forward_instr"]

# Sub-phases of the forward pass whose shares are compared.
keys = [
    "rmsnorm_first_time",
    "matmul_qkv_time",
    "rope_time",
    "multihead_time",
    "matmul_output_attention_time",
    "FFN_rmsnorm_time",
    "matmul_FFN_time",
    "swiGLU_time",
    "matmul_output_FFN_time",
    "rmsnorm_final_time",
    "matmul_logits_time",
]

# Total time per sub-phase over all recorded steps.
sums = []
for phase in keys:
    sums.append(np.sum(forward[phase]))

# Shares are relative to the sum of the listed sub-phases.
total = sum(sums)
percentages = [phase_sum / total * 100 for phase_sum in sums]

# Largest share first so the dominant phases lead the chart.
df = pd.DataFrame({"phase": keys, "percentage": percentages}).sort_values(
    "percentage", ascending=False
)

px.bar(
    df,
    x="phase",
    y="percentage",
    title="Percentage of time spent in the different phases of forward pass",
)

# 1. Multi thread comparison