In [1]:
!tree

[01;34m.[0m
├── [00m-tree -r <commit_hash> --name-only | grep speed_test.ipynb[0m
├── [00meda.ipynb[0m
├── [01;34minsights[0m
│   ├── [00mgen_speed_vs_input.png[0m
│   ├── [00mspeed_test_averages.csv[0m
│   ├── [00mspeed_test_requests.csv[0m
│   └── [00mttft_vs_input.png[0m
├── [00minsights.ipynb[0m
├── [00mnew_speed_test.ipynb[0m
├── [00mold_speed_test.ipynb[0m
├── [00mollama.ipynb[0m
├── [00mollama_test_output.txt[0m
├── [01;34mout[0m
│   ├── [00mcomparison_table.csv[0m
│   ├── [00mspeed_test_averages.csv[0m
│   └── [00mspeed_test_requests.csv[0m
├── [01;34mscripts[0m
│   ├── [00mget_full_summary.py[0m
│   ├── [00mget_speed_metrics.py[0m
│   └── [00mrun_all_models.py[0m
├── [00mspeed_test.ipynb[0m
├── [01;34mspeed_tests[0m
│   ├── [01;34mA2_x1[0m
│   │   ├── [01;34mQwen3-0.6B[0m
│   │   │   ├── [00m10000_length_1_parallel.txt[0m
│   │   │   ├── [00m10000_length_2_parallel.txt[0m
│   │   │   ├── [00m10000_length_5_parallel.txt[0m

In [50]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import re
import sys
import pandas as pd
from pathlib import Path
from typing import Dict, Any, List, Tuple

BASE = Path("speed_tests")

# какие ключи явно приводим к float / int
FLOAT_KEYS = {
    "ttft",
    "gen_tokens_per_second",
    "total_time",
    "total_concurrent_time",
}
INT_KEYS = {
    "total_tokens",
    "input_tokens",
    "output_tokens",
    "concurrent_requests",
    "valid_requests",
}

# regex-шаблоны
KV_RE = re.compile(r"^\s*([a-z_]+):\s*([0-9.]+)\s*$", re.IGNORECASE)
AVG_HEADER_RE = re.compile(
    r"Average metrics for\s+(\d+)/(\d+)\s+concurrent requests of\s+(\d+)\s+tokens each",
    re.IGNORECASE,
)
REQUEST_HEADER_RE = re.compile(r"^\s*Request\s+(\d+):\s*$", re.IGNORECASE)
FILE_PATTERN_RE = re.compile(
    r"(?P<input_len>\d+)_length_(?P<parallel>\d+)_parallel\.txt$",
    re.IGNORECASE
)

def cast_numbers(d: Dict[str, str]) -> Dict[str, Any]:
    out = {}
    for k, v in d.items():
        if k in INT_KEYS:
            try:
                out[k] = int(float(v))
            except Exception:
                out[k] = None
        elif k in FLOAT_KEYS:
            try:
                out[k] = float(v)
            except Exception:
                out[k] = None
        else:
            # попытка аккуратно привести
            try:
                fv = float(v)
                out[k] = int(fv) if fv.is_integer() else fv
            except Exception:
                out[k] = v
    return out

def parse_file(path: Path) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
    txt = path.read_text(encoding="utf-8", errors="ignore")
    lines = [ln.rstrip("\n") for ln in txt.splitlines()]

    # верхние k:v строки (ttft, gen_tokens_per_second, …)
    topline: Dict[str, str] = {}
    i = 0
    while i < len(lines):
        m = KV_RE.match(lines[i])
        if m:
            topline[m.group(1).lower()] = m.group(2)
            i += 1
            continue
        break

    # заголовок “Average metrics for …”
    avg_header = None
    for j in range(i, len(lines)):
        m = AVG_HEADER_RE.search(lines[j])
        if m:
            avg_header = (int(m.group(1)), int(m.group(2)), int(m.group(3)))
            i = j + 1
            break

    # блоки “Request N:”
    requests: List[Dict[str, Any]] = []
    curr = None
    for j in range(i, len(lines)):
        m = REQUEST_HEADER_RE.match(lines[j])
        if m:
            if curr:
                requests.append(curr)
            curr = {"request_id": int(m.group(1))}
            continue
        if curr is not None:
            m2 = KV_RE.match(lines[j])
            if m2:
                curr[m2.group(1).lower()] = m2.group(2)
    if curr:
        requests.append(curr)

    # метаданные из пути
    # speed_tests/<gpu>/<model>/<file>.txt
    try:
        gpu = path.parents[1].name
        model = path.parents[0].name
    except Exception:
        gpu = None
        model = None

    # метаданные из имени файла
    input_len = None
    parallel = None
    fm = FILE_PATTERN_RE.search(path.name)
    if fm:
        input_len = int(fm.group("input_len"))
        parallel = int(fm.group("parallel"))

    # строка «средних» по файлу
    avg_row: Dict[str, Any] = {
        "gpu": gpu,
        "model": model,
        "file": str(path),
        "filename": path.name,
        "input_len_from_name": input_len,
        "parallel_from_name": parallel,
    }
    avg_row.update(cast_numbers(topline))

    if avg_header:
        avg_row["valid_concurrent_from_header"] = avg_header[0]
        avg_row["total_concurrent_from_header"] = avg_header[1]
        avg_row["tokens_each_from_header"] = avg_header[2]

    # строки per-request
    req_rows: List[Dict[str, Any]] = []
    for r in requests:
        row = {
            "gpu": gpu,
            "model": model,
            "file": str(path),
            "filename": path.name,
        }
        row.update(cast_numbers(r))
        row["input_len_from_name"] = input_len
        row["parallel_from_name"] = parallel
        req_rows.append(row)

    return avg_row, req_rows

def prefer(a, b):
    return a if pd.notnull(a) else b

def main():
    if not BASE.exists():
        print(f"❌ Не нашёл директорию: {BASE.resolve()}")
        sys.exit(1)

    avg_rows: List[Dict[str, Any]] = []
    req_rows: List[Dict[str, Any]] = []

    for root, _, files in os.walk(BASE):
        for f in files:
            if not f.lower().endswith(".txt"):
                continue
            p = Path(root) / f
            try:
                avg_row, req = parse_file(p)
                avg_rows.append(avg_row)
                req_rows.extend(req)
            except Exception as e:
                avg_rows.append({
                    "gpu": Path(root).parts[-2] if len(Path(root).parts) >= 2 else None,
                    "model": Path(root).parts[-1] if len(Path(root).parts) >= 1 else None,
                    "file": str(p),
                    "filename": f,
                    "parse_error": str(e),
                })

    df_avg = pd.DataFrame(avg_rows)
    df_req = pd.DataFrame(req_rows)

    # удобные поля сценария
    if "input_len_from_name" in df_avg.columns:
        df_avg["scenario_input_len"] = df_avg["input_len_from_name"]
        if "tokens_each_from_header" in df_avg.columns:
            df_avg["scenario_input_len"] = df_avg.apply(
                lambda r: prefer(r["input_len_from_name"], r.get("tokens_each_from_header")), axis=1
            )
    if "parallel_from_name" in df_avg.columns:
        df_avg["scenario_parallel"] = df_avg["parallel_from_name"]

    # порядок колонок
    avg_cols = [
        "gpu","model","scenario_input_len","scenario_parallel",
        "concurrent_requests","valid_requests",
        "ttft","gen_tokens_per_second",
        "total_time","total_concurrent_time",
        "input_tokens","output_tokens","total_tokens",
        "tokens_each_from_header","valid_concurrent_from_header","total_concurrent_from_header",
        "filename","file"
    ]
    df_avg = df_avg.reindex(columns=[c for c in avg_cols if c in df_avg.columns] +
                                   [c for c in df_avg.columns if c not in avg_cols])

    req_cols = [
        "gpu","model","request_id","input_len_from_name","parallel_from_name",
        "ttft","gen_tokens_per_second","total_time",
        "input_tokens","output_tokens","total_tokens",
        "filename","file"
    ]
    df_req = df_req.reindex(columns=[c for c in req_cols if c in df_req.columns] +
                                  [c for c in df_req.columns if c not in req_cols])

    # сохраняем основные таблицы
    out_dir = Path("out")
    out_dir.mkdir(exist_ok=True)
    avg_csv = out_dir / "speed_test_averages.csv"
    req_csv = out_dir / "speed_test_requests.csv"
    df_avg.to_csv(avg_csv, index=False)
    df_req.to_csv(req_csv, index=False)

    # сводная сравнительная таблица (по средним метрикам)
    # группируем по ключам сценария
    group_keys = ["gpu", "model", "scenario_input_len", "scenario_parallel"]
    present_keys = [k for k in group_keys if k in df_avg.columns]
    agg_fields = {k: "mean" for k in ["ttft","gen_tokens_per_second","total_time","total_concurrent_time"]
                  if k in df_avg.columns}
    comp = df_avg.groupby(present_keys, dropna=False).agg(agg_fields).reset_index().sort_values(present_keys)
    comp_csv = out_dir / "comparison_table.csv"
    comp.to_csv(comp_csv, index=False)

    print("✅ Готово!")
    print(f"- Агрегаты по файлам: {avg_csv}")
    print(f"- По каждому запросу: {req_csv}")
    print(f"- Сравнительная таблица: {comp_csv}")

if __name__ == "__main__":
    main()


✅ Готово!
- Агрегаты по файлам: out/speed_test_averages.csv
- По каждому запросу: out/speed_test_requests.csv
- Сравнительная таблица: out/comparison_table.csv


In [51]:
sdf = pd.read_csv("out/speed_test_requests.csv")
# Rename models in the dataframe
sdf['model'] = sdf['model'].replace({
    'qwen3-8b': 'qwen3-8b-ollama',
    'qwen3-14b': 'qwen3-14b-ollama'
})
sdf
clean_df = sdf.drop(columns=["request_id", "total_time", "output_tokens", "total_tokens", "filename", "file"])
clean_df

Unnamed: 0,gpu,model,input_len_from_name,parallel_from_name,ttft,gen_tokens_per_second,input_tokens
0,T4_x4,Qwen3-32B-GPTQ-Int8,10000,1,11.36,13.58,10000
1,T4_x4,Qwen3-32B-GPTQ-Int8,5000,5,12.89,0.06,5000
2,T4_x4,Qwen3-32B-GPTQ-Int8,5000,5,28.63,14.65,5011
3,T4_x4,Qwen3-32B-GPTQ-Int8,5000,5,28.58,14.64,5011
4,T4_x4,Qwen3-32B-GPTQ-Int8,5000,5,28.63,14.63,5011
...,...,...,...,...,...,...,...
1249,A2_x1,qwen3-14b-ollama,10000,5,13.86,11.67,10000
1250,A2_x1,qwen3-14b-ollama,10000,5,35.01,11.68,10000
1251,A2_x1,qwen3-14b-ollama,10000,5,56.03,11.62,10000
1252,A2_x1,qwen3-14b-ollama,10000,5,77.13,11.63,10000


In [52]:
df = clean_df[clean_df['parallel_from_name'] == 1]

In [53]:
# Models in order of size
models_by_size = [
    'Qwen3-1.7B',
    'Qwen3-4B', 
    'qwen3-8b-ollama',
    'Qwen3-8B',
    'Qwen3-8B-AWQ',
    'qwen3-14b-ollama',
    'Qwen3-14B',
    'Qwen3-14B-INT8',
    'Qwen3-32B-GPTQ-Int8'
]
models_by_size


['Qwen3-1.7B',
 'Qwen3-4B',
 'qwen3-8b-ollama',
 'Qwen3-8B',
 'Qwen3-8B-AWQ',
 'qwen3-14b-ollama',
 'Qwen3-14B',
 'Qwen3-14B-INT8',
 'Qwen3-32B-GPTQ-Int8']

# Усредненные графики чисто для сравнения видеокарт

In [54]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create consistent model order across all GPUs using the predefined order
model_order = models_by_size
df = df.sort_values(by=["model", "gpu", "input_tokens"])


# Create custom GPU order for better comparison
# Group single GPU types together and multi-GPU types together
gpu_order = []
single_gpus = [gpu for gpu in df['gpu'].unique() if '_x1' in gpu]
multi_gpus = [gpu for gpu in df['gpu'].unique() if '_x1' not in gpu]

# Sort within each group and combine
single_gpus_sorted = sorted(single_gpus)
multi_gpus_sorted = sorted(multi_gpus)
gpu_order = single_gpus_sorted + multi_gpus_sorted

# Create visualizations
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('TTFT by GPU and Model', 'Tokens per Second by GPU and Model', 
                   'TTFT vs Input Length', 'Tokens per Second vs Parallel Requests'),
    specs=[[{"secondary_y": False}, {"secondary_y": False}],
           [{"secondary_y": False}, {"secondary_y": False}]]
)

# 1. TTFT by GPU and Model (box plot)
fig1 = px.box(df, x='gpu', y='ttft', color='model', 
              title='Time to First Token by GPU and Model',
              category_orders={'model': model_order, 'gpu': gpu_order})

# 2. Tokens per second by GPU and Model
fig2 = px.box(df, x='gpu', y='gen_tokens_per_second', color='model',
              title='Generation Speed by GPU and Model',
              category_orders={'model': model_order, 'gpu': gpu_order})
              
# 3. TTFT vs Input Length - show all models but only Qwen3-4B visible by default
fig3 = px.line(df, x='input_tokens', y='ttft', color='gpu', 
               line_dash='model',
               title='TTFT vs Input Length',
               category_orders={'model': model_order, 'gpu': gpu_order})

# Make only Qwen3-4B visible by default in fig3
for trace in fig3.data:
    if 'Qwen3-4B' not in trace.name:
        trace.visible = 'legendonly'

# Display individual plots
fig1.show()
fig2.show()
fig3.show()

In [55]:
df = clean_df[(clean_df['parallel_from_name'] == 1) & (clean_df['model'] != 'Qwen3-0.6B')]

# Performance heatmap with ordered GPUs
avg_perf = df.groupby(['gpu', 'model']).agg({
    'ttft': 'mean',
    'gen_tokens_per_second': 'mean'
}).reset_index()

# Create pivot table with custom GPU order and model order
pivot_data = avg_perf.pivot(index='gpu', columns='model', values='gen_tokens_per_second')
pivot_data = pivot_data.reindex(gpu_order)
pivot_data = pivot_data.reindex(columns=models_by_size)

fig_heatmap = px.imshow(
    pivot_data,
    title='Average Generation Speed Heatmap (tokens/sec)',
    color_continuous_scale='Viridis'
)
fig_heatmap.show()

# df

# Как мне кажется, самый привлекательный кандидат - две видеокарта А2 (А2_х2)
## Визуализируем его

In [56]:

# EDA for A2_x2 GPU
a2_x2_df = clean_df[clean_df['gpu'] == 'A2_x2']

print(f"A2_x2 Dataset shape: {a2_x2_df.shape}")
print(f"Models available: {sorted(a2_x2_df['model'].unique())}")
print(f"Input lengths: {sorted(a2_x2_df['input_len_from_name'].unique())}")
print(f"Parallel requests: {sorted(a2_x2_df['parallel_from_name'].unique())}")

# Performance by model and parallelism
fig_a2_perf = px.box(a2_x2_df, x='model', y='gen_tokens_per_second', color='parallel_from_name',
                     title='A2_x2: Generation Speed by Model and Parallelism',
                     category_orders={'model': models_by_size})
fig_a2_perf.show()

a2_x2_df = a2_x2_df[a2_x2_df['parallel_from_name'] == 1]

# TTFT analysis for A2_x2
fig_a2_ttft = px.scatter(a2_x2_df, x='input_tokens', y='ttft', 
                        color='model', size='parallel_from_name',
                        title='A2_x2: TTFT vs Input Length',
                        category_orders={'model': models_by_size})
fig_a2_ttft.show()


A2_x2 Dataset shape: (272, 7)
Models available: ['Qwen3-0.6B', 'Qwen3-1.7B', 'Qwen3-14B-INT8', 'Qwen3-4B', 'Qwen3-8B']
Input lengths: [np.int64(1000), np.int64(5000), np.int64(10000), np.int64(15000), np.int64(20000), np.int64(25000), np.int64(30000)]
Parallel requests: [np.int64(1), np.int64(2), np.int64(5)]
