# 远程压测


In [1]:
import os, sys, json, time, subprocess, textwrap
from pathlib import Path
import resource

def find_repo_root(start: Path) -> Path:
    """Locate the repository root by searching upward from *start*.

    The root is the first directory, beginning with the resolved *start*
    itself and then each of its parents in order, that contains
    ``scripts/benchmark.py``.

    Raises:
        FileNotFoundError: if no directory on that path contains the marker.
    """
    origin = start.resolve()
    marker = Path("scripts") / "benchmark.py"
    # Nearest directory first, so the innermost match wins.
    candidates = (origin, *origin.parents)
    hit = next((d for d in candidates if (d / marker).exists()), None)
    if hit is None:
        raise FileNotFoundError("找不到仓库根目录（缺少 scripts/benchmark.py）。请用仓库根目录启动 Jupyter。")
    return hit

# Repository root, located by searching upward from the current working
# directory for scripts/benchmark.py.
REPO_ROOT = find_repo_root(Path.cwd())

# Target machine: host used by the later cells for the /stats probe and as
# the --host argument of scripts/benchmark.py.
RTX_HOST = "10.200.98.98"

# Port used together with RTX_HOST for /stats and the benchmark runs.
PROXY_PORT = 8080

# Not referenced by any code in this view; presumably the ai_demo port
# mentioned in the optional last section -- confirm before relying on it.
AI_DEMO_PORT = 18080

# Output directory for benchmark result files (created if missing).
OUT_DIR = REPO_ROOT / "bench_out"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Echo the resolved environment so the run is reproducible from the output.
print("OUT_DIR=", OUT_DIR.resolve())
print("Python=", sys.version.split()[0])
print("RepoRoot=", REPO_ROOT)

# Soft/hard limit on open file descriptors for this process; later cells
# read nofile_soft to decide whether the requested concurrency is feasible.
nofile_soft, nofile_hard = resource.getrlimit(resource.RLIMIT_NOFILE)
print(f"RLIMIT_NOFILE soft={nofile_soft} hard={nofile_hard}")
# Warn early when the soft limit is below 20000.
if nofile_soft < 20000:
    print("WARNING: 当前进程 fd 上限偏低；10K 并发连接可能失败。建议退出后用更高 ulimit 启动 Jupyter。")


OUT_DIR= /home/huazi/Workspace/Projects/linux大作业/notebooks/bench_out
Python= 3.14.2
Repo= /home/huazi/Workspace/Projects/linux大作业/notebooks
RLIMIT_NOFILE soft=524288 hard=524288


## 1) 快速探活：拉取 `/stats`

In [2]:
import urllib.request

def get_json(url: str, timeout: float = 2.0):
    """Fetch *url* and return its body parsed as JSON.

    Args:
        url: Address passed to ``urllib.request.urlopen``.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The decoded JSON value (the body is decoded as UTF-8 first).
    """
    response = urllib.request.urlopen(url, timeout=timeout)
    with response:
        payload = response.read()
    text = payload.decode('utf-8')
    return json.loads(text)

# Liveness probe: fetch /stats from the target once and echo a few fields.
stats_url = "http://{}:{}/stats".format(RTX_HOST, PROXY_PORT)
j = get_json(stats_url, timeout=3.0)
print("OK /stats")
# Missing keys print as None because they are read with dict.get().
for _field in ("uptime_sec", "active_connections", "avg_qps", "io"):
    print(f"{_field}:", j.get(_field))


OK /stats
uptime_sec: 2508
active_connections: 1
avg_qps: 0.0
io: {'configured_model': 'epoll', 'runtime_model': 'epoll', 'supported_models': ['select', 'poll', 'epoll', 'uring']}


## 2) 10K 并发连接保持压测（connect_hold）


In [27]:
def run_cmd(cmd: list[str], timeout_s: float | None = None) -> str:
    print("$", " ".join(cmd))
    p = subprocess.run(cmd, cwd=str(REPO_ROOT), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, timeout=timeout_s)
    print(p.stdout)
    p.check_returncode()
    return p.stdout

# Fallback: if the initialisation cell was skipped, define the names from it
# that can be recomputed here (REPO_ROOT, OUT_DIR, nofile_soft). Settings such
# as RTX_HOST and PROXY_PORT have no sensible default and must still come
# from that cell.
if "REPO_ROOT" not in globals():
    from pathlib import Path
    def _find_repo_root(start: Path) -> Path:
        """Return the nearest directory at or above *start* holding scripts/benchmark.py."""
        start = start.resolve()
        for p in [start, *start.parents]:
            if (p / "scripts" / "benchmark.py").exists():
                return p
        raise FileNotFoundError("找不到仓库根目录（缺少 scripts/benchmark.py）。请用仓库根目录启动 Jupyter。")
    REPO_ROOT = _find_repo_root(Path.cwd())
    print("RepoRoot=", REPO_ROOT)

if "OUT_DIR" not in globals():
    from pathlib import Path
    OUT_DIR = Path(REPO_ROOT) / "bench_out"
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    print("OUT_DIR=", OUT_DIR.resolve())

if "nofile_soft" not in globals():
    # The fd-limit check below reads nofile_soft, so it needs the same guard.
    import resource
    nofile_soft = resource.getrlimit(resource.RLIMIT_NOFILE)[0]

TOTAL = 10_000
HOLD_S = 20
CONCURRENCY = 10_000  # genuine 10K concurrent connections

# With too low an fd limit the run fails outright, so downgrade automatically
# and ask the user to raise ulimit before retrying the full 10K.
if nofile_soft < CONCURRENCY + 2048:
    print(f"WARNING: RLIMIT_NOFILE soft={nofile_soft} 不足以支撑 concurrency={CONCURRENCY}。")
    print("建议：退出 Jupyter 后用更高的 ulimit -n 重新启动，再把 CONCURRENCY 改回 10000。")
    # Keep the previous cap of 2000, but never ask for more than half of the
    # soft limit: with a small limit (e.g. 1024) a fixed 2000 would still
    # exceed what the process is allowed to open.
    CONCURRENCY = max(1, min(2000, TOTAL, nofile_soft // 2))

# connect_hold runs in batches ("waves") of `concurrency` connections, so the
# global timeout has to cover every wave.
waves = (TOTAL + CONCURRENCY - 1) // CONCURRENCY
global_timeout = int(HOLD_S * waves + 45)  # 45s of slack to avoid "ERROR: global timeout"
subprocess_timeout = global_timeout + 30  # let the benchmark hit its own timeout first
print(f"plan: total={TOTAL} concurrency={CONCURRENCY} hold_s={HOLD_S} waves={waves} global_timeout={global_timeout}s")

out_json = OUT_DIR / "remote_conn_hold_10k.json"
cmd = [
    sys.executable, str(REPO_ROOT / "scripts" / "benchmark.py"),
    "--host", RTX_HOST,
    "--port", str(PROXY_PORT),
    "--bench", "connect_hold",
    "--total", str(TOTAL),
    "--concurrency", str(CONCURRENCY),
    "--hold", str(HOLD_S),
    "--timeout", "3",
    "--global-timeout", str(global_timeout),
    "--output", str(out_json),
]
run_cmd(cmd, timeout_s=subprocess_timeout)
print("saved:", out_json)


plan: total=10000 concurrency=10000 hold_s=20 waves=1 global_timeout=65s
$ /home/huazi/Workspace/Projects/linux大作业/.venv/bin/python /home/huazi/Workspace/Projects/linux大作业/scripts/benchmark.py --host 10.200.98.98 --port 8080 --bench connect_hold --total 10000 --concurrency 10000 --hold 20 --timeout 3 --global-timeout 65 --output /home/huazi/Workspace/Projects/linux大作业/bench_out/remote_conn_hold_10k.json
mode=epoll bench=connect_hold ok=0 failed=10000 elapsed_s=8.00 qps=0.00
connect_hold: hold_s=20.00 concurrency=10000 total=10000

saved: /home/huazi/Workspace/Projects/linux大作业/bench_out/remote_conn_hold_10k.json


In [None]:
# Reload the result file that out_json currently points at and preview it:
# pretty-printed JSON, truncated to the first 2000 characters.
_raw_result = out_json.read_text(encoding='utf-8')
data = json.loads(_raw_result)
_pretty_result = json.dumps(data, ensure_ascii=False, indent=2)
print(_pretty_result[:2000])


## 3) /stats 延迟与吞吐压测（http_stats）

这个测试会高并发请求 RTX 的 `GET /stats`，输出 p50/p90/p99 延迟和 QPS。

注意：`/stats` 不是数据面业务请求，但适合做“可观测接口在高频拉取下是否稳定”的基准。

In [None]:
# Run the http_stats benchmark against the target and save its JSON report.
out_json = OUT_DIR / "remote_http_stats.json"

# Flag -> value, in the order the arguments are passed to benchmark.py.
_http_stats_opts = {
    "--host": RTX_HOST,
    "--port": PROXY_PORT,
    "--bench": "http_stats",
    "--total": 20000,
    "--concurrency": 400,
    "--timeout": 2,
    "--global-timeout": 120,
    "--output": out_json,
}
cmd = [
    sys.executable,
    str(REPO_ROOT / "scripts" / "benchmark.py"),
    *(tok for flag, val in _http_stats_opts.items() for tok in (flag, str(val))),
]
# Subprocess limit (150s) is longer than the benchmark's own 120s timeout.
run_cmd(cmd, timeout_s=150)
print("saved:", out_json)


In [None]:
# Reload the result file that out_json currently points at and preview it:
# pretty-printed JSON, truncated to the first 2000 characters.
data = json.loads(out_json.read_text(encoding='utf-8'))
print(json.dumps(data, ensure_ascii=False, indent=2)[:2000])

# Optional plot of the latency summary. Plotting stays best-effort, but an
# import failure and a plotting failure are reported separately so that a
# data or rendering problem is not mislabelled as "matplotlib unavailable".
try:
    import matplotlib.pyplot as plt
except Exception as e:
    print('matplotlib unavailable:', e)
else:
    try:
        lat = data.get('latency_ms', {})
        if lat:
            xs = ['p50', 'p90', 'p99', 'avg']
            # Missing percentiles are drawn as 0 rather than aborting the plot.
            ys = [lat.get('p50_ms', 0), lat.get('p90_ms', 0), lat.get('p99_ms', 0), lat.get('avg_ms', 0)]
            plt.figure(figsize=(6, 3))
            plt.bar(xs, ys)
            plt.title('GET /stats latency (ms)')
            plt.ylabel('ms')
            plt.show()
    except Exception as e:
        print('plot failed:', e)


## 4) （可选）对 RTX 上的 `ai_demo` 进行“重计算负载”压测

如果你在 RTX 上启动了 `ai_demo`（端口 `18080`，路径 `/infer?work_ms=...`），就用下面脚本从本机远程压测：

```bash
python3 scripts/load_test.py --base http://10.200.98.98:18080 --path /infer --duration 30 --concurrency 400 --work-ms 800 --mode spread
```

运行后打开：
- `http://10.200.98.98:18080/dashboard`

你会在“后端 AI/GPU 指标表格 + GPU/显存/队列曲线”里看到分配与负载变化。