From 4124e1d732603b6472b2bc127b836f28c83b9378 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EC=86=90=EC=84=B1=EC=A4=80?=
Date: Thu, 2 Jul 2026 19:17:10 +0900
Subject: [PATCH] Add safe DeepSeek env helper

---
 examples/ablation/configure_deepseek_env.py | 124 ++++++++++++++++++
 .../diagnostics/public_scale_20260702.md    |   1 +
 tests/test_deepseek_env_config.py           |  69 ++++++++++
 3 files changed, 194 insertions(+)
 create mode 100644 examples/ablation/configure_deepseek_env.py
 create mode 100644 tests/test_deepseek_env_config.py

diff --git a/examples/ablation/configure_deepseek_env.py b/examples/ablation/configure_deepseek_env.py
new file mode 100644
index 0000000..545b809
--- /dev/null
+++ b/examples/ablation/configure_deepseek_env.py
@@ -0,0 +1,124 @@
+"""Safely write DeepSeek benchmark credentials to a gitignored local .env.
+
+The script intentionally does not accept the API key as a command-line
+argument, because command arguments are easy to leak through shell history and
+process listings. It prompts with getpass and writes only the requested env var
+to a local .env file with user-only permissions.
+"""
+
+from __future__ import annotations
+
+import argparse
+import getpass
+import os
+import tempfile
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parents[2]
+DEFAULT_ENV_PATH = REPO_ROOT.parent / ".env"
+DEFAULT_ENV_NAME = "DEEPSEEK_API_KEY"
+
+
+def _split_env_line(line: str) -> tuple[str, str] | None:
+    """Return ``(key, raw_value)`` for an assignment line, otherwise None."""
+    if not line.strip() or line.lstrip().startswith("#") or "=" not in line:
+        return None
+    key, value = line.split("=", 1)
+    key = key.strip()
+    if not key:
+        return None
+    return key, value
+
+
+def _quote_env_value(value: str) -> str:
+    """Wrap *value* in double quotes, escaping backslashes and double quotes."""
+    escaped = value.replace("\\", "\\\\").replace('"', '\\"')
+    return f'"{escaped}"'
+
+
+def _replace_env_value(lines: list[str], *, key: str, value: str) -> list[str]:
+    """Return *lines* with exactly one assignment of *key*.
+
+    The first existing assignment of *key* is replaced in place and later
+    duplicates are dropped. If *key* is absent, the assignment is appended,
+    separated from preceding non-blank content by one empty line.
+    """
+    new_line = f"{key}={_quote_env_value(value)}"
+    updated: list[str] = []
+    replaced = False
+    for line in lines:
+        parsed = _split_env_line(line)
+        if parsed is not None and parsed[0] == key:
+            if not replaced:
+                updated.append(new_line)
+                replaced = True
+            continue
+        updated.append(line)
+    if not replaced:
+        if updated and updated[-1].strip():
+            updated.append("")
+        updated.append(new_line)
+    return updated
+
+
+def _write_env_value(path: Path, *, key: str, value: str) -> None:
+    """Set *key* to *value* in the env file at *path*, leaving it mode 0600.
+
+    The new content is staged in a fresh ``tempfile.mkstemp`` file beside
+    *path* and then moved over it with ``os.replace``. A leftover
+    ``<name>.tmp`` file or symlink is therefore never reused, so the secret
+    cannot land in a file with looser permissions. The staging file is
+    removed if writing or replacing fails.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    try:
+        lines = path.read_text(encoding="utf-8").splitlines()
+    except FileNotFoundError:
+        lines = []
+    updated = _replace_env_value(lines, key=key, value=value)
+    # mkstemp creates a new, uniquely named file accessible only by the owner.
+    fd, tmp_name = tempfile.mkstemp(dir=path.parent, prefix=f"{path.name}.", suffix=".tmp")
+    tmp = Path(tmp_name)
+    try:
+        with os.fdopen(fd, "w", encoding="utf-8") as handle:
+            handle.write("\n".join(updated) + "\n")
+        os.chmod(tmp, 0o600)
+        os.replace(tmp, path)
+    except BaseException:
+        # Never leave a stray copy of the secret behind on failure.
+        tmp.unlink(missing_ok=True)
+        raise
+
+
+def _read_key(prompt: str) -> str:
+    """Prompt for the secret without echo; exit if it is empty after stripping."""
+    value = getpass.getpass(prompt).strip()
+    if not value:
+        raise SystemExit("Empty API key; nothing written.")
+    return value
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Parse arguments, prompt for the key, and write it to the env file."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--env-path",
+        type=Path,
+        default=DEFAULT_ENV_PATH,
+        help="Local .env path to update. Default: parent workspace .env.",
+    )
+    parser.add_argument(
+        "--env-name",
+        default=DEFAULT_ENV_NAME,
+        help="Environment variable name to write.",
+    )
+    args = parser.parse_args(argv)
+
+    key = _read_key(f"{args.env_name}: ")
+    _write_env_value(args.env_path, key=args.env_name, value=key)
+    print(f"Wrote {args.env_name} to {args.env_path} with mode 0600.")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/examples/ablation/diagnostics/public_scale_20260702.md b/examples/ablation/diagnostics/public_scale_20260702.md
index 7a6430c..65c5ace 100644
--- a/examples/ablation/diagnostics/public_scale_20260702.md
+++ b/examples/ablation/diagnostics/public_scale_20260702.md
@@ -47,6 +47,7 @@ PYTHONUNBUFFERED=1 SYNAPTIC_SQLITE_FTS_AND_FIRST_THRESHOLD=20 SYNAPTIC_SQLITE_FT
 # DeepSeek Flash agent-loop quality path.
 # Put DEEPSEEK_API_KEY in shell env, the repo .env, or the parent workspace .env.
 # Do not put the key in docs, commands, JSONL, or DBs.
+uv run python examples/ablation/configure_deepseek_env.py
 PYTHONUNBUFFERED=1 SYNAPTIC_SQLITE_FTS_AND_FIRST_THRESHOLD=20 SYNAPTIC_SQLITE_FTS_LEXICAL_RERANK_POOL=500 uv run python examples/ablation/run_agent_loop_benchmarks.py --llm-preset deepseek --subset 20 --msmarco-path tests/benchmark/data/msmarco_passage_full.json --corpus-limit 8841823 --sqlite-db-path tests/benchmark/data/msmarco_full.db --max-turns 5 --llm-timeout 180 --preflight-timeout 15 --out-jsonl examples/ablation/diagnostics/agent_loop_deepseek_v4_flash_20.jsonl --resume
 
 # Historical local Ollama fallback smoke when the H100/Qwen3.6 tunnel is down.
diff --git a/tests/test_deepseek_env_config.py b/tests/test_deepseek_env_config.py
new file mode 100644
index 0000000..c35ad86
--- /dev/null
+++ b/tests/test_deepseek_env_config.py
@@ -0,0 +1,69 @@
+"""Tests for the local DeepSeek .env configuration helper."""
+
+from __future__ import annotations
+
+import importlib.util
+import stat
+import sys
+from pathlib import Path
+
+HELPER_PATH = (
+    Path(__file__).resolve().parents[1] / "examples" / "ablation" / "configure_deepseek_env.py"
+)
+SPEC = importlib.util.spec_from_file_location("configure_deepseek_env", HELPER_PATH)
+assert SPEC is not None and SPEC.loader is not None
+helper = importlib.util.module_from_spec(SPEC)
+sys.modules[SPEC.name] = helper
+SPEC.loader.exec_module(helper)
+
+
+def test_replace_env_value_preserves_other_lines_and_dedupes() -> None:
+    lines = [
+        "# local secrets",
+        "OTHER_KEY=keep",
+        "DEEPSEEK_API_KEY=old",
+        "DEEPSEEK_API_KEY=older",
+    ]
+
+    updated = helper._replace_env_value(lines, key="DEEPSEEK_API_KEY", value="new-secret")
+
+    assert updated == [
+        "# local secrets",
+        "OTHER_KEY=keep",
+        'DEEPSEEK_API_KEY="new-secret"',
+    ]
+
+
+def test_write_env_value_uses_user_only_permissions(tmp_path: Path) -> None:
+    env_path = tmp_path / ".env"
+
+    helper._write_env_value(env_path, key="DEEPSEEK_API_KEY", value='sk-test"quoted')
+
+    assert env_path.read_text(encoding="utf-8") == ('DEEPSEEK_API_KEY="sk-test\\"quoted"\n')
+    assert stat.S_IMODE(env_path.stat().st_mode) == 0o600
+
+
+def test_write_env_value_does_not_reuse_stale_tmp_file(tmp_path: Path) -> None:
+    env_path = tmp_path / ".env"
+    stale_tmp = tmp_path / ".env.tmp"
+    stale_tmp.write_text("stale", encoding="utf-8")
+    stale_tmp.chmod(0o644)
+
+    helper._write_env_value(env_path, key="DEEPSEEK_API_KEY", value="sk-test")
+
+    assert stale_tmp.read_text(encoding="utf-8") == "stale"
+    assert stat.S_IMODE(env_path.stat().st_mode) == 0o600
+    assert sorted(p.name for p in tmp_path.iterdir()) == [".env", ".env.tmp"]
+
+
+def test_main_does_not_print_secret(tmp_path: Path, monkeypatch, capsys) -> None:
+    env_path = tmp_path / ".env"
+    value = "value-for-test"
+    monkeypatch.setattr(helper, "_read_key", lambda prompt: value)
+
+    assert helper.main(["--env-path", str(env_path)]) == 0
+
+    captured = capsys.readouterr()
+    assert value not in captured.out
+    assert value not in captured.err
+    assert value in env_path.read_text(encoding="utf-8")