perf: let bagel scale to zero (#85)

OpenRouterTeam · Apr 4, 2024 · 9c099b3
1 parent 54cf516
commit 9c099b3
Showing 1 changed file with 0 additions and 1 deletion.
diff --git a/modal/runner/containers/vllm_unified.py b/modal/runner/containers/vllm_unified.py
@@ -165,7 +165,6 @@ def __init__(self):
     gpu=modal.gpu.A100(count=1, memory=40),
     concurrent_inputs=4,
     max_containers=1,
-    keep_warm=1,
     max_model_len=8_000,  # Reduced from original 200k
     quantization="GPTQ",
     dtype="float16",  # vLLM errors when using dtype="auto" with this model