CentML · V2arK · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 21, 2026
diff --git a/centml/sdk/utils/config_file.py b/centml/sdk/utils/config_file.py
@@ -0,0 +1,16 @@
+import os
+from typing import Optional
+
+from platform_api_python_client import ConfigFileMount
+
+
+def load_config_file_mount(path: str, mount_path: str, filename: Optional[str] = None) -> ConfigFileMount:
+    """Read the UTF-8 text file at `path` from disk into a ConfigFileMount.
+
+    `mount_path` is the parent directory inside the container, so the file lands at `mount_path/filename`;
+    `filename` falls back to the basename of `path`. Field-level validation (size cap, filename charset,
+    mount_path rules) is left to the API so the SDK doesn't drift when server limits change.
+    """
+    with open(path, mode="r", encoding="utf-8", newline="") as handle:  # newline="" keeps CRLF/CR untranslated
+        text = handle.read()
+    return ConfigFileMount(filename=filename or os.path.basename(path), mount_path=mount_path, content=text)
diff --git a/examples/sdk/create_inference.py b/examples/sdk/create_inference.py
@@ -1,6 +1,7 @@
 import centml
 from centml.sdk.api import get_centml_client
 from centml.sdk import DeploymentType, CreateInferenceV3DeploymentRequest, UserVaultType
+from centml.sdk.utils.config_file import load_config_file_mount
 
 
 def main():
@@ -22,6 +23,12 @@ def main():
             max_unavailable=0,  # Keep all pods available during updates
             healthcheck="/",
             concurrency=10,
+            # Mounts ./default.conf at /etc/nginx/conf.d/default.conf. mount_path
+            # is the parent directory; filename defaults to os.path.basename(path)
+            # so the resulting file lands at mount_path/filename. Pass an inline
+            # ConfigFileMount(filename=..., mount_path=..., content=...) if the
+            # content is already in memory.
+            config_file=load_config_file_mount(path="./default.conf", mount_path="/etc/nginx/conf.d"),
         )
         response = cclient.create_inference(request)
         print("Create deployment response: ", response)

diff --git a/examples/sdk/create_inference_vllm.py b/examples/sdk/create_inference_vllm.py
@@ -0,0 +1,49 @@
+import centml
+from centml.sdk.api import get_centml_client
+from centml.sdk import CreateInferenceV3DeploymentRequest
+from centml.sdk.utils.config_file import load_config_file_mount
+
+
+def main():
+    """Deploy a vLLM OpenAI server that reads its settings from a mounted vllm_config.yaml."""
+    with get_centml_client() as cclient:
+        # load_config_file_mount reads ./vllm_config.yaml; with mount_path="/etc/vllm" (the parent
+        # directory) and the default filename (basename of the path) it lands at
+        # /etc/vllm/vllm_config.yaml, which the command hands to vLLM via --config. The sibling
+        # vllm_config.yaml is a Llama-3.1-8B + EAGLE3 speculative-decoding setup; edit it (model,
+        # dtype, tensor-parallel-size, speculative-config, etc.) to fit the workload before deploying.
+        vllm_config = load_config_file_mount(path="./vllm_config.yaml", mount_path="/etc/vllm")
+        request = CreateInferenceV3DeploymentRequest(
+            name="vllm-llama",
+            image_url="vllm/vllm-openai:latest",
+            command="python3 -m vllm.entrypoints.openai.api_server --config /etc/vllm/vllm_config.yaml",
+            config_file=vllm_config,
+            env_vars={"HF_TOKEN": "<your-hf-token>"},
+            port=8000,
+            healthcheck="/health",
+            concurrency=10,
+            cluster_id=1000,
+            hardware_instance_id=1001,  # GPU instance
+            min_replicas=1,
+            max_replicas=1,
+            initial_replicas=1,
+            max_surge=1,
+            max_unavailable=0,
+        )
+        created = cclient.create_inference(request)
+        print("Create deployment response: ", created)
+
+        deployment = cclient.get_inference(created.id)
+        print("Deployment details: ", deployment)
+
+        '''
+        ### Pause the deployment
+        cclient.pause(deployment.id)
+
+        ### Delete the deployment
+        cclient.delete(deployment.id)
+        '''
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/sdk/default.conf b/examples/sdk/default.conf
@@ -0,0 +1,7 @@
+server {  # minimal demo vhost: answers every request on / with a fixed plain-text body
+    listen 8080;
+    location / {
+        default_type text/plain;  # sets the one Content-Type; add_header would append a second header
+        return 200 "hello from config_file\n";
+    }
+}
diff --git a/examples/sdk/vllm_config.yaml b/examples/sdk/vllm_config.yaml
@@ -0,0 +1,24 @@
+port: 8000  # vLLM --config file for create_inference_vllm.py; keep in sync with its port=8000
+model: meta-llama/Llama-3.1-8B-Instruct
+tokenizer: meta-llama/Llama-3.1-8B-Instruct
+runner: generate
+dtype: auto
+gpu-memory-utilization: 0.9
+max-num-seqs: 2048  # NOTE(review): exceeds max-num-batched-tokens (1024) below; upstream vLLM appears to require batched-tokens >= seqs - confirm
+tokenizer-mode: auto
+seed: 0
+tensor-parallel-size: 1
+pipeline-parallel-size: 1
+block-size: 16
+attention-backend: FLASHINFER
+distributed-executor-backend: uni
+enable-prefix-caching: true
+enable-chunked-prefill: true
+max-num-batched-tokens: 1024
+speculative-config:  # EAGLE3 speculative decoding; the nested keys configure the draft model
+  method: eagle3
+  model: centml/EAGLE3-Llama3.1-8B-Instruct
+  num_speculative_tokens: 3
+  draft_tensor_parallel_size: 1
+enable-auto-tool-choice: true
+tool-call-parser: llama3_json
diff --git a/tests/test_sdk_config_file_helper.py b/tests/test_sdk_config_file_helper.py
@@ -0,0 +1,52 @@
+"""Tests for centml.sdk.utils.config_file.load_config_file_mount."""
+
+import pytest
+
+from centml.sdk.utils.config_file import load_config_file_mount
+
+
+def test_default_filename_from_basename(tmp_path):
+    """With no explicit filename, the mount is named after the basename of the source path."""
+    src = tmp_path / "nginx.conf"
+    src.write_bytes(b"server { listen 80; }\n")  # not write_text: text mode writes \r\n on Windows
+    mount = load_config_file_mount(str(src), "/etc/nginx/conf.d")
+
+    assert mount.filename == "nginx.conf"
+    assert mount.mount_path == "/etc/nginx/conf.d"
+    assert mount.content == "server { listen 80; }\n"
+
+
+def test_explicit_filename_overrides_basename(tmp_path):
+    """An explicit `filename` wins over the basename of the source path."""
+    local_file = tmp_path / "local.txt"
+    local_file.write_text("payload")
+
+    result = load_config_file_mount(str(local_file), "/app/etc", filename="remote.conf")
+
+    observed = (result.filename, result.mount_path, result.content)
+    assert observed == ("remote.conf", "/app/etc", "payload")
+
+
+def test_utf8_multibyte_content_roundtrips(tmp_path):
+    """Multi-byte UTF-8 text comes back exactly as it was written."""
+    src = tmp_path / "i18n.conf"
+    src.write_bytes("配置内容 = 测试\n".encode("utf-8"))  # not write_text: text mode writes \r\n on Windows
+    mount = load_config_file_mount(str(src), "/etc/app")
+
+    assert mount.content == "配置内容 = 测试\n"
+
+
+def test_missing_file_raises_filenotfound(tmp_path):
+    with pytest.raises(FileNotFoundError):  # tmp_path starts empty, so this file was never created
+        load_config_file_mount(path=str(tmp_path.joinpath("does-not-exist.conf")), mount_path="/etc/x")
+
+
+def test_preserves_crlf_line_endings(tmp_path):
+    """CRLF endings, as written by Windows editors, must survive the load untouched."""
+    raw = b"line1\r\nline2\r\n"
+    crlf_file = tmp_path / "windows.conf"
+    crlf_file.write_bytes(raw)
+
+    loaded = load_config_file_mount(str(crlf_file), "/etc/app")
+
+    assert loaded.content == raw.decode("utf-8")