# Cache Impact Benchmark
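This notebook measures the latency difference between cold-cache and warm-cache runs of the Terminal C runtime pipeline. The cold run starts from freshly cleared cache directories and is labelled `cache_disabled` in the results; the warm run is timed after every prompt has already been run once to populate the caches and is labelled `cache_enabled`.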

In [7]:
import time
from pathlib import Path
import pandas as pd
from IPython.display import display
from terminalc.runtime_core.pipelines.runtime_pipeline import RuntimePipeline
from terminalc.runtime_core.config import load_runtime_config

In [8]:
# Base benchmark prompts: single-asset price lookups, a cross-asset
# comparison, a news summary, and a combined price-action/RSI analysis.
PROMPTS = [
    "What was the closing price of BTC on Oct 15, 2025?",
    "List the open, high, low, and close prices for BTC between Oct 15 and Oct 17, 2025.",
    "Which asset had the highest high on Oct 20, 2025: SOL or ADA?",
    "Summarize the news for Bitcoin between Nov 1, 2025 and Nov 7, 2025.",
    "Analyze BTC's trend on Nov 1, 2025 using both price action and RSI.",
]

# Extra prompts. The second one rewords the Bitcoin news prompt in PROMPTS
# ("between Nov 1, 2025 and Nov 7, 2025" vs. "the first week of November, 2025").
# NOTE(review): presumably included to see how the caches treat a reworded
# query -- confirm intent.
PROMPTS2 = [
    "If I have 10,000 USD to deploy right now, how would you distribute it across BTC, ETH, and SOL based on the past month's signals and why?",
    "Summarize the news for Bitcoin on the first week of November, 2025.",
]


def clear_caches(cfg):
    """Empty the query and prompt cache directories named in ``cfg.cache``.

    Each directory is created first if it is missing (parents included).
    Only regular files located directly inside a directory are deleted;
    subdirectories and whatever they contain are left as they are.

    Args:
        cfg: Object exposing ``cache.query_cache_dir`` and
            ``cache.prompt_cache_dir`` (anything ``Path`` accepts).
    """
    cache_dirs = (cfg.cache.query_cache_dir, cfg.cache.prompt_cache_dir)
    for raw_dir in cache_dirs:
        cache_dir = Path(raw_dir)
        cache_dir.mkdir(parents=True, exist_ok=True)
        stale_files = (entry for entry in cache_dir.iterdir() if entry.is_file())
        for stale in stale_files:
            stale.unlink()

def measure_latency(pipeline, prompts):
    """Time ``pipeline.run`` on each prompt and tabulate the results.

    Args:
        pipeline: Object exposing ``run(prompt)``; the value it returns must
            have a ``model_name`` attribute.
        prompts: Iterable of prompts, run one at a time in the given order.

    Returns:
        pandas.DataFrame with one row per prompt and the columns ``prompt``,
        ``latency_sec`` (wall-clock seconds measured with
        ``time.perf_counter``) and ``model``. The columns are present even
        when ``prompts`` is empty.

    Raises:
        Whatever ``pipeline.run`` raises; the exception propagates and the
        measurements collected up to that point are not returned.
    """
    columns = ['prompt', 'latency_sec', 'model']
    rows = []
    for prompt in prompts:
        start = time.perf_counter()
        result = pipeline.run(prompt)
        elapsed = time.perf_counter() - start
        rows.append({
            'prompt': prompt,
            'latency_sec': elapsed,
            'model': result.model_name
        })
    # Pin the schema explicitly: with no rows, pd.DataFrame([]) would have no
    # columns at all and a later df['latency_sec'] would raise KeyError.
    return pd.DataFrame(rows, columns=columns)


In [9]:
cfg = load_runtime_config()

# Cold pass: the cache directories are emptied before the pipeline is built,
# then each prompt in PROMPTS is timed once.
clear_caches(cfg)
pipeline_cold = RuntimePipeline(model_type='large', config=cfg)
cold_df = measure_latency(pipeline_cold, PROMPTS)

# Warm pass: start from empty cache directories again, run every prompt once
# untimed to populate them, then time a second pass over the same prompts.
clear_caches(cfg)
pipeline_warm = RuntimePipeline(model_type='large', config=cfg)
for prompt in PROMPTS + PROMPTS2:
    pipeline_warm.run(prompt)
warm_df = measure_latency(pipeline_warm, PROMPTS + PROMPTS2)

# The cache_state labels are kept as-is; note that 'cache_disabled' marks the
# cold pass (cleared caches), not a run with caching switched off.
results = pd.concat([
    cold_df.assign(cache_state='cache_disabled'),
    warm_df.assign(cache_state='cache_enabled'),
], ignore_index=True)
display(results)

# The warm pass times more prompts than the cold pass. Comparing raw means
# would mix different prompt sets, so the headline warm average is restricted
# to the prompts that were timed in both passes.
warm_shared_df = warm_df[warm_df['prompt'].isin(cold_df['prompt'])]
print('Average latency without cache:', cold_df['latency_sec'].mean())
print('Average latency with cache:', warm_shared_df['latency_sec'].mean())
print('Average latency with cache (all warm prompts):', warm_df['latency_sec'].mean())

ReadTimeout: HTTPSConnectionPool(host='router.huggingface.co', port=443): Read timed out. (read timeout=60)