In [None]:
#@title LLM 統一設定區

LLM_CONFIGS = [
    {
        "name": "gpt-4o",
        "type": "openai",
        "api_key": "sk-proj-",
        "endpoint": "https://api.openai.com/v1/chat/completions",
        "model": "gpt-4o",
    },
    {
        "name": "gemini-2.5-flash",
        "type": "gemini",
        "api_key": "",
        "endpoint": "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent",
        "model": "gemini-2.5-flash",
    },
    {
        "name": "groq",
        "type": "groq",
        "api_key": "",
        "endpoint": "https://api.groq.com/openai/v1/chat/completions",
        "model": "meta-llama/llama-4-maverick-17b-128e-instruct",
    }

    # 新增更多 LLM，只需複製加一行即可
]

# 統一參數
PROMPT = "請簡單解釋黑洞是什麼？"
MAX_TOKENS = 100

In [14]:
import time, requests, openai, pandas as pd


def test_openai(cfg):
    client = openai.OpenAI(api_key=cfg["api_key"])
    start = time.time()
    resp = client.chat.completions.create(
        model=cfg["model"],
        messages=[{"role": "user", "content": PROMPT}],
        max_tokens=MAX_TOKENS
    )
    elapsed = time.time() - start
    return resp.usage.completion_tokens, elapsed

def test_gemini(cfg):
    url = f'{cfg["endpoint"]}?key={cfg["api_key"]}'
    payload = {
        "contents": [{"parts": [{"text": PROMPT}]}],
        "generationConfig": {"maxOutputTokens": MAX_TOKENS}
    }
    start = time.time()
    r = requests.post(url, json=payload)
    elapsed = time.time() - start
    data = r.json()
    # 部分 Gemini 版本 API 會有不同格式，這裡以最常見結構抓取
    try:
        text = data["candidates"][0]["content"]["parts"][0]["text"]
    except:
        text = str(data)
    tokens = len(text.split())
    return tokens, elapsed

def test_groq(cfg):
    headers = {
        "Authorization": f"Bearer {cfg['api_key']}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": cfg["model"],
        "messages": [{"role": "user", "content": PROMPT}],
        "max_tokens": MAX_TOKENS
    }
    start = time.time()
    r = requests.post(cfg["endpoint"], headers=headers, json=payload)
    elapsed = time.time() - start
    data = r.json()
    # Groq 格式同 OpenAI
    tokens = data["usage"]["completion_tokens"] if "usage" in data else len(data.get("choices",[{}])[0].get("message",{}).get("content","").split())
    return tokens, elapsed

TYPE_TO_FUNC = {
    "openai": test_openai,
    "gemini": test_gemini,
    "groq": test_groq,
}

results = []
for cfg in LLM_CONFIGS:
    try:
        tokens, elapsed = TYPE_TO_FUNC[cfg["type"]](cfg)
        results.append({
            "model": cfg["name"],
            "tokens": tokens,
            "elapsed_sec": elapsed,
            "tps": tokens/elapsed
        })
    except Exception as e:
        results.append({
            "model": cfg["name"],
            "error": str(e)
        })

pd.DataFrame(results)

Unnamed: 0,model,tokens,elapsed_sec,tps
0,gpt-4o,100,4.114136,24.306439
1,gemini-2.5-flash,45,0.212409,211.854935
2,groq,98,0.70565,138.879077
