Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(schema.prisma): store model id + model group as part of spend logs — allows precise model metrics #3789

Merged
merged 3 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 101 additions & 2 deletions litellm/proxy/proxy_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -8565,6 +8565,102 @@ async def model_info_v2(
return {"data": all_models}


@router.get(
    "/model/streaming_metrics",
    description="View time to first token for models in spend logs",
    tags=["model management"],
    include_in_schema=False,
    dependencies=[Depends(user_api_key_auth)],
)
async def model_streaming_metrics(
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    _selected_model_group: Optional[str] = None,
    startTime: Optional[datetime] = None,
    endTime: Optional[datetime] = None,
):
    """Per-day average time-to-first-token for a model group.

    Aggregates `LiteLLM_SpendLogs` rows between ``startTime`` and ``endTime``
    (defaulting to the past week) and returns one chart point per day, keyed
    by model name or API base.

    Raises:
        ProxyException: 500 when the Prisma DB client is not connected.
    """
    global prisma_client, llm_router

    if prisma_client is None:
        # Spend-log metrics require a connected database.
        raise ProxyException(
            message=CommonProxyErrors.db_not_connected_error.value,
            type="internal_error",
            param="None",
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

    # Default window: the past 7 days ending now.
    if startTime is None:
        startTime = datetime.now() - timedelta(days=7)
    if endTime is None:
        endTime = datetime.now()

    sql_query = """
        SELECT
            api_base,
            model_group,
            model,
            DATE_TRUNC('day', "startTime")::DATE AS day,
            AVG(EXTRACT(epoch FROM ("completionStartTime" - "startTime"))) AS time_to_first_token
        FROM
            "LiteLLM_SpendLogs"
        WHERE
            "startTime" BETWEEN $2::timestamp AND $3::timestamp
            AND "model_group" = $1 AND "cache_hit" != 'True'
            AND "completionStartTime" IS NOT NULL
            AND "completionStartTime" != "endTime"
        GROUP BY
            api_base,
            model_group,
            model,
            day
        ORDER BY
            time_to_first_token DESC;
    """

    db_response = await prisma_client.db.query_raw(
        sql_query, _selected_model_group, startTime, endTime
    )

    # day -> {series_name: avg time_to_first_token}
    per_day: dict = {}
    all_series = set()
    for row in db_response or []:
        api_base = row["api_base"]
        series_name = str(row["model"])
        if "https://" in api_base:
            # Prefer the api_base as the series label, trimming any
            # "/openai/..." deployment suffix from Azure-style URLs.
            series_name = str(api_base)
            if "/openai/" in series_name:
                series_name = series_name.split("/openai/")[0]

        all_series.add(series_name)
        per_day.setdefault(row["day"], {})[series_name] = row["time_to_first_token"]

    # Shape for the UI chart — one dict per day, e.g.:
    #   {"date": "2024-05-20", "gpt-4-https://api.openai.com/v1/": 0.002, ...}
    response: List[dict] = []
    for day in sorted(per_day):
        point = {"date": str(day)}
        point.update(per_day[day])
        response.append(point)

    return {
        "data": response,
        "all_api_bases": list(all_series),
    }


@router.get(
"/model/metrics",
description="View number of requests & avg latency per model on config.yaml",
Expand Down Expand Up @@ -8592,16 +8688,18 @@ async def model_metrics(
sql_query = """
SELECT
api_base,
model_group,
model,
DATE_TRUNC('day', "startTime")::DATE AS day,
AVG(EXTRACT(epoch FROM ("endTime" - "startTime"))) / SUM(total_tokens) AS avg_latency_per_token
FROM
"LiteLLM_SpendLogs"
WHERE
"startTime" BETWEEN $2::timestamp AND $3::timestamp
AND "model" = $1 AND "cache_hit" != 'True'
AND "model_group" = $1 AND "cache_hit" != 'True'
GROUP BY
api_base,
model_group,
model,
day
HAVING
Expand All @@ -8614,6 +8712,7 @@ async def model_metrics(
sql_query, _selected_model_group, startTime, endTime
)
_daily_entries: dict = {} # {"Jun 23": {"model1": 0.002, "model2": 0.003}}

if db_response is not None:
for model_data in db_response:
_api_base = model_data["api_base"]
Expand Down Expand Up @@ -8697,7 +8796,7 @@ async def model_metrics_slow_responses(
FROM
"LiteLLM_SpendLogs"
WHERE
"model" = $2
"model_group" = $2
AND "cache_hit" != 'True'
AND "startTime" >= $3::timestamp
AND "startTime" <= $4::timestamp
Expand Down
2 changes: 2 additions & 0 deletions litellm/proxy/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ model LiteLLM_SpendLogs {
endTime DateTime // Assuming end_time is a DateTime field
completionStartTime DateTime? // Assuming completionStartTime is a DateTime field
model String @default("")
model_id String? @default("") // the model id stored in proxy model db
model_group String? @default("") // public model_name / model_group
api_base String @default("")
user String @default("")
metadata Json @default("{}")
Expand Down
5 changes: 5 additions & 0 deletions litellm/proxy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1874,6 +1874,9 @@ def get_logging_payload(
# hash the api_key
api_key = hash_token(api_key)

_model_id = metadata.get("model_info", {}).get("id", "")
_model_group = metadata.get("model_group", "")

# clean up litellm metadata
if isinstance(metadata, dict):
clean_metadata = {}
Expand Down Expand Up @@ -1928,6 +1931,8 @@ def get_logging_payload(
"request_tags": metadata.get("tags", []),
"end_user": end_user_id or "",
"api_base": litellm_params.get("api_base", ""),
"model_group": _model_group,
"model_id": _model_id,
}

verbose_proxy_logger.debug("SpendTable: created payload - payload: %s\n\n", payload)
Expand Down
2 changes: 2 additions & 0 deletions schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ model LiteLLM_SpendLogs {
endTime DateTime // Assuming end_time is a DateTime field
completionStartTime DateTime? // Assuming completionStartTime is a DateTime field
model String @default("")
model_id String? @default("") // the model id stored in proxy model db
model_group String? @default("") // public model_name / model_group
api_base String @default("")
user String @default("")
metadata Json @default("{}")
Expand Down
Loading
Loading