From 07aa0ab639d72dcc3924198b2feb6dd96f175540 Mon Sep 17 00:00:00 2001
From: Luke Hinds
Date: Fri, 14 Feb 2025 13:13:29 +0000
Subject: [PATCH] Add Anthropic API

---
 README.md             | 90 ++++++++++++++++++++++++++++++++++++---
 src/mockllm/models.py | 86 +++++++++++++++++++++++++++++---------
 src/mockllm/server.py | 97 ++++++++++++++++++++++++++++++++++++-------
 3 files changed, 233 insertions(+), 40 deletions(-)

diff --git a/README.md b/README.md
index 31737f7..952a462 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Mock LLM Server
 
-A FastAPI-based mock LLM server that mimics OpenAI's API format. Instead of calling an actual language model,
+A FastAPI-based mock LLM server that mimics OpenAI and Anthropic API formats. Instead of calling actual language models,
 it uses predefined responses from a YAML configuration file.
 
 This is made for when you want a deterministic response for testing or development purposes.
@@ -9,7 +9,7 @@ Check out the [CodeGate](https://github.com/stacklok/codegate) when you're done
 
 ## Features
 
-- OpenAI-compatible API endpoint
+- OpenAI- and Anthropic-compatible API endpoints
 - Streaming support (character-by-character response streaming)
 - Configurable responses via YAML file
 - Hot-reloading of response configurations
@@ -56,7 +56,9 @@ uvicorn src.mockllm.server:app --reload
 
 The server will start on `http://localhost:8000`
 
-3. Send requests to the API endpoint:
+3. Send requests to the API endpoints:
+
+### OpenAI Format
 
 Regular request:
 ```bash
@@ -83,6 +85,33 @@ curl -X POST http://localhost:8000/v1/chat/completions \
   }'
 ```
 
+### Anthropic Format
+
+Regular request:
+```bash
+curl -X POST http://localhost:8000/v1/messages \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "claude-3-sonnet-20240229",
+    "messages": [
+      {"role": "user", "content": "what colour is the sky?"}
+    ]
+  }'
+```
+
+Streaming request:
+```bash
+curl -X POST http://localhost:8000/v1/messages \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "claude-3-sonnet-20240229",
+    "messages": [
+      {"role": "user", "content": "what colour is the sky?"}
+    ],
+    "stream": true
+  }'
+```
+
 ## Configuration
 
 ### Response Configuration
@@ -108,7 +137,9 @@ The server automatically detects changes to `responses.yml` and reloads the conf
 
 ## API Format
 
-### Request Format
+### OpenAI Format
+
+#### Request Format
 
 ```json
 {
@@ -122,7 +153,7 @@ The server automatically detects changes to `responses.yml` and reloads the conf
 }
 ```
 
-### Response Format
+#### Response Format
 
 Regular response:
 ```json
@@ -163,6 +194,55 @@ data: {"id":"mock-999","object":"chat.completion.chunk","created":1700000000,"mo
 data: [DONE]
 ```
 
+### Anthropic Format
+
+#### Request Format
+
+```json
+{
+  "model": "claude-3-sonnet-20240229",
+  "messages": [
+    {"role": "user", "content": "what colour is the sky?"}
+  ],
+  "max_tokens": 1024,
+  "stream": false
+}
+```
+
+#### Response Format
+
+Regular response:
+```json
+{
+  "id": "mock-123",
+  "type": "message",
+  "role": "assistant",
+  "model": "claude-3-sonnet-20240229",
+  "content": [
+    {
+      "type": "text",
+      "text": "The sky is blue during a clear day due to a phenomenon called Rayleigh scattering."
+    }
+  ],
+  "usage": {
+    "input_tokens": 10,
+    "output_tokens": 5,
+    "total_tokens": 15
+  }
+}
+```
+
+Streaming response (Server-Sent Events format):
+```
+data: {"type":"message_delta","id":"mock-123","delta":{"type":"content_block_delta","index":0,"delta":{"text":"T"}}}
+
+data: {"type":"message_delta","id":"mock-123","delta":{"type":"content_block_delta","index":0,"delta":{"text":"h"}}}
+
+... (character by character)
+
+data: [DONE]
+```
+
 ## Error Handling
 
 The server includes comprehensive error handling:
diff --git a/src/mockllm/models.py b/src/mockllm/models.py
index 4628065..ae3f62e 100644
--- a/src/mockllm/models.py
+++ b/src/mockllm/models.py
@@ -1,41 +1,42 @@
 import time
 import uuid
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Literal
 
 from pydantic import BaseModel, Field
 
-class Message(BaseModel):
-    """Chat message model."""
+# OpenAI Models
+class OpenAIMessage(BaseModel):
+    """OpenAI chat message model."""
     role: str
     content: str
 
-class ChatRequest(BaseModel):
-    """Chat completion request model."""
+class OpenAIChatRequest(BaseModel):
+    """OpenAI chat completion request model."""
     model: str
-    messages: List[Message]
+    messages: List[OpenAIMessage]
     temperature: Optional[float] = Field(default=0.7)
     max_tokens: Optional[int] = Field(default=150)
     stream: Optional[bool] = Field(default=False)
 
-class DeltaMessage(BaseModel):
-    """Streaming delta message model."""
+class OpenAIDeltaMessage(BaseModel):
+    """OpenAI streaming delta message model."""
     role: Optional[str] = None
     content: Optional[str] = None
 
-class StreamChoice(BaseModel):
-    """Streaming choice model."""
-    delta: DeltaMessage
+class OpenAIStreamChoice(BaseModel):
+    """OpenAI streaming choice model."""
+    delta: OpenAIDeltaMessage
     index: int = 0
     finish_reason: Optional[str] = None
 
-class ChatChoice(BaseModel):
-    """Regular chat choice model."""
-    message: Message
+class OpenAIChatChoice(BaseModel):
+    """OpenAI regular chat choice model."""
+    message: OpenAIMessage
     index: int = 0
     finish_reason: str = "stop"
 
-class ChatResponse(BaseModel):
-    """Chat completion response model."""
+class OpenAIChatResponse(BaseModel):
+    """OpenAI chat completion response model."""
     id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}")
     object: str = "chat.completion"
     created: int = Field(default_factory=lambda: int(time.time()))
@@ -43,10 +44,57 @@ class ChatResponse(BaseModel):
     choices: List[Dict]
     usage: Dict[str, int]
 
-class StreamResponse(BaseModel):
-    """Streaming response model."""
+class OpenAIStreamResponse(BaseModel):
+    """OpenAI streaming response model."""
     id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}")
     object: str = "chat.completion.chunk"
     created: int = Field(default_factory=lambda: int(time.time()))
     model: str
-    choices: List[StreamChoice]
+    choices: List[OpenAIStreamChoice]
+
+# Anthropic Models
+class AnthropicMessage(BaseModel):
+    """Anthropic message model."""
+    role: Literal["user", "assistant"]
+    content: str
+
+class AnthropicChatRequest(BaseModel):
+    """Anthropic chat completion request model."""
+    model: str
+    max_tokens: Optional[int] = Field(default=1024)
+    messages: List[AnthropicMessage]
+    stream: Optional[bool] = Field(default=False)
+    temperature: Optional[float] = Field(default=1.0)
+
+class AnthropicChatResponse(BaseModel):
+    """Anthropic chat completion response model."""
+    id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}")
+    type: str = "message"
+    role: str = "assistant"
+    model: str
+    content: List[Dict[str, str]]
+    stop_reason: Optional[str] = "end_turn"
+    stop_sequence: Optional[str] = None
+    usage: Dict[str, int]
+
+class AnthropicStreamDelta(BaseModel):
+    """Anthropic streaming delta model."""
+    type: str = "content_block_delta"
+    index: int = 0
+    delta: Dict[str, str]
+
+class AnthropicStreamResponse(BaseModel):
+    """Anthropic streaming response model."""
+    type: str = "message_delta"
+    id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}")
+    delta: AnthropicStreamDelta
+    usage: Optional[Dict[str, int]] = None
+
+# For backward compatibility
+Message = OpenAIMessage
+ChatRequest = OpenAIChatRequest
+DeltaMessage = OpenAIDeltaMessage
+StreamChoice = OpenAIStreamChoice
+ChatChoice = OpenAIChatChoice
+ChatResponse = OpenAIChatResponse
+StreamResponse = OpenAIStreamResponse
diff --git a/src/mockllm/server.py b/src/mockllm/server.py
index e2db08b..aa90469 100644
--- a/src/mockllm/server.py
+++ b/src/mockllm/server.py
@@ -6,8 +6,10 @@
 from pythonjsonlogger import jsonlogger
 
 from .config import ResponseConfig
-from .models import (ChatRequest, ChatResponse, DeltaMessage,
-                     StreamChoice, StreamResponse)
+from .models import (OpenAIChatRequest, OpenAIChatResponse, OpenAIDeltaMessage,
+                     OpenAIStreamChoice, OpenAIStreamResponse,
+                     AnthropicChatRequest, AnthropicChatResponse,
+                     AnthropicStreamResponse, AnthropicStreamDelta)
 
 log_handler = logging.StreamHandler()
 log_handler.setFormatter(jsonlogger.JsonFormatter())
@@ -18,14 +20,14 @@
 response_config = ResponseConfig()
 
 
-async def stream_response(content: str, model: str) -> AsyncGenerator[str, None]:
-    """Generate streaming response in SSE format."""
+async def openai_stream_response(content: str, model: str) -> AsyncGenerator[str, None]:
+    """Generate OpenAI-style streaming response in SSE format."""
     # Send the first message with role
-    first_chunk = StreamResponse(
+    first_chunk = OpenAIStreamResponse(
         model=model,
         choices=[
-            StreamChoice(
-                delta=DeltaMessage(role="assistant")
+            OpenAIStreamChoice(
+                delta=OpenAIDeltaMessage(role="assistant")
             )
         ]
     )
@@ -33,22 +35,22 @@ async def stream_response(content: str, model: str) -> AsyncGenerator[str, None]
 
     # Stream the content character by character
     for chunk in response_config.get_streaming_response(content):
-        chunk_response = StreamResponse(
+        chunk_response = OpenAIStreamResponse(
             model=model,
             choices=[
-                StreamChoice(
-                    delta=DeltaMessage(content=chunk)
+                OpenAIStreamChoice(
+                    delta=OpenAIDeltaMessage(content=chunk)
                 )
             ]
         )
         yield f"data: {chunk_response.model_dump_json()}\n\n"
 
     # Send the final message
-    final_chunk = StreamResponse(
+    final_chunk = OpenAIStreamResponse(
         model=model,
         choices=[
-            StreamChoice(
-                delta=DeltaMessage(),
+            OpenAIStreamChoice(
+                delta=OpenAIDeltaMessage(),
                 finish_reason="stop"
             )
         ]
     )
@@ -56,8 +58,20 @@ async def stream_response(content: str, model: str) -> AsyncGenerator[str, None]
     yield f"data: {final_chunk.model_dump_json()}\n\n"
     yield "data: [DONE]\n\n"
 
+async def anthropic_stream_response(content: str, model: str) -> AsyncGenerator[str, None]:
+    """Generate Anthropic-style streaming response in SSE format."""
+    for chunk in response_config.get_streaming_response(content):
+        stream_response = AnthropicStreamResponse(
+            delta=AnthropicStreamDelta(
+                delta={"text": chunk}
+            )
+        )
+        yield f"data: {stream_response.model_dump_json()}\n\n"
+
+    yield "data: [DONE]\n\n"
+
 @app.post("/v1/chat/completions", response_model=None)
-async def chat_completion(request: ChatRequest) -> Union[ChatResponse, StreamingResponse]:
+async def openai_chat_completion(request: OpenAIChatRequest) -> Union[OpenAIChatResponse, StreamingResponse]:
     """Handle chat completion requests, supporting both regular and streaming responses."""
     try:
         logger.info("Received chat completion request", extra={
@@ -79,7 +93,7 @@ async def chat_completion(request: ChatRequest) -> Union[ChatResponse, Streaming
 
         if request.stream:
             return StreamingResponse(
-                stream_response(last_message.content, request.model),
+                openai_stream_response(last_message.content, request.model),
                 media_type="text/event-stream"
             )
 
@@ -90,7 +104,7 @@ async def chat_completion(request: ChatRequest) -> Union[ChatResponse, Streaming
         completion_tokens = len(response_content.split())
         total_tokens = prompt_tokens + completion_tokens
 
-        return ChatResponse(
+        return OpenAIChatResponse(
             model=request.model,
             choices=[{
                 "index": 0,
@@ -113,3 +127,54 @@ async def chat_completion(request: ChatRequest) -> Union[ChatResponse, Streaming
             status_code=500,
             detail=f"Internal server error: {str(e)}"
         )
+
+@app.post("/v1/messages", response_model=None)
+async def anthropic_chat_completion(request: AnthropicChatRequest) -> Union[AnthropicChatResponse, StreamingResponse]:
+    """Handle Anthropic chat completion requests, supporting both regular and streaming responses."""
+    try:
+        logger.info("Received Anthropic chat completion request", extra={
+            "model": request.model,
+            "message_count": len(request.messages),
+            "stream": request.stream
+        })
+
+        last_message = next(
+            (msg for msg in reversed(request.messages) if msg.role == "user"),
+            None
+        )
+
+        if not last_message:
+            raise HTTPException(
+                status_code=400,
+                detail="No user message found in request"
+            )
+
+        if request.stream:
+            return StreamingResponse(
+                anthropic_stream_response(last_message.content, request.model),
+                media_type="text/event-stream"
+            )
+
+        response_content = response_config.get_response(last_message.content)
+
+        # Calculate mock token counts
+        prompt_tokens = len(str(request.messages).split())
+        completion_tokens = len(response_content.split())
+        total_tokens = prompt_tokens + completion_tokens
+
+        return AnthropicChatResponse(
+            model=request.model,
+            content=[{"type": "text", "text": response_content}],
+            usage={
+                "input_tokens": prompt_tokens,
+                "output_tokens": completion_tokens,
+                "total_tokens": total_tokens
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Error processing request: {str(e)}")
+        raise HTTPException(
+            status_code=500,
+            detail=f"Internal server error: {str(e)}"
+        )
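
Reviewer note (not part of the patch itself): a minimal sketch of how the new `/v1/messages` route could be exercised in-process with FastAPI's `TestClient`. It assumes the app is importable as `src.mockllm.server` (matching the README's `uvicorn src.mockllm.server:app --reload` command) and that `responses.yml` contains an entry for the prompt used below; both are assumptions about the local checkout rather than anything this patch adds.

```python
# Sketch only, not part of this patch. Assumes the package layout implied by
# the README (`uvicorn src.mockllm.server:app`) and a responses.yml entry for
# the prompt used below; adjust both to match your checkout.
from fastapi.testclient import TestClient

from src.mockllm.server import app

client = TestClient(app)

payload = {
    "model": "claude-3-sonnet-20240229",
    "messages": [{"role": "user", "content": "what colour is the sky?"}],
}

# Non-streaming request against the new Anthropic-style /v1/messages route.
resp = client.post("/v1/messages", json=payload)
resp.raise_for_status()
body = resp.json()
assert body["type"] == "message"
assert body["role"] == "assistant"
print(body["content"][0]["text"])  # the text configured in responses.yml

# Streaming request: TestClient buffers the StreamingResponse, so the whole
# SSE body is available as text; each event arrives as a "data: ..." line.
resp = client.post("/v1/messages", json={**payload, "stream": True})
for line in resp.text.splitlines():
    if line.startswith("data: "):
        print(line)
```

The same client can hit the existing OpenAI-style `/v1/chat/completions` route, which makes it easy to assert that both endpoints return the same configured response for a given prompt.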