99 changes: 86 additions & 13 deletions README.md
@@ -1,6 +1,7 @@
# Mock LLM Server

A mock LLM server that mimics OpenAI's API format. Instead of calling an actual language model,
A FastAPI-based mock LLM server that mimics OpenAI and Anthropic API formats. Instead of calling actual language models,

it uses predefined responses from a YAML configuration file.

This is useful when you want a deterministic response for testing or development purposes.
@@ -9,7 +10,7 @@ Check out the [CodeGate](https://github.com/stacklok/codegate) when you're done

## Features

- OpenAI-compatible API endpoint
- OpenAI and Anthropic compatible API endpoints
- Streaming support (character-by-character response streaming)
- Configurable responses via YAML file
- Hot-reloading of response configurations
@@ -56,7 +57,9 @@ uvicorn src.mockllm.server:app --reload

The server will start on `http://localhost:8000`

3. Send requests to the API endpoint:
3. Send requests to the API endpoints:

### OpenAI Format

Regular request:
```bash
@@ -83,6 +86,33 @@ curl -X POST http://localhost:8000/v1/chat/completions \
}'
```
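
The same request can be made from Python with the official `openai` client pointed at the mock server. A minimal sketch, assuming the `openai` package (v1+) is installed; the `api_key` value and model name are placeholders, since the mock is not documented here as validating either:

```python
from openai import OpenAI

# Point the official client at the mock server instead of api.openai.com.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="mock-key")

response = client.chat.completions.create(
    model="mock-llm",  # illustrative model name
    messages=[{"role": "user", "content": "what colour is the sky?"}],
)
print(response.choices[0].message.content)
```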

### Anthropic Format

Regular request:
```bash
curl -X POST http://localhost:8000/v1/messages \
  -H "Content-Type: application/json" \
  -d '{
    "model": "claude-3-sonnet-20240229",
    "messages": [
      {"role": "user", "content": "what colour is the sky?"}
    ]
  }'
```

Streaming request:
```bash
curl -X POST http://localhost:8000/v1/messages \
  -H "Content-Type: application/json" \
  -d '{
    "model": "claude-3-sonnet-20240229",
    "messages": [
      {"role": "user", "content": "what colour is the sky?"}
    ],
    "stream": true
  }'
```
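
Likewise for the Anthropic endpoint, a minimal sketch assuming the official `anthropic` package is installed and pointed at the mock server via `base_url`; the `api_key` value is a placeholder:

```python
from anthropic import Anthropic

# Point the official client at the mock server instead of api.anthropic.com.
client = Anthropic(base_url="http://localhost:8000", api_key="mock-key")

message = client.messages.create(
    model="claude-3-sonnet-20240229",
    max_tokens=1024,
    messages=[{"role": "user", "content": "what colour is the sky?"}],
)
print(message.content[0].text)
```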

## Configuration

### Response Configuration
@@ -108,7 +138,9 @@ The server automatically detects changes to `responses.yml` and reloads the conf

## API Format

### Request Format
### OpenAI Format

#### Request Format

```json
{
@@ -122,7 +154,7 @@ The server automatically detects changes to `responses.yml` and reloads the conf
}
```

### Response Format
#### Response Format

Regular response:
```json
@@ -163,18 +195,59 @@ data: {"id":"mock-999","object":"chat.completion.chunk","created":1700000000,"mo
data: [DONE]
```
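
To consume the character-by-character stream from Python, one option is a sketch with the `requests` library (assumed to be installed); it reassembles the `delta.content` fragments shown above:

```python
import json
import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "mock-llm",  # illustrative model name
        "messages": [{"role": "user", "content": "what colour is the sky?"}],
        "stream": True,
    },
    stream=True,
)

text = ""
for line in resp.iter_lines(decode_unicode=True):
    if not line or not line.startswith("data: "):
        continue
    payload = line[len("data: "):]
    if payload == "[DONE]":
        break
    chunk = json.loads(payload)
    # Each chunk carries at most one character in delta.content.
    text += chunk["choices"][0]["delta"].get("content") or ""

print(text)
```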

### Anthropic Format

#### Request Format

```json
{
  "model": "claude-3-sonnet-20240229",
  "messages": [
    {"role": "user", "content": "what colour is the sky?"}
  ],
  "max_tokens": 1024,
  "stream": false
}
```

#### Response Format

Regular response:
```json
{
  "id": "mock-123",
  "type": "message",
  "role": "assistant",
  "model": "claude-3-sonnet-20240229",
  "content": [
    {
      "type": "text",
      "text": "The sky is blue during a clear day due to a phenomenon called Rayleigh scattering."
    }
  ],
  "usage": {
    "input_tokens": 10,
    "output_tokens": 5,
    "total_tokens": 15
  }
}
```
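
Reading the regular response from Python follows the shape above; a small sketch with `requests` (assumed available):

```python
import requests

resp = requests.post(
    "http://localhost:8000/v1/messages",
    json={
        "model": "claude-3-sonnet-20240229",
        "messages": [{"role": "user", "content": "what colour is the sky?"}],
    },
)
body = resp.json()
# The reply text sits in the first content block; usage carries token counts.
print(body["content"][0]["text"])
print(body["usage"])
```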

Streaming response (Server-Sent Events format):
```
data: {"type":"message_delta","id":"mock-123","delta":{"type":"content_block_delta","index":0,"delta":{"text":"T"}}}

data: {"type":"message_delta","id":"mock-123","delta":{"type":"content_block_delta","index":0,"delta":{"text":"h"}}}

... (character by character)

data: [DONE]
```
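
The Anthropic stream can be reassembled the same way as the OpenAI one; the only difference is that each character sits one level deeper, under `delta.delta.text` (sketch with `requests`):

```python
import json
import requests

resp = requests.post(
    "http://localhost:8000/v1/messages",
    json={
        "model": "claude-3-sonnet-20240229",
        "messages": [{"role": "user", "content": "what colour is the sky?"}],
        "stream": True,
    },
    stream=True,
)

text = ""
for line in resp.iter_lines(decode_unicode=True):
    if not line or not line.startswith("data: "):
        continue
    payload = line[len("data: "):]
    if payload == "[DONE]":
        break
    event = json.loads(payload)
    # Each message_delta wraps a content_block_delta carrying one character.
    text += event["delta"]["delta"].get("text", "")

print(text)
```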

## Error Handling

The server includes comprehensive error handling:

- Invalid requests return 400 status codes with descriptive messages
- Server errors return 500 status codes with error details
- All errors are logged using JSON format
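
A quick way to observe the error behaviour, sketched with `requests`; the exact error body is not specified here, so only the status code and raw body are printed:

```python
import requests

# Omitting the required "messages" field should be rejected as invalid.
resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={"model": "mock-llm"},  # illustrative model name, no messages
)
print(resp.status_code)  # expected: a 4xx error as described above
print(resp.text)         # descriptive error message
```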

## Logging

The server uses JSON-formatted logging for:

- Incoming request details
- Response configuration loading
- Error messages and stack traces
86 changes: 67 additions & 19 deletions src/mockllm/models.py
@@ -1,52 +1,100 @@
import time
import uuid
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Literal

from pydantic import BaseModel, Field

class Message(BaseModel):
    """Chat message model."""
# OpenAI Models
class OpenAIMessage(BaseModel):
    """OpenAI chat message model."""
    role: str
    content: str

class ChatRequest(BaseModel):
    """Chat completion request model."""
class OpenAIChatRequest(BaseModel):
    """OpenAI chat completion request model."""
    model: str
    messages: List[Message]
    messages: List[OpenAIMessage]
    temperature: Optional[float] = Field(default=0.7)
    max_tokens: Optional[int] = Field(default=150)
    stream: Optional[bool] = Field(default=False)

class DeltaMessage(BaseModel):
    """Streaming delta message model."""
class OpenAIDeltaMessage(BaseModel):
    """OpenAI streaming delta message model."""
    role: Optional[str] = None
    content: Optional[str] = None

class StreamChoice(BaseModel):
    """Streaming choice model."""
    delta: DeltaMessage
class OpenAIStreamChoice(BaseModel):
    """OpenAI streaming choice model."""
    delta: OpenAIDeltaMessage
    index: int = 0
    finish_reason: Optional[str] = None

class ChatChoice(BaseModel):
    """Regular chat choice model."""
    message: Message
class OpenAIChatChoice(BaseModel):
    """OpenAI regular chat choice model."""
    message: OpenAIMessage
    index: int = 0
    finish_reason: str = "stop"

class ChatResponse(BaseModel):
    """Chat completion response model."""
class OpenAIChatResponse(BaseModel):
    """OpenAI chat completion response model."""
    id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}")
    object: str = "chat.completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[Dict]
    usage: Dict[str, int]

class StreamResponse(BaseModel):
    """Streaming response model."""
class OpenAIStreamResponse(BaseModel):
    """OpenAI streaming response model."""
    id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}")
    object: str = "chat.completion.chunk"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[StreamChoice]
    choices: List[OpenAIStreamChoice]

# Anthropic Models
class AnthropicMessage(BaseModel):
    """Anthropic message model."""
    role: Literal["user", "assistant"]
    content: str

class AnthropicChatRequest(BaseModel):
    """Anthropic chat completion request model."""
    model: str
    max_tokens: Optional[int] = Field(default=1024)
    messages: List[AnthropicMessage]
    stream: Optional[bool] = Field(default=False)
    temperature: Optional[float] = Field(default=1.0)

class AnthropicChatResponse(BaseModel):
    """Anthropic chat completion response model."""
    id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}")
    type: str = "message"
    role: str = "assistant"
    model: str
    content: List[Dict[str, str]]
    stop_reason: Optional[str] = "end_turn"
    stop_sequence: Optional[str] = None
    usage: Dict[str, int]

class AnthropicStreamDelta(BaseModel):
    """Anthropic streaming delta model."""
    type: str = "content_block_delta"
    index: int = 0
    delta: Dict[str, str]

class AnthropicStreamResponse(BaseModel):
    """Anthropic streaming response model."""
    type: str = "message_delta"
    id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}")
    delta: AnthropicStreamDelta
    usage: Optional[Dict[str, int]] = None

# For backward compatibility
Message = OpenAIMessage
ChatRequest = OpenAIChatRequest
DeltaMessage = OpenAIDeltaMessage
StreamChoice = OpenAIStreamChoice
ChatChoice = OpenAIChatChoice
ChatResponse = OpenAIChatResponse
StreamResponse = OpenAIStreamResponse
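
A small usage sketch of the models above. The import path assumes the `src.mockllm` layout implied by the `uvicorn src.mockllm.server:app` command in the README; adjust it if the package is installed under a different name:

```python
from src.mockllm.models import (
    AnthropicChatRequest,
    AnthropicMessage,
    OpenAIChatRequest,
    OpenAIMessage,
)

# Defaults (temperature, max_tokens, stream) come from the Field definitions above.
openai_req = OpenAIChatRequest(
    model="mock-llm",  # illustrative model name
    messages=[OpenAIMessage(role="user", content="what colour is the sky?")],
)

anthropic_req = AnthropicChatRequest(
    model="claude-3-sonnet-20240229",
    messages=[AnthropicMessage(role="user", content="what colour is the sky?")],
    stream=True,
)

# model_dump() on Pydantic v2, dict() on v1 -- which version applies is not pinned here.
dump = openai_req.model_dump() if hasattr(openai_req, "model_dump") else openai_req.dict()
print(dump)
print(anthropic_req.max_tokens)  # -> 1024 (default)
```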