59 changes: 59 additions & 0 deletions API_DOCUMENTATION.md
@@ -215,6 +215,65 @@ Notes:

`POST /v1/agents/{agent_id}/chat/completions` validates that `{agent_id}` exists. Unknown IDs return `404 Not Found` with an OpenAI-style error payload. When `agent_id` is omitted (`/v1/chat/completions` or `/chat/completions`), the server falls back to the default `cairo-coder` agent.
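
For example, a request scoped to the default `cairo-coder` agent might look like the sketch below; the body mirrors the OpenAI-style chat payload used by `/v1/chat/completions`, and host/port match the examples elsewhere in this document:

```bash
curl -X POST http://localhost:3001/v1/agents/cairo-coder/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{
    "messages": [
      {"role": "user", "content": "How do I create a Cairo contract?"}
    ]
  }'
```

Sending the same request to an unknown ID (e.g. a hypothetical `no-such-agent`) returns the `404 Not Found` error payload described above.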

## Suggestions

### `POST /v1/suggestions`

Generate follow-up conversation suggestions based on chat history. This endpoint analyzes the conversation context and returns 4-5 relevant questions or topics the user might want to explore next.

#### Request Schema

```json
{
  "chat_history": [
    { "role": "user", "content": "How do I create a Cairo contract?" },
    {
      "role": "assistant",
      "content": "Here's how to create a Cairo contract using the #[starknet::contract] attribute..."
    }
  ]
}
```

Field notes:

- `chat_history` is an array of message objects with `role` and `content` fields.
- Roles accepted: `user`, `assistant`, `system`.
- `chat_history` may be an empty array; generic suggestions are returned in that case (see the second example below).

#### Response

`200 OK`

```json
{
  "suggestions": [
    "How do I deploy this contract to testnet?",
    "What are the best practices for contract security?",
    "Can you explain how storage works in Cairo contracts?",
    "How do I write tests for this contract?"
  ]
}
```

#### Example

```bash
curl -X POST http://localhost:3001/v1/suggestions \
  -H 'Content-Type: application/json' \
  -d '{
    "chat_history": [
      {"role": "user", "content": "How do I create a Cairo contract?"},
      {"role": "assistant", "content": "Use #[starknet::contract] attribute..."}
    ]
  }'
```
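
As noted in the field notes above, `chat_history` may also be an empty array, in which case the endpoint returns generic starter suggestions:

```bash
curl -X POST http://localhost:3001/v1/suggestions \
  -H 'Content-Type: application/json' \
  -d '{"chat_history": []}'
```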

#### Errors

- `422 Unprocessable Entity` — validation error (missing `chat_history` or invalid message format); see the example body below.
- `500 Internal Server Error` — suggestion generation failure.
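
A `422` body follows FastAPI's standard validation-error shape. The sketch below is illustrative only; the exact `loc`, `msg`, and `type` values depend on which field failed validation:

```json
{
  "detail": [
    {
      "type": "missing",
      "loc": ["body", "chat_history"],
      "msg": "Field required"
    }
  ]
}
```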

## MCP Mode

Setting either the `mcp` or `x-mcp-mode` header triggers **Model Context Protocol mode**, which bypasses the LLM synthesiser:
3 changes: 3 additions & 0 deletions python/src/cairo_coder/dspy/__init__.py
@@ -5,6 +5,7 @@
- QueryProcessorProgram: Transforms user queries into structured search terms
- DocumentRetrieverProgram: Retrieves and ranks relevant documents
- GenerationProgram: Generates Cairo code responses from retrieved context
- SuggestionProgram: Generates follow-up conversation suggestions
"""

from .document_retriever import DocumentRetrieverProgram
@@ -16,6 +17,7 @@
)
from .query_processor import QueryProcessorProgram, create_query_processor
from .retrieval_judge import RetrievalJudge
from .suggestion_program import SuggestionGeneration

__all__ = [
    "QueryProcessorProgram",
@@ -26,4 +28,5 @@
    "create_generation_program",
    "create_mcp_generation_program",
    "RetrievalJudge",
    "SuggestionGeneration",
]
30 changes: 30 additions & 0 deletions python/src/cairo_coder/dspy/suggestion_program.py
@@ -0,0 +1,30 @@
"""
DSPy Suggestion Program for Cairo Coder.

This module implements the SuggestionProgram that generates follow-up
suggestions based on chat history to help users continue their conversation.
"""


import dspy
import structlog

logger = structlog.get_logger(__name__)


class SuggestionGeneration(dspy.Signature):
    """
    Generate helpful follow-up suggestions based on a conversation history.

    Analyze the conversation and generate 4-5 relevant suggestions that the user
    might ask next. Suggestions should be medium-length, informative, and help
    the user explore related topics or dive deeper into the current discussion.
    """

    chat_history: str = dspy.InputField(
        desc="Previous conversation context to analyze for generating relevant follow-up suggestions"
    )

    suggestions: list[str] = dspy.OutputField(
        desc="A list of exactly 4-5 helpful follow-up questions or suggestions that are relevant to the conversation. Each suggestion should be a complete, medium-length question that the user could ask."
    )
59 changes: 59 additions & 0 deletions python/src/cairo_coder/server/app.py
@@ -31,6 +31,7 @@
)
from cairo_coder.core.types import Message, Role
from cairo_coder.dspy.document_retriever import SourceFilteredPgVectorRM
from cairo_coder.dspy.suggestion_program import SuggestionGeneration
from cairo_coder.utils.logging import setup_logging

# Configure structured logging
@@ -129,6 +130,18 @@ class ErrorResponse(BaseModel):
    error: ErrorDetail = Field(..., description="Error details")


class SuggestionRequest(BaseModel):
    """Request model for generating conversation suggestions."""

    chat_history: list[ChatMessage] = Field(..., description="Conversation history to generate suggestions from")


class SuggestionResponse(BaseModel):
    """Response model for conversation suggestions."""

    suggestions: list[str] = Field(..., description="List of 4-5 follow-up suggestions")


class CairoCoderServer:
    """
    FastAPI server for Cairo Coder that replicates TypeScript backend functionality.
@@ -291,6 +304,32 @@ async def chat_completions(
                request, req, agent_factory, None, mcp_mode, vector_db
            )

        @self.app.post("/v1/suggestions", response_model=SuggestionResponse)
        async def generate_suggestions(request: SuggestionRequest):
            """Generate follow-up conversation suggestions based on chat history."""
            try:
                formatted_history = self._format_chat_history_for_suggestions(request.chat_history)
                suggestion_program = dspy.Predict(SuggestionGeneration)
                with dspy.context(
                    lm=dspy.LM("gemini/gemini-flash-lite-latest", max_tokens=10000), adapter=XMLAdapter()
                ):
                    result = await suggestion_program.aforward(chat_history=formatted_history)
                suggestions = result.suggestions if isinstance(result.suggestions, list) else []
                return SuggestionResponse(suggestions=suggestions)

            except Exception as e:
                logger.error("Error generating suggestions", error=str(e), exc_info=True)
                raise HTTPException(
                    status_code=500,
                    detail=ErrorResponse(
                        error=ErrorDetail(
                            message="Failed to generate suggestions",
                            type="server_error",
                            code="internal_error",
                        )
                    ).dict(),
                ) from e

    async def _handle_chat_completion(
        self,
        request: ChatCompletionRequest,
@@ -450,6 +489,26 @@ async def _stream_chat_completion(
        yield f"data: {json.dumps(final_chunk)}\n\n"
        yield "data: [DONE]\n\n"

    def _format_chat_history_for_suggestions(self, chat_history: list[ChatMessage]) -> str:
        """
        Format chat history for suggestion generation.

        Args:
            chat_history: List of chat messages

        Returns:
            Formatted chat history string
        """
        if not chat_history:
            return ""

        formatted = []
        for msg in chat_history:
            role = "User" if msg.role == "user" else "Assistant"
            formatted.append(f"{role}: {msg.content}")

        return "\n".join(formatted)

    async def _generate_chat_completion(
        self, agent: RagPipeline, query: str, history: list[Message], mcp_mode: bool
    ) -> ChatCompletionResponse:
28 changes: 27 additions & 1 deletion python/tests/integration/conftest.py
@@ -5,6 +5,8 @@
- An integration client that injects a real RagPipeline wired to mocks.
"""

from unittest.mock import AsyncMock, Mock

import pytest
from fastapi.testclient import TestClient

@@ -155,9 +157,33 @@ async def _fake_gen_aforward_streaming(query: str, context: str, chat_history: s

    return pipeline

@pytest.fixture
def patch_suggestion_program(monkeypatch):
    """Patch SuggestionGeneration to return mock suggestions."""
    import dspy

    mock_suggestion_program = Mock(spec=dspy.Predict)
    mock_suggestion_program.aforward = AsyncMock(return_value=dspy.Prediction(suggestions=[
        "How do I deploy this contract to testnet?",
        "What are the best practices for contract security?",
        "Can you explain how storage works in Cairo contracts?",
        "How do I write tests for this contract?",
    ]))

    # Patch dspy.Predict to return our mock when called with SuggestionGeneration
    original_predict = dspy.Predict

    def mock_predict_constructor(signature):
        from cairo_coder.dspy.suggestion_program import SuggestionGeneration
        if signature is SuggestionGeneration or signature == SuggestionGeneration:
            return mock_suggestion_program
        return original_predict(signature)

    monkeypatch.setattr("dspy.Predict", mock_predict_constructor)


@pytest.fixture
def client(server, real_pipeline, mock_vector_db, mock_agent_factory):
def client(server, real_pipeline, mock_vector_db, mock_agent_factory, patch_suggestion_program):
    """Integration-level client with pipeline injection.

    Overrides FastAPI dependencies:
54 changes: 54 additions & 0 deletions python/tests/integration/test_server_integration.py
@@ -407,6 +407,7 @@ def test_app_routes(self, mock_vector_store_config: Mock):
        assert "/" in routes
        assert "/v1/agents" in routes
        assert "/v1/chat/completions" in routes
        assert "/v1/suggestions" in routes


class TestOpenAICompatibility:
@@ -517,6 +518,59 @@ def test_openai_error_response_structure(self, client: TestClient, mock_agent_fa
        assert error["code"] == "agent_not_found"


class TestSuggestionEndpoint:
    """Test suite for the suggestion generation endpoint."""

    def test_suggestion_generation_success(self, client: TestClient):
        """Test successful suggestion generation with chat history."""
        response = client.post(
            "/v1/suggestions",
            json={
                "chat_history": [
                    {"role": "user", "content": "How do I create a Cairo contract?"},
                    {
                        "role": "assistant",
                        "content": "Here's how to create a Cairo contract using the #[starknet::contract] attribute...",
                    },
                ]
            },
        )
        assert response.status_code == 200
        data = response.json()

        # Validate response structure
        assert "suggestions" in data
        assert isinstance(data["suggestions"], list)
        assert len(data["suggestions"]) >= 1  # Should have suggestions

    def test_suggestion_generation_empty_history(self, client: TestClient):
        """Test suggestion generation with empty chat history."""
        response = client.post(
            "/v1/suggestions",
            json={"chat_history": []},
        )
        assert response.status_code == 200
        data = response.json()
        assert "suggestions" in data
        assert isinstance(data["suggestions"], list)

    def test_suggestion_generation_validation_error(self, client: TestClient):
        """Test validation error when chat_history is missing."""
        response = client.post(
            "/v1/suggestions",
            json={},
        )
        assert response.status_code == 422  # Pydantic validation error

    def test_suggestion_generation_invalid_message_format(self, client: TestClient):
        """Test error handling with invalid message format."""
        response = client.post(
            "/v1/suggestions",
            json={"chat_history": [{"invalid": "format"}]},
        )
        assert response.status_code == 422  # Pydantic validation error


class TestMCPModeCompatibility:
    """Test suite for MCP mode compatibility with TypeScript backend."""
