# Structured RAG

In [1]:
from openai import OpenAI

openai_client = OpenAI()

In [2]:
from pydantic import BaseModel

class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]

In [3]:
from gitsource import GithubRepositoryDataReader, chunk_documents
from minsearch import Index

reader = GithubRepositoryDataReader(
    repo_owner="evidentlyai",
    repo_name="docs",
    allowed_extensions={"md", "mdx"},
)
files = reader.read()

parsed_docs = [doc.parse() for doc in files]
chunked_docs = chunk_documents(parsed_docs, size=3000, step=1500)

index = Index(
    text_fields=["title", "description", "content"],
    keyword_fields=["filename"]
)
index.fit(chunked_docs)

print(f"Indexed {len(chunked_docs)} chunks from {len(files)} documents")

Indexed 385 chunks from 95 documents


In [4]:
def search(query):
    results = index.search(
        query=query,
        num_results=5
    )
    return results

In [5]:
import json

instructions = """
You're a documentation assistant. Answer the QUESTION based on the CONTEXT from our documentation.

Use only facts from the CONTEXT when answering.
If the answer isn't in the CONTEXT, say so.
"""

prompt_template = """
<QUESTION>
{question}
</QUESTION>

<CONTEXT>
{context}
</CONTEXT>
""".strip()

def build_prompt(question, search_results):
    context = json.dumps(search_results, indent=2)
    return prompt_template.format(
        question=question,
        context=context
    )

In [6]:
def llm(user_prompt, instructions=None, model="gpt-4o-mini"):
    messages = []

    if instructions:
        messages.append({
            "role": "system",
            "content": instructions
        })

    messages.append({
        "role": "user",
        "content": user_prompt
    })

    response = openai_client.responses.create(
        model=model,
        input=messages
    )

    return response.output_text

In [7]:
def rag(query):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    return llm(prompt, instructions)

In [8]:
answer = rag('how do i implement llm as a judge?')

In [9]:
def llm_structured(
        user_prompt,
        output_type =None,
        instructions=None,
        model='gpt-4o-mini',
        ):
    
    messages = []

    if instructions:
        messages.append({
            "role": "system",
            "content": instructions
        })

    messages.append({
        "role": "user",
        "content": user_prompt
    })

    response = openai_client.responses.parse(
        model=model,
        input=messages,
        text_format=output_type
    )

    return response.output_parsed

In [10]:
response = llm_structured(
    instructions='Extract the event information.',
    user_prompt='Alice and Bob are going to a science fair on Friday',
    output_type=CalendarEvent
)

In [11]:
response

CalendarEvent(name='Science Fair', date='Friday', participants=['Alice', 'Bob'])

In [12]:
class RagResponse(BaseModel):
    answer: str
    found_answer: bool

In [13]:
def rag_structured(query, output_type = RagResponse):
    search_results = search(query)
    prompt = build_prompt(query, search_results)

    return llm_structured(instructions = instructions,
                           user_prompt= prompt, 
                           output_type = output_type)

In [14]:
answer = rag_structured('how do i do llm evals?')

print(answer.answer[:100])
print(answer.found_answer)

To perform LLM evaluations, you can follow these steps:

1. **Installation and Imports**: Install th
True


In [15]:
answer = rag_structured('how do i isntall kafka on windows?')

print(answer.answer[:100])
print(answer.found_answer)

The CONTEXT does not provide any information on how to install Kafka on Windows.
False


In [16]:
RagResponse.model_json_schema()

{'properties': {'answer': {'title': 'Answer', 'type': 'string'},
  'found_answer': {'title': 'Found Answer', 'type': 'boolean'}},
 'required': ['answer', 'found_answer'],
 'title': 'RagResponse',
 'type': 'object'}

## Add optional on class RagResponse

In [None]:
from typing import Optional

class RagResponse(BaseModel):
    answer:Optional[str] = None
    found_answer: bool

In [18]:
RagResponse.model_json_schema()

{'properties': {'answer': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
   'default': None,
   'title': 'Answer'},
  'found_answer': {'title': 'Found Answer', 'type': 'boolean'}},
 'required': ['found_answer'],
 'title': 'RagResponse',
 'type': 'object'}

In [19]:
answer = rag_structured('how do i isntall kafka on windows?', RagResponse)

print(answer.answer)
print(answer.found_answer)

None
False


## Add into instructions answer None, if llm dont know the answer

In [24]:
instructions = """
You're a documentation assistant. Answer the QUESTION based on the CONTEXT from our documentation.

Use only facts from the CONTEXT when answering.
If the answer isn't in the CONTEXT, say so.

If you don't know the answer, set 'answer' to None
"""

In [25]:
answer = rag_structured('how do i isntall kafka on windows?', RagResponse)

print(answer.answer)
print(answer.found_answer)

None
False


## Add into class RagResponse If the answer to the question wasn't found in the database

In [26]:
instructions = """
You're a documentation assistant. Answer the QUESTION based on the CONTEXT from our documentation.

Use only facts from the CONTEXT when answering.
If the answer isn't in the CONTEXT, say so.
"""

In [29]:
from typing import Optional

class RagResponse(BaseModel):
    """
    The response from the documentation RAG system
    """
    answer:Optional[str] = None # If the answer to the question wasn't found in the database, 'answer' is None
    found_answer: bool

In [30]:
RagResponse.model_json_schema()

{'description': 'The response from the documentation RAG system',
 'properties': {'answer': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
   'default': None,
   'title': 'Answer'},
  'found_answer': {'title': 'Found Answer', 'type': 'boolean'}},
 'required': ['found_answer'],
 'title': 'RagResponse',
 'type': 'object'}

In [31]:
from pydantic import Field

class RagResponse(BaseModel):
    """
    The response from the documentation RAG system
    """
    answer:Optional[str] = Field(None, description="Answer to the question or None if it's not found")
    found_answer: bool = Field(description="True if the answer is found, False otherwise")

In [32]:
RagResponse.model_json_schema()

{'description': 'The response from the documentation RAG system',
 'properties': {'answer': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
   'default': None,
   'description': "Answer to the question or None if it's not found",
   'title': 'Answer'},
  'found_answer': {'description': 'True if the answer is found, False otherwise',
   'title': 'Found Answer',
   'type': 'boolean'}},
 'required': ['found_answer'],
 'title': 'RagResponse',
 'type': 'object'}

In [33]:
answer = rag_structured('how do i isntall kafka on windows?', RagResponse)

print(answer.answer)
print(answer.found_answer)

None
False


In [40]:
from typing import Literal

class RagResponse(BaseModel):
    """
    This model provides a structured answer with metadata about the response,
    including confidence, categorization, and follow-up suggestions.
    """
    answer: str = Field(description="The main answer to the user's question in markdown")
    found_answer: bool = Field(description="True if relevant information was found in the documentation")
    confidence: float = Field(description="Confidence score from 0.0 to 1.0 indicating how certain the answer is")
    confidence_explanation: str = Field(description="A brief explanation of the confidence score, highlighting key factors that influenced it")
    answer_type: Literal["how-to","explanation", "troubleshoting","comparison","reference"] = Field(description="The category of the answer")
    followup_questions: list[str] = Field(description="Suggested follow-up questions the user might want to ask")

In [41]:
RagResponse.model_json_schema()

{'description': 'This model provides a structured answer with metadata about the response,\nincluding confidence, categorization, and follow-up suggestions.',
 'properties': {'answer': {'description': "The main answer to the user's question in markdown",
   'title': 'Answer',
   'type': 'string'},
  'found_answer': {'description': 'True if relevant information was found in the documentation',
   'title': 'Found Answer',
   'type': 'boolean'},
  'confidence': {'description': 'Confidence score from 0.0 to 1.0 indicating how certain the answer is',
   'title': 'Confidence',
   'type': 'number'},
  'confidence_explanation': {'description': 'A brief explanation of the confidence score, highlighting key factors that influenced it',
   'title': 'Confidence Explanation',
   'type': 'string'},
  'answer_type': {'description': 'The category of the answer',
   'enum': ['how-to',
    'explanation',
    'troubleshoting',
    'comparison',
    'reference'],
   'title': 'Answer Type',
   'type': 'str

In [44]:
answer = rag_structured('how do I evaluate llms', RagResponse)

In [45]:
print(answer.answer)

To evaluate language models (LLMs), you can follow this structured approach:

### 1. Set Up Evaluators with Multiple LLMs
Use multiple LLMs to assess the same outputs. An output is considered a "pass" if all or the majority of LLMs approve, enabling you to see aggregate results and disagreements. Here’s how to set it up:
   
```python
import os
os.environ["OPENAI_API_KEY"] = "YOUR KEY"
os.environ["GEMINI_API_KEY"] = "YOUR KEY"
os.environ["ANTHROPIC_API_KEY"] = "YOUR KEY"
```

### 2. Dataset Preparation
Define a dataset of user intents alongside generated emails to evaluate their appropriateness:

```python
data = [
    ["user input", "generated email"],
    ... 
]
eval_df = pd.DataFrame(data, columns=["user input", "generated email"])
```

### 3. Define the Evaluation Criteria
Utilize an evaluation template to set the judging criteria.

```python
from evidently.llm.templates import BinaryClassificationPromptTemplate

criteria = BinaryClassificationPromptTemplate(
    pre_messages=[("sy

In [46]:
answer

RagResponse(answer='To evaluate language models (LLMs), you can follow this structured approach:\n\n### 1. Set Up Evaluators with Multiple LLMs\nUse multiple LLMs to assess the same outputs. An output is considered a "pass" if all or the majority of LLMs approve, enabling you to see aggregate results and disagreements. Here’s how to set it up:\n   \n```python\nimport os\nos.environ["OPENAI_API_KEY"] = "YOUR KEY"\nos.environ["GEMINI_API_KEY"] = "YOUR KEY"\nos.environ["ANTHROPIC_API_KEY"] = "YOUR KEY"\n```\n\n### 2. Dataset Preparation\nDefine a dataset of user intents alongside generated emails to evaluate their appropriateness:\n\n```python\ndata = [\n    ["user input", "generated email"],\n    ... \n]\neval_df = pd.DataFrame(data, columns=["user input", "generated email"])\n```\n\n### 3. Define the Evaluation Criteria\nUtilize an evaluation template to set the judging criteria.\n\n```python\nfrom evidently.llm.templates import BinaryClassificationPromptTemplate\n\ncriteria = BinaryCla

In [47]:
answer = rag_structured('how do I install kafka on windows?', RagResponse)

In [48]:
print(answer.answer[:100])
print(answer.confidence)
print(answer.confidence_explanation)
print(answer.answer_type)
print(answer.followup_questions)

The provided context does not include information on how to install Kafka on Windows. Please consult
0.0
The relevant documentation does not mention Kafka or its installation process, indicating a lack of information on this topic.
reference
['What is Kafka?', 'Can you help with Kafka configuration?', 'Where can I find Kafka installation documentation?']


In [49]:
from pydantic import model_validator

class AnswerNotFound(BaseModel):
    explanation: str

class AnswerResponse(BaseModel):
    """
    If answer is found, 'answer' is populated.
    If no answer is found, 'answer_not_found' is populated.
    Only one of the two fields can be set at a time. Never both or neither.
    """

    answer_not_found: Optional[AnswerNotFound] = None
    found_answer: bool
    answer: Optional[RagResponse] = None

    @model_validator(mode="after")
    def check_consistency(self):
        if self.answer is not None and self.answer_not_found is not None:
            raise ValueError("Provide either 'answer' or 'answer_not_found', not both.")

        if self.answer is None and self.answer_not_found is None:
            raise ValueError("Provide either 'answer' or 'answer_not_found'.")

        return self

In [50]:
RagResponse.model_json_schema()

{'description': 'This model provides a structured answer with metadata about the response,\nincluding confidence, categorization, and follow-up suggestions.',
 'properties': {'answer': {'description': "The main answer to the user's question in markdown",
   'title': 'Answer',
   'type': 'string'},
  'found_answer': {'description': 'True if relevant information was found in the documentation',
   'title': 'Found Answer',
   'type': 'boolean'},
  'confidence': {'description': 'Confidence score from 0.0 to 1.0 indicating how certain the answer is',
   'title': 'Confidence',
   'type': 'number'},
  'confidence_explanation': {'description': 'A brief explanation of the confidence score, highlighting key factors that influenced it',
   'title': 'Confidence Explanation',
   'type': 'string'},
  'answer_type': {'description': 'The category of the answer',
   'enum': ['how-to',
    'explanation',
    'troubleshoting',
    'comparison',
    'reference'],
   'title': 'Answer Type',
   'type': 'str

In [51]:
answer = rag_structured('how do I install kafka on windows?', AnswerResponse)
answer

AnswerResponse(answer_not_found=AnswerNotFound(explanation='The provided documentation does not contain information regarding the installation of Kafka on Windows.'), found_answer=False, answer=None)

In [52]:
answer = rag_structured('how do I run llm evals?', AnswerResponse)
answer

AnswerResponse(answer_not_found=None, found_answer=True, answer=RagResponse(answer='To run LLM evaluations, follow these steps:\n\n1. **Connect to Evidently Cloud and Create a Project**:\n   You must first connect to [Evidently Cloud](/docs/setup/cloud) and [create a Project](/docs/platform/projects_manage).\n\n2. **Prepare the Dataset**:\n   Ensure you have a dataset with input questions and computed descriptors. You can create a dataset in a pandas DataFrame, for example:\n   ```python\n   data = [\n       ["Question 1?", "Expected Response 1"],\n       ["Question 2?", "Expected Response 2"]\n   ]\n   columns = ["question", "target_response"]\n   ref_data = pd.DataFrame(data, columns=columns)\n   ```\n\n3. **Create and Run the Report**:\n   Use the `TextEvals` Preset and run a report with your dataset. Here’s how you can do it:\n   ```python\n   from evidently.report import Report\n   report = Report([\n       TextEvals(),\n   ])\n   my_eval = report.run(ref_dataset, None)\n   ```\n 

## Valdiation error for logs

In [53]:
from pydantic import ValidationError

try:
    AnswerResponse()
except ValidationError as e:
    print("Validation error as expected:", e)

Validation error as expected: 1 validation error for AnswerResponse
found_answer
  Field required [type=missing, input_value={}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.12/v/missing
