Week 6 focused on `Product Design` for RAG apps. This notebook provides code samples, and links to further code samples, for the main topics.

For reference, the slides are [here](https://drive.google.com/file/d/1StJIwHwP06Chzqvg94irltf-W1ELSw0o/view)

| Topic | Slides | Code Sample |
|-------|-------------------|-------------|
| Feedback design principles | 3-10 | - |
| Citations | 11-15 | 2nd code cell in this notebook |
| Streaming | 16-26 | All cells in this notebook |
| Chain of Thought | 31-36 | [Instructor docs](https://python.useinstructor.com/concepts/prompting/#modular-chain-of-thought) and cells 2-3 in [week 2 notebook](https://github.com/567-labs/systematically-improving-rag/blob/main/week2_question_classification/example_notebook/analyze_clusters.ipynb) |
| Validators | 37-39 | [Instructor Docs](https://python.useinstructor.com/concepts/reask_validation/#step-1-define-the-response-model-with-validators) |



In [1]:
# Here's an example of basic streaming applied to different use cases:

import openai
import instructor
import time
from pprint import pprint
from pydantic import BaseModel
from IPython.display import clear_output

# Wrap the OpenAI client with instructor. The cells below call
# `client.chat.completions.create_partial(..., response_model=...)` on this
# object to stream partially filled Pydantic models.
client = instructor.from_openai(openai.OpenAI())


# Structured reply for the basic streaming example: the answer text plus a
# list of suggested follow-up questions.
# NOTE(review): documented with `#` comments rather than a docstring because a
# docstring would presumably be included in the schema sent to the model -
# confirm before converting.
class ChatResponse(BaseModel):
    content: str  # answer text
    follow_up_question: list[str]  # a list, despite the singular field name


# Four sample customer reviews in an XML-like layout, each with a numeric
# <review_id> and a <review_text>. The string is interpolated verbatim into
# the user prompts of the streaming and citation examples below.
reviews_data = """
<reviews>
    <review>
        <review_id>1</review_id>
        <review_text>It is lightweight due to the carbon fiber handle, which is helpful for cutting small branches. I just wish the blade lasted longer. It was dull after about a week.</review_text>
    </review>
    <review>
        <review_id>2</review_id>
        <review_text>I like the quickchange system for replacing blades. It doesn't require tools.</review_text>
    </review>
    <review>
        <review_id>3</review_id>
        <review_text>You can operate on high speed or low speed. On low speed, the battery last forever.</review_text>
    </review>
    <review>
        <review_id>4</review_id>
        <review_text>The blades that came on it are sharp but lasted only a few hours. The replacement blades are better.</review_text>
    </review>
</reviews>
"""

# Ask a blade-durability question with the sample reviews as context, then
# stream the partially filled ChatResponse and redraw it on every update.
system_prompt = "You are an assistant responding to a user question on a hardware store website."
user_prompt = f"""The user asked:
How durable are the blades on this saw?
         
Here are some relevant user reviews:
{reviews_data}
"""

resp = client.chat.completions.create_partial(
    model="gpt-4o",
    response_model=ChatResponse,
    stream=True,
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ],
)

for snapshot in resp:
    # Replace the previous snapshot in the cell output instead of appending.
    clear_output(wait=True)
    pprint(snapshot.model_dump())
    # Short pause between redraws.
    time.sleep(0.05)

{'content': 'Based on user reviews, the durability of the blades that come '
            'with the saw seems to be subpar. One user mentioned that the '
            'blade was dull after about a week, and another said it lasted '
            'only a few hours. However, it appears that the replacement blades '
            'offer better durability.',
 'follow_up_question': ['Would you like suggestions for more durable '
                        'replacement blades?',
                        'Do you need information on where to purchase '
                        'replacement blades?']}


## Citations

In [2]:
# One citation attached to an answer, identified by a review id.
class Citation(BaseModel):
    review_id: str  # presumably a <review_id> value from reviews_data - verify


# Response shape for the citations example: the answer text plus the list of
# citations backing it.
# Note: this rebinds the name `ChatResponse` that the first code cell also
# defines (there with a `follow_up_question` field instead of `citations`).
class ChatResponse(BaseModel):
    content: str  # answer text; the prompt asks for inline markdown-style citations
    citations: list[Citation]  # structured list of cited reviews


# Ask for an answer that cites the sample reviews, then stream the partially
# filled ChatResponse (content + citations) and redraw it on every update.
citation_system_prompt = "You are a knowledgeable assistant. When providing information, cite your sources using markdown-like notation, e.g., [string](1), [string](2), etc."
sources_prompt = f"""

        Here are some relevant reviews from previous purchasers of a handsaw the user is asking about:
        {reviews_data}
        
        <citation_examples>
        To cite a source, use markdown-style link notation. For example: [relevant text](source number).
        Citations should be integrated naturally within the sentence, not just appended at the end. 
        This approach ensures a smooth flow of information while providing proper attribution.
        For instance: "The [1980 'Miracle on Ice'](1) is considered one of the greatest upsets in Olympic history" is preferable to "There was a great upset in Olympic history. [Miracle on Ice](1)".
        </citation_examples>
         
         Answer the question using the sources provided. using markdown notation for citation
         """
user_question = "What are the best features of this saw?"

resp = client.chat.completions.create_partial(
    model="gpt-4o",
    temperature=0.0,
    response_model=ChatResponse,
    stream=True,
    messages=[
        {"role": "system", "content": citation_system_prompt},
        # The sources and citation instructions go in their own user turn,
        # followed by the actual question as a second user turn.
        {"role": "user", "content": sources_prompt},
        {"role": "user", "content": user_question},
    ],
)

for snapshot in resp:
    # Replace the previous snapshot in the cell output instead of appending.
    clear_output(wait=True)
    pprint(snapshot.model_dump())
    time.sleep(0.1)

{'citations': [{'review_id': '1'}, {'review_id': '2'}, {'review_id': '3'}],
 'content': 'The handsaw has several notable features. It is lightweight due '
            'to the carbon fiber handle, which makes it easier to use for '
            'cutting small branches [1]. Additionally, the quickchange system '
            "for replacing blades is very convenient as it doesn't require any "
            'tools [2]. Another useful feature is the ability to operate at '
            'different speeds, with the low-speed setting allowing the battery '
            'to last a long time [3].'}


## Streaming With Tools

In [3]:
from typing import Optional
from pydantic import BaseModel


# Structured form of a user's search request, filled in by the model.
# Dates are carried as plain strings; the sample output in this notebook
# uses the YYYY-MM-DD form.
# NOTE(review): documented with `#` comments rather than a docstring because a
# docstring would presumably be included in the schema sent to the model.
class SearchQuery(BaseModel):
    query: str  # the search terms extracted from the user's request
    # `Optional[...]` only permits None as a value; under Pydantic v2 a field
    # without a default is still required. Default both bounds to None so a
    # request with no date range validates without the model having to emit
    # explicit nulls.
    start_date: Optional[str] = None
    end_date: Optional[str] = None


# Parse a natural-language search request into a SearchQuery and stream the
# partially filled result as indented JSON, redrawing it on every update.
parser_instructions = "You are an AI assistant that understands and interprets search queries. Your task is to parse the user's input into a structured SearchQuery and provide an explanation of your interpretation. the date is 2024-09-15"
search_request = "Find documents about the XYZ 3000 drill's battery life from last week"

resp = client.chat.completions.create_partial(
    model="gpt-4o",
    response_model=SearchQuery,
    stream=True,
    messages=[
        {"role": "system", "content": parser_instructions},
        {"role": "user", "content": search_request},
    ],
)

for snapshot in resp:
    # Replace the previous snapshot in the cell output instead of appending.
    clear_output(wait=True)
    print(snapshot.model_dump_json(indent=2))
    time.sleep(0.1)

{
  "query": "XYZ 3000 drill battery life",
  "start_date": "2024-09-08",
  "end_date": "2024-09-14"
}
