### Import Dependencies

In [1]:
import openai
import instructor
from qdrant_client import QdrantClient

from pydantic import BaseModel, Field

### Mock Example

In [2]:
# Minimal prompt for the mock examples: a generic assistant instruction with the
# question embedded in the same string. The cells that follow send it as the
# content of a single system message.
prompt = """
You are a helpful assistant that can answer questions and help with tasks.
Question: What is the capital of France?
"""

In [14]:
# Baseline call without structured outputs, made through the module-level
# `openai` API. The whole prompt (instruction + question) travels as one system
# message; no separate user message is sent.
openai_response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": prompt}
    ],
    temperature=0,
)
# Print only the text content of the first choice.
print(openai_response.choices[0].message.content)

The capital of France is Paris.


In [15]:
# Display the full response object (not just the message text) for inspection.
openai_response

ChatCompletion(id='chatcmpl-D2ltTFJ65w8NXqLbXmLoSrUAncP6i', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The capital of France is Paris.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1769553131, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_c4585b5b9c', usage=CompletionUsage(completion_tokens=7, prompt_tokens=31, total_tokens=38, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

### Add Instructor (Structured Outputs)

In [5]:
# Wrap an OpenAI client with instructor. This notebook-level `client` is what the
# structured-output cells and generate_answer() call.
# NOTE(review): OpenAI() is built with no arguments, so credentials presumably
# come from the environment — confirm.
client = instructor.from_openai(openai.OpenAI())

In [7]:
# Response schema passed as `response_model`: one required string field.
# NOTE(review): "Repsonse" looks like a typo for "Response". The name is referenced
# by later cells and shows up as the tool-call function name in the raw response
# captured in this notebook, so it is left unchanged.
# Documentation is kept in `#` comments rather than a class docstring: pydantic
# copies a model docstring into the generated JSON schema, which would change the
# schema derived from this class.
class RAGGenerationRepsonse(BaseModel):
    answer: str = Field(description="The answer to the question")

In [None]:
# create_with_completion returns a (parsed_model, raw_completion) tuple, whereas
# create returns only the parsed model. Both parts are kept here so the raw
# completion can be inspected in a following cell.
response, raw_response = client.chat.completions.create_with_completion(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": prompt}
    ],
    temperature=0,
    response_model=RAGGenerationRepsonse
)

In [17]:
# Parsed result: an instance of the model given as `response_model`.
response

RAGGenerationRepsonse(answer='The capital of France is Paris.')

In [18]:
# Raw completion returned alongside the parsed model.
raw_response

ChatCompletion(id='chatcmpl-D2luAXf1IRUsgyA3ft7CDMtie99hz', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=[ChatCompletionMessageFunctionToolCall(id='call_uqqq1r7PNjcnhGp8u8NC5kYK', function=Function(arguments='{"answer":"The capital of France is Paris."}', name='RAGGenerationRepsonse'), type='function')]))], created=1769553174, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_29330a9688', usage=CompletionUsage(completion_tokens=11, prompt_tokens=104, total_tokens=115, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=None, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=None), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [20]:
# Response schema with two required string fields: the answer and the reasoning
# behind it.
# NOTE(review): "Repsonse" looks like a typo for "Response"; the name is used by
# the following cell, so it is left unchanged.
# Documentation is kept in `#` comments rather than a class docstring: pydantic
# copies a model docstring into the generated JSON schema, which would change the
# schema derived from this class.
class RAGGenerationRepsonseReasoning(BaseModel):
    answer: str = Field(description="The answer to the question")
    reasoning: str = Field(description="The reasoning for the answer")

In [21]:
# Same request as the previous structured call, but parsed into the model that
# also carries a `reasoning` field. Rebinds `response` and `raw_response`.
response, raw_response = client.chat.completions.create_with_completion(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": prompt}
    ],
    temperature=0,
    response_model=RAGGenerationRepsonseReasoning
)

In [22]:
# Parsed result, now including the `reasoning` field.
response

RAGGenerationRepsonseReasoning(answer='The capital of France is Paris.', reasoning='Paris is the largest city in France and serves as its political, economic, and cultural center. It has been the capital since the 10th century.')

### RAG Example

In [23]:
# Response schema used by generate_answer(): one required string field.
# NOTE(review): this re-declares a model with the same name and field as the one
# defined in the structured-outputs section; when both cells run, this later
# definition is the one the name refers to.
class RAGGenerationRepsonse(BaseModel):
    answer: str = Field(description="The answer to the question")

In [None]:
def get_embeddings(text, model="text-embedding-3-small"):
    """Embed `text` through the module-level OpenAI embeddings API.

    Args:
        text: Value forwarded unchanged as the request's `input`.
        model: Name of the embedding model to request.

    Returns:
        The `embedding` attribute of the first entry in the response's `data`.
    """
    embedding_response = openai.embeddings.create(model=model, input=text)
    # Only the first entry is used, so one vector is returned per call.
    first_entry = embedding_response.data[0]
    return first_entry.embedding

def retrieve_data(query, qdrant_client, k=5, collection_name="Amazon-items-collection-00"):
    """Fetch the nearest items for `query` from a Qdrant collection.

    The query text is embedded with `get_embeddings` and the resulting vector is
    passed to `qdrant_client.query_points`.

    Args:
        query: Text to search for.
        qdrant_client: Object exposing `query_points(collection_name=..., query=..., limit=...)`.
        k: Maximum number of points to request (sent as `limit`).
        collection_name: Collection to search. The default is the name this
            function previously hard-coded, so existing calls behave the same.

    Returns:
        dict with four parallel lists, one entry per returned point:
        `retrieved_context_ids` (payload `parent_asin`), `retrieved_context`
        (payload `description`), `similiarity_scores` (the point's `score`) and
        `retrieved_context_ratings` (payload `average_rating`).

    Raises:
        KeyError: If a returned point's payload lacks one of the keys above.
    """
    query_embedding = get_embeddings(query)
    results = qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=k,
    )

    # Materialise once so the points can be walked several times below.
    points = list(results.points)

    # The key spellings (including "similiarity") are kept exactly: other
    # functions in this notebook read the result by these names.
    return {
        "retrieved_context_ids": [point.payload['parent_asin'] for point in points],
        "retrieved_context": [point.payload['description'] for point in points],
        "similiarity_scores": [point.score for point in points],
        "retrieved_context_ratings": [point.payload['average_rating'] for point in points],
    }

def process_context(context):
    """Render retrieved items as text, one bullet line per item.

    Args:
        context: Mapping with the parallel sequences `retrieved_context_ids`,
            `retrieved_context` and `retrieved_context_ratings`.

    Returns:
        str: One newline-terminated line per item in the form
        "- ID: <id>, Rating: <rating>, Description: <description>"; an empty
        string when there are no items.
    """
    rows = zip(
        context['retrieved_context_ids'],
        context['retrieved_context'],
        context['retrieved_context_ratings'],
    )
    return "".join(
        f"- ID: {item_id}, Rating: {item_rating}, Description: {description}\n"
        for item_id, description, item_rating in rows
    )

def build_prompt(preprocessed_context, question):
    """Assemble the prompt text for one question over the retrieved items.

    Args:
        preprocessed_context: Text placed under the "Context:" heading.
        question: Text placed under the "Question:" heading.

    Returns:
        str: The instruction template with both values interpolated. The text
        starts with a newline, and every following line — including the blank
        separator lines and the final one — carries a four-space indent.
    """
    # NOTE(review): the wording (including "assistent" and "as a the") is
    # reproduced exactly, since this is the text the model receives.
    pad = "    "
    template_lines = [
        "",
        pad + "You are a shopping assistent that can answer questions about the products in stock.",
        pad,
        pad + "You will be given a question and a list of context.",
        pad,
        pad + "Instructions:",
        pad + "- You need to answer the questions based on the provided context only.",
        pad + "- Never use word context and refer to it as a the available products.",
        pad,
        pad + "Context:",
        f"{pad}{preprocessed_context}",
        pad,
        pad + "Question:",
        f"{pad}{question}",
        pad,
    ]
    return "\n".join(template_lines)

def generate_answer(prompt, model="gpt-4.1-mini"):
    """Request a structured answer for `prompt` from the chat model.

    Uses the notebook-level instructor `client` and sends the prompt as a single
    system message with temperature 0.

    Args:
        prompt: Full prompt text to send.
        model: Chat model name. The default is the value this function
            previously hard-coded, so existing calls behave the same.

    Returns:
        The parsed response, i.e. the first element of the tuple returned by
        `create_with_completion` for `response_model=RAGGenerationRepsonse`.
    """
    # create_with_completion returns (parsed, raw); the raw completion is not
    # needed here, so it is discarded.
    parsed, _ = client.chat.completions.create_with_completion(
        model=model,
        messages=[{"role": "system", "content": prompt}],
        temperature=0,
        response_model=RAGGenerationRepsonse,
    )

    return parsed

def rag_pipeline(question, qdrant_client, k=5):
    """Run retrieval, prompt construction and generation for one question.

    Args:
        question: The user's question.
        qdrant_client: Client forwarded to `retrieve_data`.
        k: Number of items to retrieve.

    Returns:
        dict with the keys `datamodel` (the object returned by
        `generate_answer`), `answer` (its `answer` attribute), `question`,
        `retrieved_context_ids`, `retrieved_context` and `similiarity_scores`.
        The ratings returned by `retrieve_data` feed the prompt only and are
        not part of the result.
    """
    retrieval = retrieve_data(question, qdrant_client, k)
    context_text = process_context(retrieval)
    answer_model = generate_answer(build_prompt(context_text, question))

    return dict(
        datamodel=answer_model,
        answer=answer_model.answer,
        question=question,
        retrieved_context_ids=retrieval['retrieved_context_ids'],
        retrieved_context=retrieval['retrieved_context'],
        similiarity_scores=retrieval['similiarity_scores'],
    )

In [33]:
# Client for the Qdrant server at http://localhost:6333.
# NOTE(review): the cells below presumably need that server running with the
# collection queried by retrieve_data() already populated — confirm the setup.
qdrant_client = QdrantClient(url="http://localhost:6333")

In [34]:
# End-to-end run of the pipeline for one shopping question, using the default k=5.
output = rag_pipeline("Can I get charging cable, please suggest me a good one", qdrant_client)

In [35]:
# Inspect the full result dict returned by rag_pipeline().
output

{'datamodel': RAGGenerationRepsonse(answer='A good charging cable option available is the BLACKSYNCZE USB C to Lightning Cable 2Pack 6FT. It is MFi Certified, supports 20W PD fast charging, and is compatible with a wide range of iPhone models including iPhone 13, 12, 11, XR, XS, X, 8, 7, 6s, and SE2020. It features a durable nylon braided design and comes with a full lifetime warranty and 24/7 customer service. This cable can charge an iPhone 13 or 12 up to 50% in just 30 minutes, which is 2.5 times faster than a standard lightning cable.'),
 'answer': 'A good charging cable option available is the BLACKSYNCZE USB C to Lightning Cable 2Pack 6FT. It is MFi Certified, supports 20W PD fast charging, and is compatible with a wide range of iPhone models including iPhone 13, 12, 11, XR, XS, X, 8, 7, 6s, and SE2020. It features a durable nylon braided design and comes with a full lifetime warranty and 24/7 customer service. This cable can charge an iPhone 13 or 12 up to 50% in just 30 minutes

In [38]:
# Print just the answer text, without the surrounding dict repr.
print(output['answer'])

A good charging cable option available is the BLACKSYNCZE USB C to Lightning Cable 2Pack 6FT. It is MFi Certified, supports 20W PD fast charging, and is compatible with a wide range of iPhone models including iPhone 13, 12, 11, XR, XS, X, 8, 7, 6s, and SE2020. It features a durable nylon braided design and comes with a full lifetime warranty and 24/7 customer service. This cable can charge an iPhone 13 or 12 up to 50% in just 30 minutes, which is 2.5 times faster than a standard lightning cable.
