In [23]:
import openai
import os
import json

from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

from langsmith import Client

In [3]:
# Client for the Qdrant instance at http://localhost:6333; every scroll call
# in this notebook goes through it.
qdrant_client = QdrantClient(url="http://localhost:6333")

### Download all data from Qdrant

In [5]:
# Fetch up to 1000 points from the collection in a single scroll call,
# with payloads and without vectors.
# The result is kept as returned: later cells use all_points[0] as the list
# of points.
# NOTE(review): one call returns at most `limit` points, so this is only
# "all data" if the collection holds <= 1000 items. Presumably the second
# element of the result is the offset for the next page -- confirm and
# paginate if the collection grows.
all_points = qdrant_client.scroll(
    collection_name="Amazon-items-collection-00",
    limit=1000,
    offset=None,
    with_payload=True,
    with_vectors=False
)



In [21]:
all_points[0][0].payload

{'description': 'BOVKE Travel Carrying Case for Samaung T7 Shield / T7 / T7 Touch Portable SSD 500GB 1TB 2TB USB 3.2 External Solid State Drives, Extra Mesh Pocket for USB Cables and More Accessories, Black Case Only! ( samsung ssd & accessories not included )This external hard drive case perfectly fit for: Samsung T7 Shield Portable SSD, Samsung T7 Portable SSD, Samsung T7 Touch 500GB 1TB 2TB USB 3.2 Portable Solid State Drives, provide excellent protection, keeps the solid state external hard drives safe from damage and prolongs the service life of your T7 external ssd. This samsung t7 case bag is featured with 2 elastic bands inside, securely store the samsung t7 shield portable ssd, and the T7 Carrying Case can prevent the Samsung external ssd from falling, protects your external solid state drives from tossing around while on the go, gives your portable ssd great protection. This T7 shield hard drive carrying case comes with a mesh pocket to accommodate the USB cables of the Samsu

In [10]:
# Reduce every retrieved point to the two fields the prompt needs:
# the parent ASIN (used as the chunk id) and the description text.
all_context = [
    {
        "id": point.payload["parent_asin"],
        "text": point.payload["description"],
    }
    for point in all_points[0]
]

In [24]:
all_context

[{'id': 'B01N1P97SL',
  'text': 'BOVKE Travel Carrying Case for Samaung T7 Shield / T7 / T7 Touch Portable SSD 500GB 1TB 2TB USB 3.2 External Solid State Drives, Extra Mesh Pocket for USB Cables and More Accessories, Black Case Only! ( samsung ssd & accessories not included )This external hard drive case perfectly fit for: Samsung T7 Shield Portable SSD, Samsung T7 Portable SSD, Samsung T7 Touch 500GB 1TB 2TB USB 3.2 Portable Solid State Drives, provide excellent protection, keeps the solid state external hard drives safe from damage and prolongs the service life of your T7 external ssd. This samsung t7 case bag is featured with 2 elastic bands inside, securely store the samsung t7 shield portable ssd, and the T7 Carrying Case can prevent the Samsung external ssd from falling, protects your external solid state drives from tossing around while on the go, gives your portable ssd great protection. This T7 shield hard drive carrying case comes with a mesh pocket to accommodate the USB cab

### Render a prompt to generate synthetic Eval reference dataset

In [29]:
# JSON Schema for the model's answer: an array of question objects.
# It is embedded verbatim in SYSTEM_PROMPT, so it is the only contract the
# model sees for the output shape.
output_schema = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "Suggested question.",
            },
            "chunk_id": {
                "type": "array",
                # Chunk ids are the string ids supplied with each chunk; state
                # the element type so the model does not invent another shape.
                "items": {"type": "string"},
                "description": "IDs of the chunks that could be used to answer the question.",
            },
            "answer_example": {
                "type": "string",
                "description": "Suggested answer grounded in the context.",
            },
            "reasoning": {
                "type": "string",
                "description": "Reasoning why the question could be answered by the chunks.",
            },
        },
        # Downstream cells index these keys unconditionally, so every object
        # must carry all of them.
        "required": ["question", "chunk_id", "answer_example", "reasoning"],
    },
}

# System prompt: describes the task, the question mix (10 multi-chunk,
# 15 single-chunk, 5 unanswerable = 30) and the expected JSON output shape.
# The chunk count is taken from the data actually loaded into `all_context`
# instead of being hard-coded, so the prompt stays truthful if the collection
# size changes.
SYSTEM_PROMPT = f"""
I am building a RAG application. I have a collection of {len(all_context)} chunks of text.
The RAG application will act as a shopping assistant that can answer questions about the stock of the products we have available.
I will provide all of the available products to you with IDs of each chunk.
I want you to come up with 30 questions to which the answers could be grounded in the chunk context.
The questions should imitate a potential real user of this RAG system.
As an output I need you to provide me the list of questions and the IDs of the chunks that could be used to answer them.
Also, provide an example answer to the question given the context of the chunks
Also, provide the reason why you chose the chunks to answer the questions.
Construct 10 questions that could use multiple chunk in the answer.
Construct 15 questions that could use single chunk in the answer.
Construct 5 questions that cannot be answered by the available chunks.

<OUTPUT JSON SCHEMA>
{json.dumps(output_schema, indent=2)}
</OUTPUT JSON SCHEMA>

I need to be able to parse the json output.
"""

# User prompt: the chunks themselves, interpolated as the Python repr of the
# list of {"id", "text"} dictionaries.
USER_PROMPT = f"""
Here is the list of chunks, each list element is a dictionary with id and text fields:
{all_context}
"""

In [27]:
print(SYSTEM_PROMPT)


I am building a RAG application. I have a collection of 50 chunks of text.
The RAG application will act as a shopping assistant that can answer questions about the stock of the products we have available.
I will provide all of the available products to you with IDs of each chunk.
I want you to come up with 30 questions to which the answers could be grounded in the chunk context.
The questions should imitate a potential real user of this RAG system.
As an output I need you to provide me the list of questions and the IDs of the chunks that could be used to answer them.
Also, provide an example answer to the question given the context of the chunks
Also, provide the reason why you chose the chunks to answer the questions.
Construct 10 questions that could use multiple chunk in the answer.
Construct 15 questions that could use single chunk in the answer.
Construct 5 questions that cannnot be answered by the available chunks.

<OUTPUT JSON SCHEMA>
{
  "type": "array",
  "items": {
    "typ

In [30]:
print(USER_PROMPT)


Here is the list of chunks, each list element is a dictionary with id and text fields:
[{'id': 'B01N1P97SL', 'text': 'BOVKE Travel Carrying Case for Samaung T7 Shield / T7 / T7 Touch Portable SSD 500GB 1TB 2TB USB 3.2 External Solid State Drives, Extra Mesh Pocket for USB Cables and More Accessories, Black Case Only! ( samsung ssd & accessories not included )This external hard drive case perfectly fit for: Samsung T7 Shield Portable SSD, Samsung T7 Portable SSD, Samsung T7 Touch 500GB 1TB 2TB USB 3.2 Portable Solid State Drives, provide excellent protection, keeps the solid state external hard drives safe from damage and prolongs the service life of your T7 external ssd. This samsung t7 case bag is featured with 2 elastic bands inside, securely store the samsung t7 shield portable ssd, and the T7 Carrying Case can prevent the Samsung external ssd from falling, protects your external solid state drives from tossing around while on the go, gives your portable ssd great protection. This 

In [None]:
# Conversation sent to the model: task instructions first, then the chunks.
chat_messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": USER_PROMPT},
]

# Single completion request; the reply text is parsed as JSON further down.
response = openai.chat.completions.create(
    model="gpt-5-mini",
    messages=chat_messages,
    reasoning_effort="minimal",
)



In [32]:
print(response.choices[0].message.content)

[
  {
    "question": "Does the BOVKE carrying case fit the Samsung T7 Shield and other T7 models?",
    "chunk_id": [
      "B01N1P97SL"
    ],
    "answer_example": "Yes. The BOVKE Travel Carrying Case is designed to perfectly fit the Samsung T7 Shield, Samsung T7, and Samsung T7 Touch (500GB/1TB/2TB) portable SSDs and provides interior elastic bands and a mesh pocket for cables.",
    "reasoning": "Chunk B01N1P97SL explicitly lists compatible Samsung T7 models and describes the case features and fit."
  },
  {
    "question": "What materials and protection does the Samsung T7 carrying case offer?",
    "chunk_id": [
      "B01N1P97SL"
    ],
    "answer_example": "The case is made of hard EVA and PU material with an ultra-soft velvet interior lining; it's waterproof and shockproof to protect the Samsung T7 from extrusion, dust, splashes, scratches, bumps, and drops.",
    "reasoning": "Chunk B01N1P97SL details the construction materials and protective features of the case."
  },
  {

In [33]:
# Parse the model's reply (expected to be a JSON array of question objects)
# straight into Python objects; json.loads raises if the reply is not valid JSON.
json_output = json.loads(response.choices[0].message.content)

In [34]:
json_output

[{'question': 'Does the BOVKE carrying case fit the Samsung T7 Shield and other T7 models?',
  'chunk_id': ['B01N1P97SL'],
  'answer_example': 'Yes. The BOVKE Travel Carrying Case is designed to perfectly fit the Samsung T7 Shield, Samsung T7, and Samsung T7 Touch (500GB/1TB/2TB) portable SSDs and provides interior elastic bands and a mesh pocket for cables.',
  'reasoning': 'Chunk B01N1P97SL explicitly lists compatible Samsung T7 models and describes the case features and fit.'},
 {'question': 'What materials and protection does the Samsung T7 carrying case offer?',
  'chunk_id': ['B01N1P97SL'],
  'answer_example': "The case is made of hard EVA and PU material with an ultra-soft velvet interior lining; it's waterproof and shockproof to protect the Samsung T7 from extrusion, dust, splashes, scratches, bumps, and drops.",
  'reasoning': 'Chunk B01N1P97SL details the construction materials and protective features of the case.'},
 {'question': 'How many items and projects are included in 

In [35]:
len(json_output)

58

In [36]:
# Exploratory lookup: scroll the collection for points whose `parent_asin`
# payload field equals one specific ASIN. The trailing [0] keeps only the
# first element of the scroll result, which the next cell indexes as a list
# of points.
points = qdrant_client.scroll(
    collection_name="Amazon-items-collection-00",
    scroll_filter=Filter(
        must=[
            FieldCondition(
                key="parent_asin",
                match=MatchValue(value="B0B64X77P7")
            )
        ]
    ),
    limit=1000,
    with_payload=True,
    with_vectors=False
)[0]

In [37]:
points[0].payload

{'description': '4 in 1 Ethernet Adapter and 3 Ports USB OTG Hub for Fire TV Stick 4K/Chromecast/Google Home Mini/Raspberry Pi Zero and Other Streaming TV Sticks,Micro USB OTG Cable HUB with Powered Network Adapter Add extra wired Ethernet & USB 3 ports extra storage to your streaming device for connecting wired network,flash drive,wireless keyboard or whatever USB peripherals,Stable work at the same time.(please note: Not compatible with Fire Stick 1, Roku Stick/Express,Fire HD 10,Google Home Mini cellphones, tablets, and Laptops,USB only for charging.) Support 10/100M wired network Ethernet connection, faster and more stableÔºåand 3 ports (480Mbps) of Simultaneous data transfer possible.no buffering, no lag. 4-in-1 integrated design, no need to combine, the signal transmission is more stable. The length is 3.3 ft. It is easy to operate and allows you to place it anywhere without worrying about loose connections. Plug & Play,no additional driver/software needed. Just need to connect i

In [38]:
def get_description(parent_asin: str) -> str:
    """Return the description of the first stored item with the given parent ASIN.

    The item is looked up in the ``Amazon-items-collection-00`` Qdrant
    collection by filtering on the ``parent_asin`` payload field.

    Args:
        parent_asin: Value to match against the ``parent_asin`` payload field.

    Returns:
        The ``description`` payload field of the first matching point.

    Raises:
        IndexError: If no point matches ``parent_asin``. The exception type is
            the one an empty result always produced; the message now names
            the missing id so a bad id (e.g. one invented by the LLM) is easy
            to spot.
    """
    # Only the first match is ever used, so request a single point instead of
    # pulling up to 1000 payloads per lookup.
    matches = qdrant_client.scroll(
        collection_name="Amazon-items-collection-00",
        scroll_filter=Filter(
            must=[
                FieldCondition(
                    key="parent_asin",
                    match=MatchValue(value=parent_asin)
                )
            ]
        ),
        limit=1,
        with_payload=True,
        with_vectors=False
    )[0]

    if not matches:
        raise IndexError(
            f"No point with parent_asin={parent_asin!r} found in "
            "collection 'Amazon-items-collection-00'"
        )

    return matches[0].payload["description"]

In [39]:
get_description("B0B64X77P7")

'4 in 1 Ethernet Adapter and 3 Ports USB OTG Hub for Fire TV Stick 4K/Chromecast/Google Home Mini/Raspberry Pi Zero and Other Streaming TV Sticks,Micro USB OTG Cable HUB with Powered Network Adapter Add extra wired Ethernet & USB 3 ports extra storage to your streaming device for connecting wired network,flash drive,wireless keyboard or whatever USB peripherals,Stable work at the same time.(please note: Not compatible with Fire Stick 1, Roku Stick/Express,Fire HD 10,Google Home Mini cellphones, tablets, and Laptops,USB only for charging.) Support 10/100M wired network Ethernet connection, faster and more stableÔºåand 3 ports (480Mbps) of Simultaneous data transfer possible.no buffering, no lag. 4-in-1 integrated design, no need to combine, the signal transmission is more stable. The length is 3.3 ft. It is easy to operate and allows you to place it anywhere without worrying about loose connections. Plug & Play,no additional driver/software needed. Just need to connect it to an availabl

### Create Eval dataset in Langsmith

In [44]:
# LangSmith client authenticated with the key from the environment;
# os.environ[...] raises KeyError right here if LANGSMITH_API_KEY is unset.
client = Client(api_key=os.environ["LANGSMITH_API_KEY"])

In [69]:
# Create the LangSmith dataset that will hold the synthetic evaluation examples.
# `dataset` is reused by the example-creation cell below (it reads dataset.id).
# NOTE(review): create_dataset is called unconditionally; presumably re-running
# this cell with an already-used name fails, which would explain the manual
# "-v3" suffix -- confirm, and bump the suffix when regenerating.
dataset_name = "rag-evaluation-dataset-v3"
dataset = client.create_dataset(
    dataset_name=dataset_name,
    description="Evaluation dataset for RAG application"
)

In [70]:
# Phase 1: build every example payload up front.
# All description lookups happen here, before anything is written, so a failed
# lookup (e.g. a chunk id the model made up) aborts with the dataset still
# untouched instead of leaving it half-populated.
examples_to_create = []
for item in json_output:
    # Unanswerable questions carry no chunk ids; treat a missing or null
    # `chunk_id` the same as an empty list.
    chunk_ids = item.get("chunk_id") or []
    examples_to_create.append(
        {
            "inputs": {"question": item["question"]},
            "outputs": {
                "ground_truth": item["answer_example"],
                "reference_context_ids": chunk_ids,
                "reference_description": [
                    get_description(chunk_id) for chunk_id in chunk_ids
                ],
            },
        }
    )

# Phase 2: write the prepared examples to the LangSmith dataset.
for example_payload in examples_to_create:
    client.create_example(
        dataset_id=dataset.id,
        inputs=example_payload["inputs"],
        outputs=example_payload["outputs"],
    )

In [80]:
# List every dataset visible to the LangSmith client created earlier in this
# notebook (name and id).
# The loop variable is deliberately NOT called `dataset`: that name holds the
# dataset created above, and overwriting it here would make a later re-run of
# the example-creation cell write into whichever dataset was listed last.
for listed_dataset in client.list_datasets():
    print(f"{listed_dataset.name} ‚Äî {listed_dataset.id}")

rag-evaluation-dataset-v3 ‚Äî b9bb66b2-c479-4584-8177-f1b746173f48
rag-evaluation-dataset-v2 ‚Äî 0f8f7296-7d1a-4065-ab9d-cc13b7fb5701
rag-evaluation-dataset ‚Äî 30d87d53-8655-4fbd-89b9-cf4074690fc6


In [84]:
from langsmith import Client

client = Client()
dataset_id = "b9bb66b2-c479-4584-8177-f1b746173f48"

# Dump every example of the dataset: id, inputs, outputs and the reference
# chunk ids stored in the outputs, with a separator line after each one.
for example in client.list_examples(dataset_id=dataset_id):
    example_outputs = example.outputs
    print("ID:", example.id)
    print("Inputs:", example.inputs)
    print("Outputs:", example_outputs)
    print("Reference IDs:", example_outputs.get("reference_context_ids"))
    print("-" * 40)

ID: c116a5df-386c-4dc0-92e6-0c4d32d25183
Inputs: {'question': 'Can I mount the 100" LopBast projector screen to the ceiling and will it support 4K input from a Fire TV Cube?'}
Outputs: {'ground_truth': 'Yes ‚Äî the LopBast 100" manual screen supports wall and ceiling installation and is compatible with 4K video. If you use a Fire TV Cube or other 4K source (which may require a downstream HDMI connection), the screen will display 4K input as it supports 4K/Ultra HD projection.', 'reference_context_ids': ['B09Y5P41L8', 'B0B64X77P7'], 'reference_description': ["LopBast Screen 100INCH Manual Pull Down White Projector Screen 16:9 1.2 Gain Retractable Auto-Locking 4K 8K 3D Ultra HD for Home Theater Movie Office Game Projection Screen with Slow Retract Mechanism „ÄêHigh Quality Material„Äë Metal housing design, can effectively prevent damage and deformation during transportation. The projector screen uses a composite fabric of multiple layers, make the surface smoother and flatter, fully blac

In [83]:
from langsmith import Client

client = Client()
dataset_id = "b9bb66b2-c479-4584-8177-f1b746173f48"

# Report the examples whose outputs carry no reference chunk ids
# (key absent, null, or an empty list).
for example in client.list_examples(dataset_id=dataset_id):
    ids = example.outputs.get("reference_context_ids") or []
    if ids:
        continue
    print("Missing reference_context_ids:", example.id)

Missing reference_context_ids: d4789a6f-68cd-4acf-89ba-291427fc36af
