In [1]:
import pandas as pd
df = pd.read_csv('../../top_rated_wines.csv')
df = df[df['variety'].notna()] # remove any NaN values as it blows up serialization
data = df.sample(700).to_dict('records') # Get only 700 records. More records will make it slower to index
len(data)

700

In [2]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
encoder = SentenceTransformer('all-MiniLM-L6-v2') # Model to create embeddings

In [4]:
# create the vector database client
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [5]:
# Create collection to store wines
qdrant.recreate_collection(
    collection_name="top_wines",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model
        distance=models.Distance.COSINE
    )
)

  qdrant.recreate_collection(


True

In [6]:
# vectorize!
qdrant.upload_points(
    collection_name="top_wines",
    points=[
        models.PointStruct(
            id=idx,
            vector=encoder.encode(doc["notes"]).tolist(),
            payload=doc,
        ) for idx, doc in enumerate(data) # data is the variable holding all the wines
    ]
)

In [7]:
user_prompt = "Suggest me an amazing Malbec wine from Argentina"

In [8]:
# Search time for awesome wines!

hits = qdrant.search(
    collection_name="top_wines",
    query_vector=encoder.encode(user_prompt).tolist(),
    limit=3
)
for hit in hits:
  print(hit.payload, "score:", hit.score)

{'name': 'Catena Zapata Argentino Vineyard Malbec 2004', 'region': 'Argentina', 'variety': 'Red Wine', 'rating': 98.0, 'notes': '"The single-vineyard 2004 Malbec Argentino Vineyard spent 17 months in new French oak. Remarkably fragrant and complex aromatically, it offers up aromas of wood smoke, creosote, pepper, clove, black cherry, and blackberry. Made in a similar, elegant style, it is the most structured of the three single vineyard wines, needing a minimum of a decade of additional cellaring. It should easily prove to be a 25-40 year wine. It is an exceptional achievement in Malbec. When all is said and done, Catena Zapata is the Argentina winery of reference – the standard of excellence for comparing all others. The brilliant, forward-thinking Nicolas Catena remains in charge, with his daughter, Laura, playing an increasingly large role. The Catena Zapata winery is an essential destination for fans of both architecture and wine in Mendoza. It is hard to believe, given the surge i

  hits = qdrant.search(


In [9]:
# define a variable to hold the search results
search_results = [hit.payload for hit in hits]

In [10]:
import torch
print(torch.cuda.is_available())  # Should return True

True


In [11]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "eagle0504/finetuned-deepseek-r1-distill-qwen-1.5b-by-openai-gsm8k-enhanced"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move model to GPU
model = model.to("cuda")

In [12]:
import json
# Step 1: Semantic search for best wine matches
top_hits = qdrant.search(
    collection_name="top_wines",
    query_vector=encoder.encode(user_prompt).tolist(),
    limit=3
)

# Step 2: Format top wine data for the LLM
search_results = "\n".join(
    f"{hit.payload['name']} ({hit.payload['region']}): {hit.payload['notes']}" for hit in top_hits
)

# Step 3: Ask your LLM to generate a nice reply
filter_prompt = f"""
You are a helpful assistant. Extract the wine region and grape variety from the following request:
"{user_prompt}"

Respond in JSON format like this:
{{"region": "RegionName", "variety": "GrapeVariety"}}
"""
filter_inputs = tokenizer(filter_prompt, return_tensors="pt").to("cuda")
filter_outputs = model.generate(**filter_inputs, max_new_tokens=128)
filter_response = tokenizer.decode(filter_outputs[0], skip_special_tokens=True)

try:
    filters = json.loads(filter_response.split("```json")[-1].split("```")[0] if "```json" in filter_response else filter_response)
except json.JSONDecodeError:
    filters = {}

try:
    filters = json.loads(filter_response.split("```json")[-1].split("```")[0] if "```json" in filter_response else filter_response)
except json.JSONDecodeError:
    filters = {}

# Step 2: Prepare Qdrant filter dynamically
qdrant_filter = {"must": []}
if "region" in filters:
    qdrant_filter["must"].append({"key": "region", "match": {"value": filters["region"]}})
if "variety" in filters:
    qdrant_filter["must"].append({"key": "variety", "match": {"value": filters["variety"]}})


  top_hits = qdrant.search(
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [13]:
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

qdrant = QdrantClient(host="localhost", port=6333)
encoder = SentenceTransformer("all-MiniLM-L6-v2")  # Your embedding model

top_hits = qdrant.search(
    collection_name="top_wines",
    query_vector=encoder.encode(user_prompt).tolist(),
    limit=3,
    query_filter=qdrant_filter
)

# Step 4: Format hits for LLM response generation
search_results = "\n".join(
    f"{hit.payload['name']} ({hit.payload['region']}): {hit.payload['notes']}" for hit in top_hits
)

# Step 5: Ask the LLM to generate a final friendly response
response_prompt = f"""You are a wine expert. A user asked: "{user_prompt}"

Here are some matching wines:
{search_results}

Based on these, suggest one and explain why it's a great match.
Assistant:"""

response_inputs = tokenizer(response_prompt, return_tensors="pt").to("cuda")
response_outputs = model.generate(**response_inputs, max_new_tokens=256, do_sample=True)
response_text = tokenizer.decode(response_outputs[0], skip_special_tokens=True)

# Extract just the Assistant's reply
final_reply = response_text.split("Assistant:")[-1].strip()
print(final_reply)

  top_hits = qdrant.search(


ResponseHandlingException: [WinError 10061] No connection could be made because the target machine actively refused it