In [2]:
import pandas as pd
df = pd.read_csv('../../top_rated_wines.csv')
df = df[df['variety'].notna()] # remove any NaN values as it blows up serialization
data = df.sample(700).to_dict('records') # Get only 700 records. More records will make it slower to index
len(data)

700

In [3]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
encoder = SentenceTransformer('all-MiniLM-L6-v2') # Model to create embeddings

In [5]:
# create the vector database client
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [6]:
# Create collection to store wines
qdrant.recreate_collection(
    collection_name="top_wines",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model
        distance=models.Distance.COSINE
    )
)

  qdrant.recreate_collection(


True

In [7]:
# vectorize!
qdrant.upload_points(
    collection_name="top_wines",
    points=[
        models.PointStruct(
            id=idx,
            vector=encoder.encode(doc["notes"]).tolist(),
            payload=doc,
        ) for idx, doc in enumerate(data) # data is the variable holding all the wines
    ]
)

In [8]:
user_prompt = "Suggest me an amazing Malbec wine from Argentina"

In [9]:
# Search time for awesome wines!

hits = qdrant.search(
    collection_name="top_wines",
    query_vector=encoder.encode(user_prompt).tolist(),
    limit=3
)
for hit in hits:
  print(hit.payload, "score:", hit.score)

{'name': 'Bodega Colome Altura Maxima Malbec 2012', 'region': 'Salta, Argentina', 'variety': 'Red Wine', 'rating': 96.0, 'notes': 'Winemaker Thibaut Delmotte has crafted wines of distinction and international acclaim for Colome. He believes the Malbec from Altura Maxima Vineyard is the embodiment of two extremes - a traditional grape variety from his French origins made from the vineyard that challenges all convention in the modern viticultural world.'} score: 0.6179682167169551
{'name': 'Catena Zapata Nicasia Vineyard Malbec 2004', 'region': 'Argentina', 'variety': 'Red Wine', 'rating': 96.0, 'notes': '"The single-vineyard 2004 Malbec Nicasia Vineyard is located in the Altamira district of Mendoza. It was aged for 18 months in new French oak. Opaque purple-colored, it exhibits a complex perfume of pain grille, scorched earth, mineral, licorice, blueberry, and black cherry. Thick on the palate, bordering on opulent, it has layers of fruit, silky tannins, and a long, fruit-filled finish

  hits = qdrant.search(


In [10]:
# define a variable to hold the search results
search_results = [hit.payload for hit in hits]

In [11]:
import torch
print(torch.cuda.is_available())  # Should return True

True


In [12]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "eagle0504/finetuned-deepseek-r1-distill-qwen-1.5b-by-openai-gsm8k-enhanced"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move model to GPU
model = model.to("cuda")

In [14]:
# Step 1: Semantic search for best wine matches
top_hits = qdrant.search(
    collection_name="top_wines",
    query_vector=encoder.encode(user_prompt).tolist(),
    limit=3
)

# Step 2: Format top wine data for the LLM
search_results = "\n".join(
    f"{hit.payload['name']} ({hit.payload['region']}): {hit.payload['notes']}" for hit in top_hits
)

# Step 3: Ask your LLM to generate a nice reply
prompt = f"""You are a wine expert. A user asked: "{user_prompt}"

Here are some matching wines:
{search_results}

Based on these, suggest one and explain why it's a great Malbec wine from Argentina.
Assistant:"""

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=256, do_sample=True)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response.split("Assistant:")[-1].strip())

  top_hits = qdrant.search(
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


To suggest one and explain why it's a great Malbec wine from Argentina, we need to evaluate the qualities of the provided wines and identify the standout features of "Catena Zapata Nicasia Vineyard Malbec 2004" based on the criteria of being from Argentina and being a high-quality Malbec.

1. **Location and Region**: The wine is from Barbaresco, Piedmont, Italy. While it is not from Argentina, it is a significant example of a high-quality Malbec wine from a region with rich history and strategic location in the Italian wine circuit.

2. **Aromas and Characters**: The wine exhibits complex aromas of pain grille, Scorched Earth, mineral, licorice, blueberry, and black cherry. These aromas are typical of modern Malbec wines and add to the depth of the experience.

3. **Pallage**: The wine is thick on the palate with layers of fruit, silky tannins, and a long, fruit-filled finish. This balance of complexity and balance is a hallmark of some of the best Malbec wines.

4. **Aging**: The wine