In [30]:
import pandas as pd
df = pd.read_csv('wine-raitngs.csv')
df = df[df['variety'].notna()] # remove any NaN values as it blows up serialization
#data = df.to_dict('records')
data = df.to_dict('records')[:1000]  # Use only 1,000 rows for now

df

Unnamed: 0,name,region,variety,rating,notes
0,1000 Stories Bourbon Barrel Aged Batch Blue Ca...,"Mendocino, California",Red Wine,91,"This is a very special, limited release of 100..."
1,1000 Stories Bourbon Barrel Aged Gold Rush Red...,California,Red Wine,89,The California Gold Rush was a period of coura...
2,1000 Stories Bourbon Barrel Aged Gold Rush Red...,California,Red Wine,90,The California Gold Rush was a period of coura...
3,1000 Stories Bourbon Barrel Aged Zinfandel 2013,"North Coast, California",Red Wine,91,"The wine has a deep, rich purple color. An int..."
4,1000 Stories Bourbon Barrel Aged Zinfandel 2014,California,Red Wine,90,Batch #004 is the first release of the 2014 vi...
...,...,...,...,...,...
32975,Byron Chardonnay 2006,"Santa Maria Valley, Central Coast, California",White Wine,88,Staying true to the consistent nature of Chard...
32976,Booker Vineyard White 2011,"Paso Robles, Central Coast, California",White Wine,92,The wine starts with honeysuckle and dried pea...
32977,Keenan Napa Valley Cabernet Sauvignon (375ML h...,"Napa Valley, California",Red Wine,91,The 2010 Cabernet Sauvignon is composed primar...
32978,Hearst Ranch Randolph 2015,"Paso Robles, Central Coast, California",Red Wine,89,The 2015 Randolph is a stunning wine as versat...


In [31]:
!pip install qdrant-client sentence-transformers



In [32]:
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient, models

print("Done")


Done


In [33]:
encoder = SentenceTransformer('all-MiniLM-L6-v2') # Model to create embeddings

In [34]:
# create the vector database client
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [35]:
# Create collection to store books
qdrant.recreate_collection(
    collection_name="top_wines",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model
        distance=models.Distance.COSINE
    )
)

  qdrant.recreate_collection(


True

In [36]:
print(len(data))


1000


In [37]:
#Optimized vectorization

#Batch encode all at once
texts = [doc["notes"] for doc in data]
vectors = encoder.encode(texts).tolist()

# Upload all points to Qdrant
qdrant.upload_points(
    collection_name="top_wines",
    points=[
        models.PointStruct(
            id=idx,
            vector=vector,
            payload=doc
        ) for idx, (doc, vector) in enumerate(zip(data, vectors))
    ]
)


In [39]:
!pip install --upgrade qdrant-client



In [52]:
# Step 1: Define the user's query for wine recommendation
query = "Suggest me an amazing Malbec wine from Argentina."

# Encode the query into a vector using the same encoder model
query_vector = encoder.encode(query).tolist()

# Step 2: Query Qdrant to find the most similar wines based on the query vector
# We use query_points as it's the recommended method over the deprecated search()
raw_hits = qdrant.query_points(
    collection_name="top_wines",
    query=query_vector,
    limit=3 # Retrieve the top 3 most similar wines
)

# Extract the actual list of ScoredPoint objects from the raw_hits.
# Based on previous error, raw_hits could be a tuple like ('points', [ScoredPoint, ...])
if isinstance(raw_hits, tuple) and len(raw_hits) > 1 and isinstance(raw_hits[1], list):
    hits = raw_hits[1]
else:
    hits = raw_hits # Assume it's already the list of hits if not in the tuple format

# Step 3: Display the retrieved wines in a clean, readable format
print("🔍 Top Retrieved Wines:\n" + "-" * 50)
for hit in hits:
    # Check if the hit is an instance of models.ScoredPoint (the expected type)
    if isinstance(hit, models.ScoredPoint):
        payload_to_print = hit.payload
        score_to_print = hit.score
    else:
        # Fallback: if it's not a ScoredPoint, assume the hit itself is the payload.
        payload_to_print = hit
        score_to_print = hit.get('score', 'N/A') if isinstance(hit, dict) else 'N/A'

    # Print the full payload (wine details) in a pretty-printed JSON format
    try:
        print(json.dumps(payload_to_print, indent=2))
    except TypeError:
        print(payload_to_print)

    # Print the similarity score for each retrieved wine
    print(f"Score: {score_to_print:.3f}" if isinstance(score_to_print, (int, float)) else f"Score: {score_to_print}")
    print("-" * 50)

# Step 4: Format the search results into a single string to be used as context for the LLM
search_results = []
for hit in hits:
    if isinstance(hit, models.ScoredPoint):
        search_results.append(json.dumps(hit.payload, indent=2))
    else:
        try:
            search_results.append(json.dumps(hit, indent=2))
        except TypeError:
            search_results.append(str(hit))

context = "\n\n".join(search_results)

# Step 5: Generate a recommendation using the Gemini API (instead of local LLM)
# This part will be executed in the browser environment, making a fetch call to the Gemini API.
# The `apiKey` will be automatically provided by the Canvas runtime.
js_code = f"""
async function getGeminiRecommendation() {{
    let chatHistory = [];
    let prompt = `You are a helpful wine specialist chatbot. Use the provided wine data to recommend wines.
Suggest me an amazing Malbec wine from Argentina.

Here are some options:
{context}`;

    chatHistory.push({{ role: "user", parts: [{{ text: prompt }}] }});
    const payload = {{ contents: chatHistory }};
    const apiKey = ""; // Canvas will automatically provide this in runtime
    const apiUrl = `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=${{apiKey}}`;

    try {{
        const response = await fetch(apiUrl, {{
            method: 'POST',
            headers: {{ 'Content-Type': 'application/json' }},
            body: JSON.stringify(payload)
        }});
        const result = await response.json();

        if (result.candidates && result.candidates.length > 0 &&
            result.candidates[0].content && result.candidates[0].content.parts &&
            result.candidates[0].content.parts.length > 0) {{
            const text = result.candidates[0].content.parts[0].text;
            // Display the response in the Colab output
            google.colab.output.setIframeOutput({{
                data: {{ 'text/plain': `LLM Response:\\n--------------------------------------------------\\n${{text}}` }}
            }});
        }} else {{
            google.colab.output.setIframeOutput({{
                data: {{ 'text/plain': ` LLM Response:\\n--------------------------------------------------\\nError: No valid response from LLM.` }}
            }});
        }}
    }} catch (error) {{
        google.colab.output.setIframeOutput({{
            data: {{ 'text/plain': `LLM Response:\\n--------------------------------------------------\\nError connecting to LLM: ${{error.message}}` }}
        }});
    }}
}}

getGeminiRecommendation();
"""

from IPython.display import display, Javascript
display(Javascript(js_code))


🔍 Top Retrieved Wines:
--------------------------------------------------
('points', [ScoredPoint(id=620, version=0, score=0.7736117472886996, payload={'name': 'Alamos Malbec 2012', 'region': 'Argentina', 'variety': 'Red Wine', 'rating': 89, 'notes': 'A classically Argentine wine, the Alamos 2012 Malbec is blended with small portions of Syrah and Bonarda to meld deep dark cherry and blackberry flavors with hints of brown spice and vanilla. A long finish and firm tannins make this Malbec unforgettable.'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=619, version=0, score=0.7550131527667095, payload={'name': 'Alamos Malbec 2010', 'region': 'Argentina', 'variety': 'Red Wine', 'rating': 88, 'notes': 'Malbec is the signature wine of the Mendoza region and represents the highest achievements of Argentine winemaking. The Alamos Malbec has dark, blackish purple color. The nose shows ripe black fruits, black pepper spice and floral notes. The mouthfeel is full yet soft and sup

<IPython.core.display.Javascript object>