In [9]:
import pandas as pd
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer
import os
from dotenv import load_dotenv
df = pd.read_csv('top_rated_wines.csv')
df = df[df['variety'].notna()] # remove any NaN values as it blows up serialization
data = df.to_dict('records')
df

Unnamed: 0,name,region,variety,rating,notes
0,3 Rings Reserve Shiraz 2004,"Barossa Valley, Barossa, South Australia, Aust...",Red Wine,96.0,Vintage Comments : Classic Barossa vintage con...
1,Abreu Vineyards Cappella 2007,"Napa Valley, California",Red Wine,96.0,Cappella is a proprietary blend of two clones ...
2,Abreu Vineyards Cappella 2010,"Napa Valley, California",Red Wine,98.0,Cappella is one of the oldest vineyard sites i...
3,Abreu Vineyards Howell Mountain 2008,"Howell Mountain, Napa Valley, California",Red Wine,96.0,When David purchased this Howell Mountain prop...
4,Abreu Vineyards Howell Mountain 2009,"Howell Mountain, Napa Valley, California",Red Wine,98.0,"As a set of wines, it is hard to surpass the f..."
...,...,...,...,...,...
1360,Lewis Cellars Alec's Blend Red 2002,"Napa Valley, California",Red Wine,96.0,Number 12 on
1361,Lewis Cellars Cabernet Sauvignon 2002,"Napa Valley, California",Red Wine,96.0,Showcasing the unique personalities of small h...
1362,Lewis Cellars Cuvee L Cabernet Sauvignon 2015,"Napa Valley, California",Red Wine,96.0,"Straight from James Fenimore Cooper’s novel, L..."
1363,Lewis Cellars Reserve Cabernet Sauvignon 2010,"Napa Valley, California",Red Wine,96.0,


In [10]:
#it is widely used for embedding or vector representation in nlp
#This model is trained to generate embeddings for sentences or texts that capture semantic meanin
#it is consist of 6 layers
encoder = SentenceTransformer('all-MiniLM-L6-v2')

In [11]:

# create the vector database client
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [12]:
# Create collection to store wine-related data in a searchable format
#all data is orginized in qdrant by collection
qdrant.recreate_collection(
    collection_name="top_wines",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model
        distance=models.Distance.COSINE # The metric used to compute similarity between vectors.
    )
)


  qdrant.recreate_collection(


True

In [13]:
# vectorize! and upload our data to it

qdrant.upload_points(
    collection_name="top_wines",
    points=[
        models.PointStruct(
            id=idx,
            vector=encoder.encode(doc["notes"]).tolist(),#.tolist() converts the vector (a NumPy array) into a plain Python list, which is required by Qdrant.
            payload=doc
        ) for idx, doc in enumerate(data) # data is the variable holding all the wines
    ]
)

In [14]:
# let do search 

hits = qdrant.search(
    collection_name="top_wines",
    query_vector=encoder.encode("99 points Cabernet Sauvignon from Napa Valley").tolist(),
    limit=3
)
for hit in hits:
  print(hit.payload, "score:", hit.score) ##hit score tells us about the likelihood ssocre

{'name': 'Kapcsandy Family Winery State Lane Cabernet Sauvignon Grand Vin 2017', 'region': 'Yountville, Napa Valley, California', 'variety': 'Red Wine', 'rating': 96.0, 'notes': '100% Cabernet Sauvignon'} score: 0.7492028628033235
{'name': 'Lewis Cellars Cabernet Sauvignon 2002', 'region': 'Napa Valley, California', 'variety': 'Red Wine', 'rating': 96.0, 'notes': 'Showcasing the unique personalities of small hillside vineyards from Pritchard Hill, Oakville and Rutherford, the 2002 Napa Valley Cabernet delivers compelling aromas of mocha, ripe berries, tobacco and sweet oak spice. The wine is 100% Cabernet Sauvignon, complex, rich and focused. With a deep core of black fruit and traces of briar and vanilla, it turns chocolaty and long on the palate with serious, integrated tannins.'} score: 0.7331374842618403
{'name': 'Anakota Helena Montana Vineyard Cabernet Sauvignon 2013', 'region': 'Knights Valley, Sonoma County, California', 'variety': 'Red Wine', 'rating': 96.0, 'notes': 'Blend: 1

In [15]:
# define a variable to hold the search results
search_results = [hit.payload for hit in hits]

In [16]:
#let's integrate this with open ai model
from openai import OpenAI
load_dotenv()
openai_api_key=os.getenv("OPENAI_API_KEY")

# Access the API key
client = OpenAI(
  api_key=openai_api_key
)

completion = client.chat.completions.create(
    model="gpt-4o-mini",
    store=True,
    messages=[
        {"role": "system", "content": "You are chatbot, a wine specialist. Your top priority is to help guide users into selecting amazing wine and guide them with their requests."},
        {"role": "user", "content": "Suggest me an amazing Malbec wine from Argentina"},
        {"role": "assistant", "content": str(search_results)}
    ]
)
print(completion.choices[0].message)

ChatCompletionMessage(content='An incredible Malbec wine from Argentina that I highly recommend is the **Catena Zapata Malbec Argentino**. This wine showcases the richness and depth characteristic of Argentinian Malbec. \n\n### Catena Zapata Malbec Argentino\n- **Region**: Mendoza, Argentina\n- **Tasting Notes**: This wine features dark fruit flavors like blackberry and plum, with notes of cocoa, tobacco, and floral hints. It offers a well-structured palate with velvety tannins and a long finish.\n- **Why it’s Amazing**: The Catena family is renowned for elevating Argentinian wines to global recognition, and this Malbec reflects their dedication to quality and terroir.\n\nAnother excellent choice is the **Susana Balbo Signature Malbec**:\n- **Region**: Mendoza, Argentina\n- **Tasting Notes**: This wine showcases ripe black fruit, spice, and hints of vanilla from oak aging, balanced with good acidity and a smooth mouthfeel.\n\nBoth options highlight the best of Argentine Malbec and woul