In [2]:
import pandas as pd

SAMPLE_SIZE = 700  # more records will make indexing slower
RANDOM_SEED = 42   # fixed seed so a re-run draws the same sample of wines

df = pd.read_csv('../../top_rated_wines.csv')
df = df[df['variety'].notna()] # drop rows with a missing variety: NaN values blow up serialization
# Cap the sample at the number of remaining rows so sample() cannot raise
# when the file holds fewer than SAMPLE_SIZE usable records.
data = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=RANDOM_SEED).to_dict('records')
len(data)

700

In [3]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

In [4]:
# Sentence-transformer model used to turn text into embedding vectors
encoder = SentenceTransformer("all-MiniLM-L6-v2")

In [5]:
# Vector database client, backed by an in-memory Qdrant instance
qdrant = QdrantClient(location=":memory:")

In [6]:
# Collection that stores the wine vectors.
# The vector size is dictated by the embedding model; similarity is cosine distance.
wine_vectors_config = models.VectorParams(
    distance=models.Distance.COSINE,
    size=encoder.get_sentence_embedding_dimension(),
)
qdrant.recreate_collection(collection_name="top_wines", vectors_config=wine_vectors_config)

True

In [7]:
# Vectorize! Embed every wine's notes and upload one point per wine.
# All notes are encoded in a single batched encode() call instead of one call
# per record, so the model can process them in batches.
note_vectors = encoder.encode([doc["notes"] for doc in data])
qdrant.upload_points(
    collection_name="top_wines",
    points=[
        models.PointStruct(
            id=idx,
            vector=vector.tolist(),
            payload=doc,
        )
        for idx, (doc, vector) in enumerate(zip(data, note_vectors))  # data holds all the wines
    ],
)

In [18]:
# Question about a wine, phrased the way a user would ask it
user_prompt: str = "what is the rating of Almaviva (1.5 Liter Magnum) 2018 ?"

In [19]:
# Search time for awesome wines!
# Embed the question with the same encoder and ask for the three closest wines.
prompt_vector = encoder.encode(user_prompt).tolist()
hits = qdrant.search(collection_name="top_wines", query_vector=prompt_vector, limit=3)

# Show each match's stored payload next to its similarity score.
for match in hits:
    print(match.payload, "score:", match.score)

{'name': 'Chateau Sansonnet (Futures Pre-Sale) 2019', 'region': 'St. Emilion, Bordeaux, France', 'variety': 'Red Wine', 'rating': 96.0, 'notes': 'The Barrel Sample for this wine is above 14% ABV. '} score: 0.5232710381504038
{'name': 'Chateau Smith Haut Lafitte (6-Pack OWC Futures Pre-Sale) 2019', 'region': 'Pessac-Leognan, Bordeaux, France', 'variety': 'Red Wine', 'rating': 98.0, 'notes': 'The Barrel Sample for this wine is above 14% ABV. '} score: 0.5232710381504038
{'name': 'Chateau Peby Faugeres (Futures Pre-Sale) 2019', 'region': 'St. Emilion, Bordeaux, France', 'variety': 'Red Wine', 'rating': 96.0, 'notes': 'The Barrel Sample for this wine is above 14% ABV. '} score: 0.5232710381504038


In [17]:
# Keep only the first hit's payload as the context handed to the language model.
# The hit list is indexed directly (no throwaway list is built), and an empty
# payload is used when the search returned nothing instead of raising IndexError.
search_results = hits[0].payload if hits else {}

{'name': 'Abreu Vineyards Madrona Ranch 2005', 'region': 'Napa Valley, California', 'variety': 'Red Wine', 'rating': 98.0, 'notes': 'Abreu Madrona Ranch is a blend of Cabernet Sauvignon and Cabernet Franc with accents of Merlot and Petit Verdot. The wine is aged in 100% new French oak for two years. The wine receives an addition two years of bottle age prior to release.'}


In [14]:
# Send the question plus the retrieved wine payload to the chat model.
# NOTE(review): with base_url left commented out, the client uses its default
# endpoint rather than a local server; uncomment base_url to target a local
# OpenAI-compatible API server instead.
import os

from openai import OpenAI

client = OpenAI(
    # base_url="http://127.0.0.1:8080/v1", # "http://<Your api-server IP>:port"
    # Read the key from the environment so a real key is never saved in the
    # notebook; the placeholder remains only as a fallback.
    api_key=os.environ.get("OPENAI_API_KEY", "please add your key here"),
)
completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are chatbot, a wine specialist. Your top priority is to help guide users into selecting amazing wine and guide them with their requests."},
        {"role": "user", "content": user_prompt},
        # The retrieved payload is passed as a stringified assistant message.
        {"role": "assistant", "content": str(search_results)}
    ]
)
print(completion.choices[0].message)

ChatCompletionMessage(content="I'm sorry, I couldn't find information specifically for the Abreu Vineyards Madrona Ranch 1993. However, I can provide you with details about the Abreu Vineyards Madrona Ranch 2005, which has a rating of 98.0. This wine is a blend of Cabernet Sauvignon and Cabernet Franc with accents of Merlot and Petit Verdot. It is aged in 100% new French oak for two years and receives an additional two years of bottle age prior to release. If you're interested in this wine, I can help you find more information or recommend similar options.", role='assistant', function_call=None, tool_calls=None)
