In [24]:
import os
import openai
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import AzureSearch
from dotenv import load_dotenv
load_dotenv('.env')


True

In [11]:
import pandas as pd
df = pd.read_csv('wine-ratings.csv')
df = df[df['variety'].notna()] # remove any NaN values as it blows up serialization
data = df.to_dict('records') # Get only 700 records. More records will make it slower to index
len(data)

32422

In [12]:
df

Unnamed: 0.1,Unnamed: 0,name,grape,region,variety,rating,notes
0,0,1000 Stories Bourbon Barrel Aged Batch Blue Ca...,,"Mendocino, California",Red Wine,91.0,"This is a very special, limited release of 100..."
1,1,1000 Stories Bourbon Barrel Aged Gold Rush Red...,,California,Red Wine,89.0,The California Gold Rush was a period of coura...
2,2,1000 Stories Bourbon Barrel Aged Gold Rush Red...,,California,Red Wine,90.0,The California Gold Rush was a period of coura...
3,3,1000 Stories Bourbon Barrel Aged Zinfandel 2013,,"North Coast, California",Red Wine,91.0,"The wine has a deep, rich purple color. An int..."
4,4,1000 Stories Bourbon Barrel Aged Zinfandel 2014,,California,Red Wine,90.0,Batch #004 is the first release of the 2014 vi...
...,...,...,...,...,...,...,...
32775,32775,Lewis Cellars Syrah Ethan's Blend 2005,,California,Red Wine,92.0,"Now 7 years old, Alec's younger brother Ethan ..."
32776,32776,Lewis Cellars Syrah Ethan's Blend 2014,,California,Red Wine,93.0,"Born in 2000, everything about Ethan and his n..."
32777,32777,Lewis Cellars Syrah Ethan's Blend 2009,,California,Red Wine,94.0,"Positioned between brothers, Alec and Mason, E..."
32778,32778,Lexington Apex Red 2011,,"Santa Cruz Mountains, California",Red Wine,91.0,"Blend: 99% Cabernet Sauvignon, 1% Merlot"


In [2]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

  _torch_pytree._register_pytree_node(


In [4]:
encoder = SentenceTransformer('all-MiniLM-L6-v2') # Model to create embeddings

In [5]:
# create the vector database client
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [6]:
# Create collection to store wines
qdrant.recreate_collection(
    collection_name="top_wines",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model
        distance=models.Distance.COSINE
    )
)

  qdrant.recreate_collection(


True

In [13]:
# vectorize!
qdrant.upload_points(
    collection_name="top_wines",
    points=[
        models.PointStruct(
            id=idx,
            vector=encoder.encode(doc["notes"]).tolist(),
            payload=doc,
        ) for idx, doc in enumerate(data) # data is the variable holding all the wines
    ]
)

In [14]:
user_prompt = "Suggest me an amazing Malbec wine from Argentina"

In [15]:
# Search time for awesome wines!

hits = qdrant.search(
    collection_name="top_wines",
    query_vector=encoder.encode(user_prompt).tolist(),
    limit=3
)
for hit in hits:
  print(hit.payload, "score:", hit.score)

{'Unnamed: 0': 621, 'name': 'Alamos Malbec 2012', 'grape': nan, 'region': 'Argentina', 'variety': 'Red Wine', 'rating': 89.0, 'notes': 'A classically Argentine wine, the Alamos 2012 Malbec is blended with small portions of Syrah and Bonarda to meld deep dark cherry and blackberry flavors with hints of brown spice and vanilla. A long finish and firm tannins make this Malbec unforgettable.'} score: 0.7764789912600035
{'Unnamed: 0': 5003, 'name': 'Bodega Benegas Estate Malbec 2007', 'grape': nan, 'region': 'Mendoza, Argentina', 'variety': 'Red Wine', 'rating': 90.0, 'notes': 'Benegas presents a Malbec that captures the uniqueness of this grape variety typical from Argentina. This wine is the best reflection of the exceptional climate and soil conditions of the province of Mendoza in Argentina and of a careful elaboration process carried out in the winery. '} score: 0.7728740863163077
{'Unnamed: 0': 620, 'name': 'Alamos Malbec 2010', 'grape': nan, 'region': 'Argentina', 'variety': 'Red Win

In [16]:

# define a variable to hold the search results
search_results = [hit.payload for hit in hits]

In [22]:
os.getenv('OPENAI_API_BASE')

In [21]:
cd ..

/workspaces/azure-rag


In [25]:
import openai

openai.api_base = os.getenv('OPENAI_API_BASE')
openai.api_key = os.getenv('OPENAI_API_KEY')

completion = openai.ChatCompletion.create(
    model="LLaMA_CPP",
    messages=[
        {"role": "system", "content": "You are chatbot, a wine specialist. Your top priority is to help guide users into selecting amazing wine and guide them with their requests."},
        {"role": "user", "content": "Suggest me an amazing Malbec wine from Argentina"},
        {"role": "assistant", "content": str(search_results)}
    ]
)
print(completion["choices"][0]["message"]["content"])

Okay, fantastic! Let’s dive into some amazing Malbecs from Argentina. Based on your request for “amazing” and your preferences for a classic style, here are a few recommendations, with a little bit of detail to help you choose:

**Top Tier - Truly Exceptional:**

*   **Alamos Malbec 2012:** (As mentioned before, this is consistently praised for its balance and complexity. It’s a fantastic entry point to the world of Argentine Malbecs.) - *Why it’s amazing:* Deep, ripe fruit (black cherry, blackberry), subtle spice notes (vanilla, cedar), and a long, velvety finish. It’s a really approachable and elegant wine.

*   **Bodega Benegas Estate Malbec 2007:** (This is a truly special wine – a classic example of the Mendoza style.) - *Why it’s amazing:*  This is a benchmark. It showcases the terroir of Mendoza – intense dark fruit (plum, blackcurrant), a beautiful aroma of cedar and tobacco, and a wonderfully integrated acidity. It’s a wine that demands attention.

**Excellent Choices - Worth 

In [29]:
cd webapp/

/workspaces/azure-rag/webapp


In [28]:
import pandas as pd
df = pd.read_csv('examples/1-setup-application/wine-ratings.csv')
df = df[df['variety'].notna()] # remove any NaN values as it blows up serialization
data = df.to_dict('records') # Get only 700 records. More records will make it slower to index
len(data)

32422

In [31]:
import pandas as pd
df = pd.read_csv('../examples/1-setup-application/wine-ratings.csv')
df = df[df['variety'].notna()] # remove any NaN values as it blows up serialization
data = df.to_dict('records') # Get only 700 records. More records will make it slower to index
len(data)

32422

In [4]:
embeddings = OpenAIEmbeddings(deployment="demo-embedding", chunk_size=1)

# Connect to Azure Cognitive Search
acs = AzureSearch(azure_search_endpoint=os.getenv('SEARCH_SERVICE_NAME'),
                 azure_search_key=os.getenv('SEARCH_API_KEY'),
                 index_name=os.getenv('SEARCH_INDEX_NAME'),
                 embedding_function=embeddings.embed_query)

In [9]:
from langchain.document_loaders import CSVLoader

loader = CSVLoader("wine-ratings.csv")
documents = loader.load()

In [None]:
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

acs.add_documents(documents=docs)

In [5]:
docs = acs.similarity_search_with_relevance_scores(
    query="What is the best Cabernet Sauvignon wine in Napa Valley above 94 points",
    k=5,
)
print(docs[0][0].page_content)
print(dir(docs[0][0]))

: 20
name: 1849 Declaration Napa Valley Cabernet Sauvignon 2014
grape: 
region: Napa Valley, California
variety: Red Wine
rating: 91.0
notes: The palate is robust with flavors of dark blueberry, blackberry, traces of red currant, and subtle sweet oak from the barrel. This wine is fruit forward, full-bodied and spreads richly across the palate with soft velvety tannins and a long-lasting finish.
['Config', '__abstractmethods__', '__annotations__', '__class__', '__class_vars__', '__config__', '__custom_root_type__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__exclude_fields__', '__fields__', '__fields_set__', '__format__', '__ge__', '__get_validators__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__include_fields__', '__init__', '__init_subclass__', '__iter__', '__json_encoder__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__post_root_validators__', '__pre_root_validators__', '__pretty__', '__private_attributes__', '__reduce__', '__reduce_ex

In [8]:
openai.api_base = os.getenv("OPENAI_API_BASE")  # Your Azure OpenAI resource's endpoint value.
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_type = "azure"
openai.api_version = "2023-05-15" 
messages=[
    {"role": "system", "content": "Asisstant is a chatbot that helps you find the best wine for your taste."},
    {"role": "user", "content": "What is the best wine in Oregon above 92 points?"},
    {"role": "assistant", "content": docs[0][0].page_content}
]

response = openai.ChatCompletion.create(
    engine="demo-alfredo",
    messages=messages,
)
from pprint import pprint
pprint(response)
print(response['choices'][0]['message']['content'])

{'choices': [{'finish_reason': 'stop',
              'index': 0,
              'message': {'content': 'I apologize for the confusion, but I '
                                     "don't have access to real-time wine "
                                     'ratings and reviews. It would be best to '
                                     'refer to professional wine rating '
                                     'websites or consult with a sommelier for '
                                     'specific recommendations on Oregon Pinot '
                                     'Noir wines above 94 points. They will '
                                     'have the most up-to-date and accurate '
                                     'information for you.',
                          'role': 'assistant'}}],
 'created': 1703696035,
 'id': 'chatcmpl-8aRSFAMFjRLXsPOGHajprdnUPTeuu',
 'model': 'gpt-35-turbo',
 'object': 'chat.completion',
 'usage': {'completion_tokens': 66,
           'prompt_tokens': 154,
