In [5]:
from openai import AzureOpenAI
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import CSVLoader
from langchain_community.vectorstores import AzureSearch
from langchain_openai import AzureOpenAIEmbeddings
import os
from dotenv import load_dotenv
from pprint import pprint

load_dotenv()

True

In [6]:
# Every setting this notebook reads from the environment (.env file or shell).
required_vars = [
    "AZURE_OPENAI_ENDPOINT",
    "AZURE_OPENAI_API_KEY",
    "AZURE_OPENAI_API_VERSION",
    "EMBEDDING_DEPLOYMENT_NAME",
    "CHAT_DEPLOYMENT_NAME",
    "SEARCH_SERVICE_NAME",
    "SEARCH_API_KEY",
    "SEARCH_INDEX_NAME",
]

# A variable counts as missing when it is unset OR set to an empty string.
missing_vars = list(filter(lambda name: not os.getenv(name), required_vars))

# Fail fast here so later cells never run with a half-configured client.
if not missing_vars:
    print("All required environment variables are loaded successfully.")
else:
    raise ValueError(
        f"Missing required environment variables: {', '.join(missing_vars)}. "
        "Please check your .env file and ensure it is in the correct directory and all keys are present."
    )

All required environment variables are loaded successfully.


In [7]:
# Embedding client: turns text into vectors using the Azure OpenAI embedding deployment.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=os.getenv("EMBEDDING_DEPLOYMENT_NAME"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
)
print("AzureOpenAIEmbeddings client configured.")

# SEARCH_SERVICE_NAME may hold either a full endpoint URL or just the bare
# service name. AzureSearch needs a URL, so a bare name is expanded to the
# standard public-cloud endpoint; a value that is already a URL is used as-is.
search_endpoint = os.getenv("SEARCH_SERVICE_NAME")
if search_endpoint and not search_endpoint.lower().startswith(("http://", "https://")):
    search_endpoint = f"https://{search_endpoint}.search.windows.net"

# Vector store backed by the Azure AI Search index named in SEARCH_INDEX_NAME.
# Queries and uploaded texts are embedded through `embeddings.embed_query`.
vector_store = AzureSearch(
    azure_search_endpoint=search_endpoint,
    azure_search_key=os.getenv("SEARCH_API_KEY"),
    index_name=os.getenv("SEARCH_INDEX_NAME"),
    embedding_function=embeddings.embed_query
)
print("AzureSearch vector store connected.")

AzureOpenAIEmbeddings client configured.
AzureSearch vector store connected.


In [9]:
# Load the wine ratings CSV into LangChain Document objects.
# The path is relative, so the file must sit next to the notebook's working directory.
loader = CSVLoader(
    encoding="utf-8",
    file_path="wine-ratings.csv",
)
documents = loader.load()

In [None]:
# Split the loaded documents into chunks of at most ~1000 characters with no
# overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
docs = text_splitter.split_documents(documents)

In [None]:
# Embed every chunk and upload it to the search index.
# NOTE(review): re-running this cell presumably uploads the same chunks again — confirm before re-executing.
doc_count = len(docs)
print(f"Adding {doc_count} documents to the Azure Cognitive Search index...")
vector_store.add_documents(docs)
print("Documents added successfully.")

In [12]:
query = "What is the best Cabernet Sauvignon wine with the most points"

# Fetch the five closest chunks; each hit is a (document, relevance score) pair.
retrieved_docs = vector_store.similarity_search_with_relevance_scores(query=query, k=5)

# Start from "nothing found"; replaced below as soon as there is at least one hit.
top_doc = None
if not retrieved_docs:
    print("No documents found for the query.")
else:
    # Only the first hit is shown and carried forward to the later cells.
    top_doc, score = retrieved_docs[0]
    print(f"\nQuery: '{query}'\n")
    print(f"Top matching document (score: {score:.4f}):")
    print("-" * 30)
    print(top_doc.page_content)


Query: 'What is the best Cabernet Sauvignon wine with the most points'

Top matching document (score: 0.8732):
------------------------------
: 2561
name: Armanino The Pointer Cabernet Sauvignon 2010
grape: 
region: Napa Valley, California
variety: Red Wine
rating: 92.0
notes: A deep, dark black ink color with touches of Ferrari red. On the nose, spices like thyme and rosemary with blueberry, blackberry and cedar wood with hints of hibiscus, soap and plums. This is a very big, bold fruit driven wine with flavors of black currant and blackberry jam evolving on fresh fruit like cherry and strawberry.


In [9]:
# Chat client for the Azure OpenAI resource; credentials come from the environment.
client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
)

# Grounding text for the prompt: the top retrieved document when there is one,
# otherwise a fixed placeholder so the prompt is still well-formed.
context = top_doc.page_content if top_doc else "No relevant wine found."

In [13]:
# Build the RAG prompt.
# The retrieved context is sent together with the question in the *user* turn.
# Putting it in an "assistant" message after the question (as before) tells the
# model that it already said those words itself, and the conversation then ends
# on an assistant turn instead of on the question that needs answering.
messages = [
    {"role": "system", "content": "You are a helpful wine assistant. Use the provided context to answer the user's question about wine."},
    {"role": "user", "content": f"Context from wine database:\n{context}\n\nQuestion: {query}"},
]

# CHAT_DEPLOYMENT_NAME is the Azure deployment name, passed in the `model` field.
response = client.chat.completions.create(
    model=os.getenv("CHAT_DEPLOYMENT_NAME"),
    messages=messages,
)

# The first choice carries the generated answer text.
final_response = response.choices[0].message.content

print("\n--- RAG Response ---")
# Full raw payload, shown for inspection.
pprint(response.model_dump())
print("\n--- Final Answer ---")
print(final_response)


--- RAG Response ---
{'choices': [{'content_filter_results': {'hate': {'filtered': False,
                                                  'severity': 'safe'},
                                         'protected_material_code': {'detected': False,
                                                                     'filtered': False},
                                         'protected_material_text': {'detected': False,
                                                                     'filtered': False},
                                         'self_harm': {'filtered': False,
                                                       'severity': 'safe'},
                                         'sexual': {'filtered': False,
                                                    'severity': 'safe'},
                                         'violence': {'filtered': False,
                                                      'severity': 'safe'}},
              'finish_reason': 'stop',
  