In [None]:
import os
import openai
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import AzureSearch
from dotenv import load_dotenv
# Populate os.environ from the local .env file before any os.getenv() lookups below.
load_dotenv(dotenv_path='.env')


In [None]:
# Embedding client bound to the "demo-embedding" deployment.
# chunk_size=1 is forwarded to OpenAIEmbeddings as-is
# (presumably caps the number of texts per embedding request -- confirm).
embeddings = OpenAIEmbeddings(chunk_size=1, deployment="demo-embedding")

# Vector-store handle for Azure Cognitive Search; every connection detail is
# read from the environment.
# NOTE(review): SEARCH_SERVICE_NAME is passed as the *endpoint* -- confirm the
# variable holds whatever form AzureSearch expects for azure_search_endpoint.
acs = AzureSearch(
    embedding_function=embeddings.embed_query,
    index_name=os.getenv('SEARCH_INDEX_NAME'),
    azure_search_key=os.getenv('SEARCH_API_KEY'),
    azure_search_endpoint=os.getenv('SEARCH_SERVICE_NAME'),
)

In [None]:
from langchain.document_loaders import CSVLoader

# Load the wine ratings CSV (path is relative to the notebook's working
# directory) into `documents` for the cells that follow.
loader = CSVLoader(file_path="wine-ratings.csv")
documents = loader.load()

In [None]:
# Sanity check on the CSV load: report the count, then a short preview and the
# metadata of the first document (or a placeholder when nothing was loaded).
print(f"Number of documents loaded: {len(documents)}")

if documents:
    first_doc = documents[0]
    preview_text = first_doc.page_content[:200]
    metadata_text = first_doc.metadata
else:
    preview_text = 'No documents'
    metadata_text = 'No documents'

print(f"First document preview: {preview_text}...")
print(f"Document metadata: {metadata_text}")

In [None]:
# Split the loaded documents and upload them in small batches so that no
# single request has to carry the whole dataset (which timed out previously).
from langchain.text_splitter import CharacterTextSplitter
import time


def upload_in_batches(store, chunks, batch_size=50, pause_seconds=1):
    """Upload `chunks` to `store` in consecutive slices of `batch_size`.

    Progress is printed per batch. Uploading stops at the first batch whose
    `store.add_documents` call raises; the error is printed and later batches
    are not attempted.

    Args:
        store: object exposing `add_documents(documents=...)`.
        chunks: sequence of documents to upload.
        batch_size: number of documents per request; must be >= 1.
        pause_seconds: delay between consecutive batches (skipped after the
            final batch, where there is nothing left to throttle).

    Returns:
        Number of documents contained in the batches that uploaded without
        raising.

    Raises:
        ValueError: if `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    total = len(chunks)
    batch_count = (total + batch_size - 1) // batch_size  # ceiling division
    uploaded = 0

    for batch_num, start in enumerate(range(0, total, batch_size), start=1):
        batch_docs = chunks[start:start + batch_size]
        print(f"Processing batch {batch_num}/{batch_count} ({len(batch_docs)} documents)...")

        try:
            store.add_documents(documents=batch_docs)
        except Exception as e:
            # Stop at the first failure so the counts printed by the caller
            # reflect a contiguous prefix of `chunks`.
            print(f"✗ Error in batch {batch_num}: {str(e)}")
            break

        uploaded += len(batch_docs)
        print(f"✓ Batch {batch_num} uploaded successfully")

        # Small delay to be gentle on APIs -- only needed between batches.
        if start + batch_size < total:
            time.sleep(pause_seconds)

    return uploaded


text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

print(f"Total documents after splitting: {len(docs)}")
print("Processing documents in batches...")

# Process in batches of 50 documents to avoid API timeouts
batch_size = 50
total_batches = (len(docs) + batch_size - 1) // batch_size
successful_uploads = upload_in_batches(acs, docs, batch_size=batch_size)

print(f"\nTotal documents successfully uploaded: {successful_uploads}")
print(f"Remaining documents: {len(docs) - successful_uploads}")

In [None]:
# Smoke-test retrieval: run one sample question against the index and show
# the best-scoring hit. `results` is kept at module level for later cells.
print("Testing similarity search...")

search_query = "What is the best Cabernet Sauvignon wine in Napa Valley above 94 points"

try:
    results = acs.similarity_search_with_relevance_scores(query=search_query, k=5)

    if not results:
        print("No results found")
    else:
        print(f"\n✓ Found {len(results)} results!")
        # Each hit is indexed as (document, relevance score).
        top_doc, top_score = results[0]
        print(f"\nTop result (relevance: {top_score:.3f}):")
        print(top_doc.page_content)
        print(f"\nMetadata: {top_doc.metadata}")

except Exception as e:
    # Report and continue so the rest of the notebook can still run.
    print(f"Search error: {e}")

In [None]:
# ORIGINAL PROBLEMATIC CODE - CAUSES TIMEOUT WITH LARGE CSV FILES
# This cell will fail with 32k+ documents because it tries to process all at once
# from langchain.text_splitter import CharacterTextSplitter
# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
# docs = text_splitter.split_documents(documents)
# acs.add_documents(documents=docs)  # This times out with large datasets

#print("⚠️  This cell has been disabled to prevent timeout.")
#print("📊 The wine-ratings.csv contains 32,780+ records.")
#print("🔧 Use the batch processing approach in the cell above instead.")
#print("✅ Batch processing successfully uploads documents without timeouts.")

In [None]:
# Run the sample query and inspect the best-matching document.
# NOTE: this rebinds `docs` (earlier the list of split chunks) to the search
# hits, which are indexed below as (document, relevance score) pairs.
docs = acs.similarity_search_with_relevance_scores(
    query="What is the best Cabernet Sauvignon wine in Napa Valley above 94 points",
    k=5,
)

# Guard against an empty result list so the cell reports it instead of
# failing with IndexError on docs[0].
if docs:
    print(docs[0][0].page_content)
    # List the attributes available on the returned document object.
    print(dir(docs[0][0]))
else:
    print("No results found")

In [34]:
# 🔧 TROUBLESHOOTING: Azure OpenAI Chat Completion
def find_missing_settings(settings):
    """Return the names in `settings` whose value is None or empty.

    Args:
        settings: mapping of setting name -> configured value.

    Returns:
        List of names with a falsy value, in the mapping's iteration order.
    """
    return [name for name, value in settings.items() if not value]


print("🔍 Troubleshooting Azure OpenAI Configuration...")

# Check critical environment variables
api_base = os.getenv("OPENAI_API_BASE")
api_key = os.getenv("OPENAI_API_KEY")
model_name = os.getenv("OPENAI_MODEL")

print(f"API Base: {api_base}")
# Only report presence of the key; never echo the secret itself.
print(f"API Key: {'✅ Set' if api_key else '❌ Missing'}")
print(f"Model: {model_name}")

missing_settings = find_missing_settings({
    "OPENAI_API_BASE": api_base,
    "OPENAI_API_KEY": api_key,
    "OPENAI_MODEL": model_name,
})

if missing_settings:
    # Every one of the three values is required by the call below, so refuse
    # to proceed unless all of them are present.
    for setting_name in missing_settings:
        print(f"\n❌ CRITICAL ERROR: {setting_name} is not set!")
    if not api_base:
        print("🔧 SOLUTION: Set your Azure OpenAI endpoint:")
        print("   export OPENAI_API_BASE='https://your-resource-name.openai.azure.com/'")
    print("\n⚠️  Cannot proceed with OpenAI API call without a complete configuration.")

else:
    print("\n✅ Configuration looks good, attempting API call...")

    # The value is sent as the chat-completions model/deployment name; flag
    # names that look like an embeddings deployment, but still attempt the call.
    if "embedding" in model_name.lower():
        print(f"⚠️  OPENAI_MODEL is '{model_name}', which looks like an embeddings deployment; "
              "chat completions need a chat-capable deployment.")

    try:
        # Modern OpenAI v1.0+ API syntax for Azure OpenAI
        from openai import AzureOpenAI

        # Initialize Azure OpenAI client
        client = AzureOpenAI(
            api_key=api_key,
            api_version="2023-05-15",
            azure_endpoint=api_base
        )

        # Reuse the hits from the earlier search cell when it has been run;
        # otherwise fall back to a placeholder string.
        search_hits = globals().get("results")
        retrieved_content = search_hits[0][0].page_content if search_hits else "No wine data found"

        # NOTE(review): the retrieved text is sent as an assistant turn, and
        # this hard-coded question (Oregon, 92 points) differs from the query
        # used by the search cell (Napa Valley, 94 points) -- confirm intended.
        messages = [
            {"role": "system", "content": "Assistant is a chatbot that helps you find the best wine for your taste."},
            {"role": "user", "content": "What is the best wine in Oregon above 92 points?"},
            {"role": "assistant", "content": retrieved_content}
        ]

        print("🤖 Calling Azure OpenAI...")
        response = client.chat.completions.create(
            model=model_name,
            messages=messages,
        )

        print("✅ Success! OpenAI API call completed.")
        print(f"\nAssistant reply: {response.choices[0].message.content}")

    except Exception as e:
        # Keep the notebook running and print a checklist instead of a traceback.
        print(f"❌ Error during API call: {str(e)}")
        print("\n🔧 Possible solutions:")
        print("1. Verify OPENAI_API_BASE points to correct Azure OpenAI endpoint")
        print("2. Check OPENAI_API_KEY is valid and active")
        print("3. Ensure model name matches your Azure OpenAI deployment")
        print("4. Verify your Azure subscription has access to OpenAI service")

🔍 Troubleshooting Azure OpenAI Configuration...
API Base: https://oai-demo-search.openai.azure.com/
API Key: ✅ Set
Model: text-embedding-ada-002

✅ Configuration looks good, attempting API call...
🤖 Calling Azure OpenAI...
❌ Error during API call: Error code: 401 - {'error': {'code': '401', 'message': 'Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource.'}}

🔧 Possible solutions:
1. Verify OPENAI_API_BASE points to correct Azure OpenAI endpoint
2. Check OPENAI_API_KEY is valid and active
3. Ensure model name matches your Azure OpenAI deployment
4. Verify your Azure subscription has access to OpenAI service
