In [1]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from azure.cosmos import CosmosClient

import os
from dotenv import load_dotenv
load_dotenv()

# Azure OpenAI endpoint and key, read from the process environment
# (populated from .env by the load_dotenv() call above).
OPEN_AI_ENDPOINT = os.environ.get("OPEN_AI_ENDPOINT")
subscription_key = os.environ.get("SUBSCRIPTION_KEY")


In [2]:
# Chat model client. Settings are collected first so they are easy to tweak,
# then unpacked into the constructor as keyword arguments.
llm_settings = {
    "api_version": "2025-01-01-preview",
    "azure_endpoint": OPEN_AI_ENDPOINT,
    "api_key": subscription_key,
    "temperature": 0.1,
}
llm = AzureChatOpenAI(**llm_settings)

In [3]:
# Two-message prompt: a system instruction that sets the persona and answer
# length, and a user message whose {topic} placeholder is filled at invoke time.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a Professor. You are to answer the question in 20-30 words "),
    # Fixed typo: the template previously read "tell me a about {topic}".
    ("user", "tell me about {topic}"),
])

# Pipe the formatted prompt straight into the chat model.
chain = prompt | llm

# Quick end-to-end check of the chain with a sample topic.
response = chain.invoke({"topic": "Vector Databases"})
print(response.content)

Vector databases store and manage high-dimensional vector embeddings, enabling efficient similarity search and retrieval for applications like recommendation systems, natural language processing, and computer vision.


In [4]:
# Embedding model: text-embedding-ada-002
EMBEDDING_MODEL_ENDPOINT = os.environ.get("EMBEDDING_MODEL_ENDPOINT")

# Embedding client; uses the same key as the chat model but its own endpoint.
embedding = AzureOpenAIEmbeddings(
    api_key=subscription_key,
    azure_endpoint=EMBEDDING_MODEL_ENDPOINT,
)

In [5]:
# Smoke test: embed one sample question and report the vector length.
query = "What is Azure OpenAI?"
query_embedding = embedding.embed_query(query)

embedding_dim = len(query_embedding)
print(embedding_dim)

1536


In [6]:
# Cosmos DB database and container names, read from the environment.
DATABASE_NAME = os.environ.get("DATABASE_NAME")
CONTAINER_NAME = os.environ.get("CONTAINER_NAME")

In [7]:
COSMOS_CONNECTION_STRING = os.environ.get("COSMOS_CONNECTION_STRING")

# Create the Cosmos client from the connection string, then resolve the
# database handle and, from it, the container handle used by later cells.
client = CosmosClient.from_connection_string(COSMOS_CONNECTION_STRING)
database = client.get_database_client(DATABASE_NAME)
container = database.get_container_client(CONTAINER_NAME)

In [8]:
def insert_document(test_id, content):
    """Embed ``content`` and create a new item for it in the Cosmos container.

    Args:
        test_id: Identifier for the document; stored as the item's string ``id``.
        content: Text to store; its embedding is saved under ``content_vector``.
    """
    vector = embedding.embed_query(content)
    item = dict(id=str(test_id), content=content, content_vector=vector)
    container.create_item(body=item)
    print(f"Inserted document with test_id: {test_id}")


In [9]:
# Existence check meant to run before an insert, so a caller can report
# "Document with test_id {test_id} already exists." instead of inserting again.
def check_document_exists(test_id):
    """Return True if an item whose ``id`` equals ``str(test_id)`` is in the container.

    The id is bound as a query parameter instead of being interpolated into the
    SQL text, so quotes or other special characters in ``test_id`` cannot alter
    the query. Only ``c.id`` is projected, so the stored embedding vector is
    not fetched just to test for existence.

    Args:
        test_id: Identifier to look up. Converted with ``str`` to match how
            ``insert_document`` writes the ``id`` field.

    Returns:
        bool: True when at least one matching item is found (the query is run
        across partitions), otherwise False.
    """
    matches = container.query_items(
        query="SELECT VALUE c.id FROM c WHERE c.id = @id",
        parameters=[{"name": "@id", "value": str(test_id)}],
        enable_cross_partition_query=True,
    )
    return len(list(matches)) > 0


In [17]:
def vector_search(query_text, top_k=5):
    """Search the container for items ranked by vector distance to ``query_text``.

    The query text is embedded first; a Cosmos DB SQL query then orders items by
    ``VectorDistance`` between ``c.content_vector`` and that embedding.

    Args:
        query_text: Free-text query to embed and search with.
        top_k: Row limit bound to ``SELECT TOP``; defaults to 5.

    Returns:
        list: Rows containing ``id``, ``content`` and ``similarity_score``.
        An empty list is returned if running the query raises. Errors from
        the embedding call itself are not caught here.
    """
    query_vector = embedding.embed_query(query_text)

    # The row limit and the embedding are both passed as bound parameters.
    sql = """
    SELECT TOP @top_k 
        c.id,
        c.content,
        VectorDistance(c.content_vector, @embedding) AS similarity_score
    FROM c
    ORDER BY VectorDistance(c.content_vector, @embedding)
    """
    bindings = [
        {"name": "@embedding", "value": query_vector},
        {"name": "@top_k", "value": top_k},
    ]

    try:
        hits = list(
            container.query_items(
                query=sql,
                parameters=bindings,
                enable_cross_partition_query=True,
            )
        )
        print(f"Vector search completed - Found {len(hits)} results")
        return hits
    except Exception as e:
        # Best-effort behaviour: report the failure and return no results.
        print(f"Error during vector search: {e}")
        return []


In [19]:
# Run a single-result search and print a truncated preview of each hit.
search_query = "What is Azure OpenAI Service?"
results = vector_search(search_query, top_k=1)

print(f"\nQuery: '{search_query}'\n")
for idx, result in enumerate(results, start=1):
    print(f"Result {idx}:")
    preview = result['content'][:100]  # first 100 characters only
    print(f"  Content: {preview}...")
    score = result['similarity_score']
    print(f"  similarity_score: {score:.4f}")

Vector search completed - Found 1 results

Query: 'What is Azure OpenAI Service?'

Result 1:
  Content: Azure OpenAI Service provides REST API access to OpenAI's powerful language models, enabling develop...
  similarity_score: 0.9117
