In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_community.vectorstores import AzureCosmosDBVectorSearch
from langchain_text_splitters import RecursiveCharacterTextSplitter


import os
from dotenv import load_dotenv
# Let python-dotenv populate the process environment before any lookups below.
load_dotenv()

# Azure OpenAI connection settings; each value is None when its variable is unset.
OPEN_AI_ENDPOINT = os.environ.get("OPEN_AI_ENDPOINT")
subscription_key = os.environ.get("SUBSCRIPTION_KEY")


In [2]:
# Chat model client, configured from the endpoint and key read in the first cell.
# Temperature is set low (0.1).
llm = AzureChatOpenAI(
    azure_endpoint=OPEN_AI_ENDPOINT,
    api_key=subscription_key,
    api_version="2025-01-01-preview",
    temperature=0.1,
)


In [10]:
# Two-message prompt: a fixed system instruction plus a user turn with a {topic} slot.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a Professor. You are to answer the question in 20-30 words "),
    # Fixed typo: the user message previously read "tell me a about {topic}".
    ("user", "tell me about {topic}"),
])

# Pipe the filled-in prompt into the chat model.
chain = prompt | llm

# Run the chain once for a sample topic and show the text of the model's reply.
response = chain.invoke({"topic": "Vector Databases"})
print(response.content)

Vector databases store and manage high-dimensional vector embeddings, enabling efficient similarity search and machine learning tasks, often used in recommendation systems, natural language processing, and computer vision.


In [11]:
# Embedding model: text-embedding-ada-002

# Endpoint for the embedding model; None when the variable is unset.
EMBEDDING_MODEL_ENDPOINT = os.environ.get("EMBEDDING_MODEL_ENDPOINT")

# Embeddings client; it is given the same key as the chat model above.
embedding = AzureOpenAIEmbeddings(
    api_key=subscription_key,
    azure_endpoint=EMBEDDING_MODEL_ENDPOINT,
)

In [12]:
# Embed one sample question to check that the embeddings client works.
query = "What is Azure OpenAI?"
query_embedding = embedding.embed_query(query)

# The number of entries in the returned vector is the embedding dimension.
print(len(query_embedding), end="\n")

1536


In [13]:
from azure.cosmos import CosmosClient

# Cosmos DB connection for the vector store.
# Account endpoint and key come from the environment (None when unset).
COSMOS_ENDPOINT = os.environ.get("COSMOS_ENDPOINT")
COSMOS_KEY = os.environ.get("COSMOS_KEY")

client = CosmosClient(url=COSMOS_ENDPOINT, credential=COSMOS_KEY)


In [None]:
# Three short sample sentences to wrap as documents.
texts = [
    "Azure AI Foundry is a unified platform for building generative AI applications.",
    "Azure Cosmos DB is a globally distributed, multi-model database service with native vector search.",
    "LangChain provides a framework to connect LLMs with vector databases like Cosmos DB.",
]

# Splitter configured for chunks of up to 500 characters with no overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=500)

# Wrap the raw strings as Document objects and show the result.
documents = text_splitter.create_documents(texts)
print(documents)

[Document(metadata={}, page_content='Azure AI Foundry is a unified platform for building generative AI applications.'), Document(metadata={}, page_content='Azure Cosmos DB is a globally distributed, multi-model database service with native vector search.'), Document(metadata={}, page_content='LangChain provides a framework to connect LLMs with vector databases like Cosmos DB.')]


In [15]:
# Names of the target database and container; None when the variables are unset.
DATABASE_NAME = os.getenv("DATABASE_NAME")
CONTAINER_NAME = os.getenv("CONTAINER_NAME")

# These two calls only build client-side handles; on their own they do not
# confirm that the database or the container exists in the account.
database = client.get_database_client(DATABASE_NAME)
container = database.get_container_client(CONTAINER_NAME)

# Fetch the container's properties from the service so the message below is
# printed only when the container really exists. read() raises an
# azure.cosmos exception (e.g. resource not found) otherwise.
container.read()
print(f"Container '{CONTAINER_NAME}' already exists")

Container 'test_container' already exists
