In [8]:
import getpass
import os
from langchain_openai import AzureOpenAIEmbeddings
from langchain_openai import AzureChatOpenAI

# Azure OpenAI settings read from os.environ by the client constructors below.
# Each entry is (environment variable, prompt shown to the user, is_secret).
_AZURE_SETTINGS = (
    ("AZURE_OPENAI_API_KEY", "Enter API key for Azure: ", True),
    ("AZURE_OPENAI_DEPLOYMENT_NAME", "Enter AZURE_OPENAI_DEPLOYMENT_NAME: ", False),
    ("AZURE_OPENAI_API_VERSION", "Enter AZURE_OPENAI_API_VERSION: ", False),
    ("AZURE_OPENAI_ENDPOINT", "Enter AZURE_OPENAI_ENDPOINT: ", False),
)

for _name, _prompt, _is_secret in _AZURE_SETTINGS:
    if os.environ.get(_name):
        # Already configured (non-empty) in the environment; do not prompt.
        continue

    # Only the API key is hidden while typing. The other values are not
    # secrets, and echoing them lets the user spot typos in the endpoint URL
    # or API version before they are stored.
    _ask = getpass.getpass if _is_secret else input
    _value = _ask(_prompt).strip()  # drop stray whitespace from copy/paste

    if not _value:
        # Fail here rather than storing an empty value in the environment.
        raise ValueError(f"{_name} must not be empty")

    os.environ[_name] = _value

# import os
# from langchain_azure_ai.chat_models import AzureAIChatCompletionsModel

# model = AzureAIChatCompletionsModel(
#     endpoint=os.environ["AZURE_INFERENCE_ENDPOINT"],
#     credential=os.environ["AZURE_INFERENCE_CREDENTIAL"],
#     model="mistral-large-2407",
# )


# Embedding client for Azure OpenAI. The key, endpoint and API version come
# from the environment variables ensured above; the model and deployment are
# both fixed to "text-embedding-3-small".
embeddings = AzureOpenAIEmbeddings(
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    azure_deployment="text-embedding-3-small",
    model="text-embedding-3-small",
)

# Chat model client for Azure OpenAI, configured from environment variables.
# No api_key argument is passed here -- presumably the client picks up
# AZURE_OPENAI_API_KEY from the environment on its own (confirm against the
# langchain_openai documentation).
llm = AzureChatOpenAI(
    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
)

In [9]:
# texts = [("Hello, world!"),("How are you?")]
# embeddings.embed_documents(texts)

In [10]:
# Sample text to embed. Despite the plural name this is a single string,
# so it is wrapped in a list for the batch embedding call below.
texts = "Hello, world! How are you?"

# Displays the embedding vector(s) returned for the one-element batch.
embeddings.embed_documents([texts])

[[0.004838842898607254,
  -0.03712594136595726,
  0.013522780500352383,
  0.05429896339774132,
  -0.027045561000704765,
  -0.021836496889591217,
  -0.011437855660915375,
  0.04294554516673088,
  -0.01829017698764801,
  -0.032215654850006104,
  -0.008164329454302788,
  -0.01219128631055355,
  -0.011587242595851421,
  -0.027669088914990425,
  0.005287004169076681,
  0.023330368101596832,
  -0.05476661026477814,
  0.06240483745932579,
  -0.007677197922021151,
  0.033332809805870056,
  0.06624992936849594,
  -0.019900960847735405,
  -0.007898030802607536,
  0.017419835552573204,
  0.05471464991569519,
  0.010944228619337082,
  -0.010067391209304333,
  0.033384770154953,
  -0.014471063390374184,
  -0.04026956483721733,
  0.05362347513437271,
  -0.034579865634441376,
  -0.018926696851849556,
  0.019004637375473976,
  -0.0013526027323678136,
  0.018108313903212547,
  -0.006930262316018343,
  -0.006433388218283653,
  -0.009755627252161503,
  -0.04702446237206459,
  -0.028084775432944298,
  -0.

In [11]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.documents import Document

# In-memory vector store that uses the Azure embedding client to embed
# whatever is added to it.
vector_store = InMemoryVectorStore(embedding=embeddings)

# Index the sample text as a single Document. The cell displays the value
# returned by add_documents (the recorded output is a one-element list
# holding an id string).
vector_store.add_documents([Document(page_content=texts)])

['e5fd5431-2778-4d66-9cba-8075e136e06c']

In [12]:
# from langchain_core.documents import Document

# document_1 = Document(id="1", page_content="foo", metadata={"baz": "bar"})
# document_2 = Document(id="2", page_content="thud", metadata={"bar": "baz"})

# documents = [document_1, document_2]
# vector_store.add_documents(documents=documents)

In [13]:
# from langchain_community.vectorstores import FAISS

# vector_store = FAISS(embedding_function=embeddings)

In [None]:
# Test retrieval with a query: fetch the single most similar stored document.
query = "how many hello occurred in the context?"
retrieved_docs = vector_store.similarity_search(query, k=1)

# Fail with a clear message instead of a bare IndexError when the search
# returns nothing (e.g. the cell that adds documents has not been run).
if not retrieved_docs:
    raise ValueError(
        "No documents were retrieved; add documents to vector_store before querying."
    )

# Text of the most relevant document, used as the context for the model.
retrieved_text = retrieved_docs[0].page_content

# Question-answering prompt (the variable names say "summary", but the model
# is asked to answer `query` from the retrieved context). The prompt is built
# from explicit pieces so that source-code indentation and trailing spaces do
# not leak into the text sent to the model.
summary_prompt = (
    "Use the given context to answer user's query.\n\n"
    f"Query:\n{query}\n\n"
    f"Context:\n{retrieved_text}\n\n"
    "Give a concise answer in one sentence."
)
short_summary = llm.invoke(summary_prompt)

# Prints the whole response object (content plus metadata); use
# `short_summary.content` to get only the answer text.
print("Short Output:", short_summary)

Short Output: content='One "hello" occurred in the context.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 51, 'total_tokens': 60, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-4-turbo-2024-04-09', 'system_fingerprint': 'fp_5603ee5e2e', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'protected_material_code': {'filtered': False, 'detected': False}, 'protected_material_text': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'fi

In [None]:
# Show only the reply text (the response's `content` attribute), leaving out
# the metadata that printing the whole response object includes.
print(f"short_summary: {short_summary.content}")

short_summary: One "hello" occurred in the context.
