# Initialize Azure OpenAI

In [1]:
# Exempt the loopback addresses from any configured HTTP(S) proxy.
# Shell equivalent: set NO_PROXY=localhost,127.0.0.1,::1
import os

# The previous value "127.0.1" did not match the IPv4 loopback address
# "127.0.0.1" named in the comment, so that address was not exempted.
os.environ["NO_PROXY"] = "localhost,127.0.0.1,::1"

In [2]:
# Azure OpenAI setup: obtain an access token via the default Azure credential
# chain and publish it through the environment.
import os

from azure.identity import DefaultAzureCredential
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI

# Load variables from a .env file (if present) into the process environment.
load_dotenv()

# Request a token for the Cognitive Services scope.
default_credential = DefaultAzureCredential()
token = default_credential.get_token("https://cognitiveservices.azure.com/.default")
api_key = token.token

# Store the token string in AZURE_OPENAI_API_KEY.
# NOTE(review): presumably AzureChatOpenAI reads this variable when it is
# constructed, and the token is fetched only once here, so a long-lived
# kernel may need this cell re-run after the token expires. Confirm.
os.environ["AZURE_OPENAI_API_KEY"] = api_key

In [3]:
# Chat model client for the "gpt-4o-mini" deployment: temperature 0,
# no explicit token cap or timeout, at most two retries.
llm_settings = {
    "azure_endpoint": "https://begobaiatest.openai.azure.com/",
    "azure_deployment": "gpt-4o-mini",
    "openai_api_version": "2024-05-01-preview",
    "temperature": 0,
    "max_tokens": None,
    "timeout": None,
    "max_retries": 2,
}
llm = AzureChatOpenAI(**llm_settings)

## Initialize embeddings

In [4]:
# Set up the HuggingFace sentence-embedding model that is later handed to
# the vector store as its embedding function.
from langchain_huggingface import HuggingFaceEmbeddings

EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Run the model on the CPU.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL, model_kwargs=dict(device="cpu"))

## Query ChromaDB (newHQ)

In [5]:
# 3) Load your existing ChromaDB using the UNC path
import os

from langchain_chroma import Chroma

PERSIST_DIR     = r"\\dnsbego.de\dfsbego\home04\FuhrmannD\Documents\01_Trainee\Master\Thesis\code\agentic_system\data\newHQ_chroma_db"
COLLECTION_NAME = "berenberg_newhq_docs"

# Fail fast when the network share is unreachable or the path is mistyped.
# A persistent Chroma client creates a missing directory/collection instead of
# raising, which would leave an empty store and make every search return
# no hits without any error.
if not os.path.isdir(PERSIST_DIR):
    raise FileNotFoundError(f"Chroma persist directory not found: {PERSIST_DIR}")

# Open the persisted collection; queries are embedded with `embeddings`.
vectordb = Chroma(
    persist_directory=PERSIST_DIR,
    embedding_function=embeddings,
    collection_name=COLLECTION_NAME
)

In [6]:
# 4) Semantic search: fetch the TOP_K best-matching documents for the query,
#    each paired with its score.
TOP_K = 5
query = "What parking options are available?"

results = vectordb.similarity_search_with_score(query=query, k=TOP_K)

In [7]:
# Display the raw search hits as a list of (Document, score) tuples.
results

[(Document(id='77f3b202-ba4b-4788-825a-6f11fc2ee780', metadata={'page_label': '1', 'file_type': '.pdf', 'creator': 'PyPDF', 'total_pages': 1, 'subdirectory': 'Confluence New HQ', 'producer': 'PyPDF', 'source': 'W:\\T1368-WAM AI\\03 Projects\\02 AI Projects\\01 Berenberg AI Assistant (BAIA)\\15 newHQ data\\_new\\Confluence New HQ\\20252807_CONFLUENCE_Parken.pdf', 'creationdate': '', 'page': 0}, page_content='10) Parken & Mobilität\nParken & Mobilität\nParken & Mobilität\nParkplatzbuchung\nFahrrad\nParkplatzbuchung\nFrage Antwort\n1 Welche Parkmöglichkeiten gibt es am \nneuen Standort?\nAm Standort gibt es 168 Pakplätze, davon 72 mit E-Ladesäulen. Sowie 157 Indoor-\nFahrradstellplätze, 12 davon mit E-Ladesäule.\n2 Wieviele Kundenparkplätze stehen zur \nVerfügung?\nInsgesamt gibt es 10 Kundenparkplätze.\n3 Wie kann ich einen Parkplatz buchen?Die Buchung erfolgt über eine zentrale Park-App.\n4 Kann ich mehrere Kennzeichen in der \nApp hinterlegen?\nJa, in der App können mehrere Kennzeichen

In [8]:
# 5) Walk through the hits in rank order and print, for each one, its score,
#    the first 500 characters of its text, and its metadata.
for rank, hit in enumerate(results, start=1):
    doc, score = hit
    preview = doc.page_content[:500].strip()
    print(f"=== Hit #{rank} (score: {score:.4f}) ===\n")
    print(preview, "...\n")
    print("   ↳ metadata:", doc.metadata, "\n")

=== Hit #1 (score: 1.2384) ===

10) Parken & Mobilität
Parken & Mobilität
Parken & Mobilität
Parkplatzbuchung
Fahrrad
Parkplatzbuchung
Frage Antwort
1 Welche Parkmöglichkeiten gibt es am 
neuen Standort?
Am Standort gibt es 168 Pakplätze, davon 72 mit E-Ladesäulen. Sowie 157 Indoor-
Fahrradstellplätze, 12 davon mit E-Ladesäule.
2 Wieviele Kundenparkplätze stehen zur 
Verfügung?
Insgesamt gibt es 10 Kundenparkplätze.
3 Wie kann ich einen Parkplatz buchen?Die Buchung erfolgt über eine zentrale Park-App.
4 Kann ich mehrere Kennze ...

   ↳ metadata: {'page_label': '1', 'file_type': '.pdf', 'creator': 'PyPDF', 'total_pages': 1, 'subdirectory': 'Confluence New HQ', 'producer': 'PyPDF', 'source': 'W:\\T1368-WAM AI\\03 Projects\\02 AI Projects\\01 Berenberg AI Assistant (BAIA)\\15 newHQ data\\_new\\Confluence New HQ\\20252807_CONFLUENCE_Parken.pdf', 'creationdate': '', 'page': 0} 

=== Hit #2 (score: 1.2384) ===

10) Parken & Mobilität
Parken & Mobilität
Parken & Mobilität
Parkplatzbuchung
Fa

In [9]:
# Wrap the vector store as a retriever that is configured with k=3.
retriever = vectordb.as_retriever(search_kwargs=dict(k=3))

In [10]:
from langchain.chains import RetrievalQA

# Retrieval QA chain using the "refine" chain type; the response also
# carries the source documents that were retrieved.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="refine",
    return_source_documents=True,
)

In [11]:
# Ask the QA chain the parking question and print the full response dict.
query = "What parking options are available?"
result = qa_chain.invoke({"query": query})
print(result)

{'query': 'What parking options are available?', 'result': 'At the new location, there are 168 parking spaces available, including 46 equipped with E-charging stations. You can specify whether you need a standard parking space, an E-parking space, or a disabled parking space when making a reservation. \n\nParking can be easily booked through a central parking app with just one click, and you will receive immediate confirmation. You can reserve a spot for a maximum of 5 days at a time. On the day of your booking, you can access the underground garage using your general access card. \n\nAdditionally, the app features an in-app navigation map to help you find your designated parking space easily. At the end of the month, the usage of the parking space will be automatically billed. For customers, there are also 10 designated parking spaces available. If you enter the underground garage without a reservation, you can book a spot spontaneously via the app, but unauthorized vehicles may be su

In [12]:
# Display the response dict returned by the QA chain.
result

{'query': 'What parking options are available?',
 'result': 'At the new location, there are 168 parking spaces available, including 46 equipped with E-charging stations. You can specify whether you need a standard parking space, an E-parking space, or a disabled parking space when making a reservation. \n\nParking can be easily booked through a central parking app with just one click, and you will receive immediate confirmation. You can reserve a spot for a maximum of 5 days at a time. On the day of your booking, you can access the underground garage using your general access card. \n\nAdditionally, the app features an in-app navigation map to help you find your designated parking space easily. At the end of the month, the usage of the parking space will be automatically billed. For customers, there are also 10 designated parking spaces available. If you enter the underground garage without a reservation, you can book a spot spontaneously via the app, but unauthorized vehicles may be s

In [14]:
from langchain_core.prompts import PromptTemplate

# Prompt with two placeholders: {context} and {question}. It tells the model
# to admit when it does not know the answer.
custom_prompt_template = """You are a Confluence chatbot answering questions. Use the following pieces of context to answer the question at the end. If you don't know the answer, say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:"""
CUSTOMPROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=custom_prompt_template,
)

# Rebuild the QA chain (replacing the earlier `qa_chain`) with the "stuff"
# chain type and the custom prompt.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": CUSTOMPROMPT},
    return_source_documents=True,
)

# Run the same question through the new chain and print the response dict.
query = "What parking options are available?"
result = qa_chain.invoke({"query": query})
print(result)

{'query': 'What parking options are available?', 'result': 'At the new location, there are a total of 168 parking spaces, of which 72 are equipped with E-charging stations. Additionally, there are 157 indoor bicycle parking spaces, with 12 of those also having E-charging stations.', 'source_documents': [Document(id='77f3b202-ba4b-4788-825a-6f11fc2ee780', metadata={'creator': 'PyPDF', 'producer': 'PyPDF', 'page': 0, 'subdirectory': 'Confluence New HQ', 'creationdate': '', 'total_pages': 1, 'page_label': '1', 'file_type': '.pdf', 'source': 'W:\\T1368-WAM AI\\03 Projects\\02 AI Projects\\01 Berenberg AI Assistant (BAIA)\\15 newHQ data\\_new\\Confluence New HQ\\20252807_CONFLUENCE_Parken.pdf'}, page_content='10) Parken & Mobilität\nParken & Mobilität\nParken & Mobilität\nParkplatzbuchung\nFahrrad\nParkplatzbuchung\nFrage Antwort\n1 Welche Parkmöglichkeiten gibt es am \nneuen Standort?\nAm Standort gibt es 168 Pakplätze, davon 72 mit E-Ladesäulen. Sowie 157 Indoor-\nFahrradstellplätze, 12 d

In [None]:
# Display the response dict produced with the custom prompt.
result

{'query': 'What parking options are available?',
 'result': 'At the new location, there are a total of 168 parking spaces, of which 72 are equipped with E-charging stations. Additionally, there are 157 indoor bicycle parking spaces, with 12 of those also having E-charging stations.',
 'source_documents': [Document(id='77f3b202-ba4b-4788-825a-6f11fc2ee780', metadata={'creator': 'PyPDF', 'producer': 'PyPDF', 'page': 0, 'subdirectory': 'Confluence New HQ', 'creationdate': '', 'total_pages': 1, 'page_label': '1', 'file_type': '.pdf', 'source': 'W:\\T1368-WAM AI\\03 Projects\\02 AI Projects\\01 Berenberg AI Assistant (BAIA)\\15 newHQ data\\_new\\Confluence New HQ\\20252807_CONFLUENCE_Parken.pdf'}, page_content='10) Parken & Mobilität\nParken & Mobilität\nParken & Mobilität\nParkplatzbuchung\nFahrrad\nParkplatzbuchung\nFrage Antwort\n1 Welche Parkmöglichkeiten gibt es am \nneuen Standort?\nAm Standort gibt es 168 Pakplätze, davon 72 mit E-Ladesäulen. Sowie 157 Indoor-\nFahrradstellplätze, 12

In [26]:
# Show only the answer text stored under the 'result' key.
result['result']

'At the new location, there are a total of 168 parking spaces, of which 72 are equipped with E-charging stations. Additionally, there are 157 indoor bicycle parking spaces, with 12 of those also having E-charging stations.'