In [21]:
from llama_index import LLMPredictor, ServiceContext
# from llama_index import VectorStoreIndex
# from llama_index import SimpleDirectoryReader
from llama_index import Prompt
from llama_index import PromptHelper
from llama_index import StorageContext, load_index_from_storage
from llama_index.evaluation import ResponseEvaluator
from llama_index.llms import OpenAI
# from langchain.chat_models import ChatOpenAI
from IPython.display import Markdown, display

import environ
import openai

In [2]:
# Credentials come from the environment via django-environ (presumably a
# local .env file picked up by read_env); for now this is the author's own key.
env = environ.Env()
environ.Env.read_env()
# Keep the key in API_KEY and hand the same value to the openai client.
openai.api_key = API_KEY = env("OPENAI_API_KEY")



# Load vector store

In [3]:
# Directory that holds the persisted vector store.
embedding_path = "vector_db"

# Rebuild the storage context from that directory, then restore the index
# from the rebuilt context.
storage_context_from_load = StorageContext.from_defaults(
    persist_dir=embedding_path,
)
index_loaded = load_index_from_storage(
    storage_context_from_load,
)

# Evaluator

In [None]:
def eval_response(response, llm_predictor=None):
    """Evaluate a response and, if it passes, display it and list its sources.

    The response is handed to a ``ResponseEvaluator``; this function compares
    the evaluator's result against the strings ``"YES"`` and ``"NO"``.

    Args:
        response: Object exposing ``.response`` (answer text) and
            ``.source_nodes`` (each with ``.node.metadata`` containing
            ``"page_label"`` and ``"file_name"``).
        llm_predictor: Predictor used to build the evaluator's service
            context. When omitted, the notebook-level ``llm`` is used if it
            has already been defined.

    Returns:
        On ``"YES"``: a dict ``{"ref_<i>": {"page": ..., "document": ...}}``
        with one entry per source node. In every other case: ``None``.
    """
    if llm_predictor is None:
        # Previously this function read the global `llm` unconditionally,
        # which raised NameError when called before the cell defining it.
        # NOTE(review): with None, ServiceContext.from_defaults is expected to
        # pick its own default predictor - confirm for the installed version.
        llm_predictor = globals().get("llm")

    service_context_eval = ServiceContext.from_defaults(llm_predictor=llm_predictor)
    evaluator = ResponseEvaluator(service_context=service_context_eval)
    eval_result = evaluator.evaluate(response)

    if eval_result == "NO":
        print(str(eval_result))
        print("info not available")
        return None

    if eval_result != "YES":
        # Anything other than the two expected verdicts.
        print("something went wrong, try again!")
        return None

    display(Markdown(response.response))
    return {
        f"ref_{i}": {
            "page": source_node.node.metadata["page_label"],
            "document": source_node.node.metadata["file_name"],
        }
        for i, source_node in enumerate(response.source_nodes)
    }

# as_query_engine

In [17]:
# Custom question-answering prompt: the retrieved context sits between two
# dashed rules, followed by the "facts only" instruction and the question.
template = "\n".join(
    [
        "We have provided context information below. ",
        "---------------------",
        "{context_str}",
        "---------------------",
        "Do not give me an answer if it is not mentioned in the context as a fact. ",
        "Given this information, please provide me with an answer to the following question:",
        "{query_str}",
        "",  # trailing empty element yields the final newline
    ]
)
qa_template = Prompt(template)

# Query engine over the loaded index that answers with the custom prompt.
query_engine_1 = index_loaded.as_query_engine(
    similarity_top_k=3,
    text_qa_template=qa_template,
)

In [18]:
# Send a test question through the query engine that uses the custom QA prompt.
response_1 = query_engine_1.query("What do you know about the city of Berlin?")

In [19]:
# Render the answer text as Markdown.
display(Markdown(response_1.response))

Based on the provided context information, there is no specific information about the city of Berlin.

In [8]:
# Inspect the metadata attached to the response (one entry per source, as the recorded output shows).
response_1.metadata

{'101c5b70-98d4-432f-bfd5-80811b2128f7': {'page_label': '31',
  'file_name': 'dok_ba013155.pdf'},
 '9db2aa01-0b59-481e-84bc-9cde6f424936': {'page_label': '10',
  'file_name': 'merkblatt-11-berufsberatung_ba015370.pdf'},
 'bc2e065b-9c57-4449-8dea-e5ce8abce924': {'page_label': '16',
  'file_name': 'merkblatt-algii_ba015397.pdf'}}

In [9]:
# Keep the plain answer text next to a per-source reference table.
response_text = response_1.response

# Each value of response_1.metadata is read for its "page_label" and
# "file_name" entries; an absent (None) metadata is treated as "no sources".
response_metadata = {}
source_metadata = response_1.metadata or {}
message_lines = [f"There are {len(source_metadata)} sources:"]
for i, node_meta in enumerate(source_metadata.values()):
    key_name = f"ref_{i}"
    page, document = node_meta["page_label"], node_meta["file_name"]
    response_metadata[key_name] = {"page": page, "document": document}
    # One bullet per source; the key and "Page" are now separated.
    message_lines.append(f" - {key_name}: Page {page} from file {document}")
response_metadata_message = "\n".join(message_lines)

response_metadata

{'ref_0': {'page': '31', 'document': 'dok_ba013155.pdf'},
 'ref_1': {'page': '10',
  'document': 'merkblatt-11-berufsberatung_ba015370.pdf'},
 'ref_2': {'page': '16', 'document': 'merkblatt-algii_ba015397.pdf'}}

In [99]:
# Run the evaluator on the query-engine response; the answer is displayed and
# references are returned only when the verdict is "YES".
eval_response(response_1)

NO
info not available


# as_chat_engine

In [85]:
# Configure prompt parameters and initialise helper.
# The three values are passed to PromptHelper positionally, in this order.
max_input_size = 4096
num_output = 256
max_chunk_overlap = 0.2
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

system_prompt = (
    """
    You are an expert on the German administration system and your job is to answer technical questions.
    Assume that all questions are related to the the provided context.
    Keep your answers based on facts, do not hallucinate information.
    """
)

# Despite its name, `llm` is an LLMPredictor wrapping the OpenAI LLM; the
# evaluator cell reads this notebook-level name, so it is kept as is.
# llama_index's OpenAI wrapper selects the model through `model`
# (`model_name` is the LangChain ChatOpenAI spelling). The system prompt is
# given to the chat engine below instead of the LLM constructor.
# NOTE(review): confirm both against the installed llama_index version.
llm = LLMPredictor(llm=OpenAI(
    temperature=0,
    model="gpt-3.5-turbo",
))

service_context = ServiceContext.from_defaults(llm_predictor=llm, prompt_helper=prompt_helper)

# Pass the service context and system prompt on, so the chat engine actually
# uses the configured LLM, prompt helper and instructions.
chat_engine = index_loaded.as_chat_engine(
    service_context=service_context,
    chat_mode="context",
    system_prompt=system_prompt,
    # verbose=True
    similarity_top_k=3
)

In [101]:
# Ask the chat engine the same test question that was used for the query engine.
response_chat_test = chat_engine.chat("What do you know about the city of Berlin?")
# Alternative test question about the indexed documents:
# response_chat_test = chat_engine.chat("Is the unemployment benefit based on the number of children I have?")

In [102]:
# Show the full response object returned by the chat engine.
response_chat_test

AgentChatResponse(response="Berlin is the capital and largest city of Germany. It is located in the northeastern part of the country and is known for its rich history, vibrant culture, and diverse population. Here are some key points about Berlin:\n\n1. History: Berlin has a significant historical background, including its division during the Cold War when it was split into East Berlin (under Soviet control) and West Berlin (controlled by the Allies). The Berlin Wall, which separated the two parts, became a symbol of the Cold War. The city was reunified in 1990 after the fall of the wall.\n\n2. Landmarks: Berlin is home to several iconic landmarks, such as the Brandenburg Gate, a symbol of German unity, and the Reichstag building, which houses the German parliament. Other notable sites include the Berlin Wall Memorial, Checkpoint Charlie, and the Holocaust Memorial.\n\n3. Cultural Hub: Berlin is renowned for its thriving arts and cultural scene. It has numerous museums, galleries, thea

In [103]:
# Evaluate the chat response: eval_response returns the references dict on a
# "YES" verdict and None otherwise.
metadata_chat = eval_response(response_chat_test)

metadata_chat

NO
info not available


In [88]:
# List the page and file of every source node attached to the chat response.
chat_source_nodes = response_chat_test.source_nodes
print(f"There are {len(chat_source_nodes)} sources.")
for source_node in chat_source_nodes:
    node_meta = source_node.node.metadata
    print(f'Page {node_meta["page_label"]} from file {node_meta["file_name"]}')

There 3 sources.
Page 31 from file dok_ba013155.pdf
Page 10 from file merkblatt-11-berufsberatung_ba015370.pdf
Page 16 from file merkblatt-algii_ba015397.pdf


In [94]:
# Show only the answer text of the chat response.
response_chat_test.response

"Berlin is the capital and largest city of Germany. It is located in the northeastern part of the country and is known for its rich history, vibrant culture, and diverse population. Here are some key points about Berlin:\n\n1. History: Berlin has a significant historical background, including its division during the Cold War when it was split into East Berlin (under Soviet control) and West Berlin (controlled by the Allies). The Berlin Wall, which separated the two parts, became a symbol of the Cold War. The city was reunified in 1990 after the fall of the wall.\n\n2. Landmarks: Berlin is home to several iconic landmarks, such as the Brandenburg Gate, a symbol of German unity, and the Reichstag building, which houses the German parliament. Other notable sites include the Berlin Wall Memorial, Checkpoint Charlie, and the Holocaust Memorial.\n\n3. Cultural Hub: Berlin is renowned for its thriving arts and cultural scene. It has numerous museums, galleries, theaters, and music venues. The

In [100]:
# Number of source nodes attached to the chat response.
len(response_chat_test.source_nodes)

3

In [91]:
# Inspect the first source node in full.
response_chat_test.source_nodes[0]

NodeWithScore(node=TextNode(id_='101c5b70-98d4-432f-bfd5-80811b2128f7', embedding=None, metadata={'page_label': '31', 'file_name': 'dok_ba013155.pdf'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='360b1b3b-dfeb-455b-8a57-9e29ae2036f5', node_type=None, metadata={'page_label': '31', 'file_name': 'dok_ba013155.pdf'}, hash='e480367de126c1d8d7861417949102d1ec5d2e2d2f802eec483440253fd1a247')}, hash='e480367de126c1d8d7861417949102d1ec5d2e2d2f802eec483440253fd1a247', text='315 In Deutschland  gearbeitet und im Ausland gewohnt\n5  Sie haben in Deutschland \n gearbeitet und haben als \nGrenzgängerin bzw. Grenz -\ngänger im (benach  barten) \nAusland gewohnt?\n5.1  Zusätzliche Arbeitsuchendmeldung von \nGrenzgängerinnen bzw. Grenzgängern im \nbisherigen Beschäftigungsstaat\nWenn Sie in einem anderen Mitgliedstaat wohnen und \neine Beschäftigung von dort aus als Grenzgängerin \nbzw. Grenzgänger in Deutschlan

In [93]:
# Drill down to the page label stored in the first source node's metadata.
response_chat_test.source_nodes[0].node.metadata["page_label"]

'31'