In [1]:
from llama_index import LLMPredictor, ServiceContext
from llama_index import VectorStoreIndex
from llama_index import SimpleDirectoryReader
from llama_index import Prompt
from llama_index import StorageContext, load_index_from_storage
# from llama_index.llms import OpenAI

from langchain.chat_models import ChatOpenAI

import environ
import openai

In [2]:
# For now I use my key
env = environ.Env()
environ.Env.read_env()
API_KEY = env("OPENAI_API_KEY")
openai.api_key = API_KEY



In [3]:
folder_vector_db = "vector_bd_CANC"

# Create and save vector database

In [4]:
doc_path = "documents_pdf/merkblatt-fuer-arbeitslose_ba036520.pdf"

In [5]:
documents = SimpleDirectoryReader(input_files=[doc_path]).load_data()
llm = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"))
service_context = ServiceContext.from_defaults(llm_predictor=llm)
index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context
)

# Define prompt
template = (
    "We have provided context information below. \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Given this information, please answer the question and each answer should start with code word Response: {query_str}\n"
)
qa_template = Prompt(template)

# Use the custom prompt when querying
query_engine_with_prompt = index.as_query_engine(text_qa_template=qa_template, similarity_top_k=3,)

In [6]:
response = query_engine_with_prompt.query("What do you know about the city of Berlin?")

In [7]:
response.response

'Response: Based on the provided context information, there is no direct mention or information about the city of Berlin.'

In [8]:
response.metadata

{'2bd21c5c-5fb8-4505-ae7b-511fd989ba4c': {'page_label': '5',
  'file_name': 'merkblatt-fuer-arbeitslose_ba036520.pdf'},
 '04542c52-be10-4102-b72e-d76283a54e63': {'page_label': '2',
  'file_name': 'merkblatt-fuer-arbeitslose_ba036520.pdf'},
 '928942b0-3482-484b-acfe-dd1675b3ef5b': {'page_label': '3',
  'file_name': 'merkblatt-fuer-arbeitslose_ba036520.pdf'}}

In [9]:
# Save index
index.storage_context.persist(persist_dir=folder_vector_db)

# Load vector database

In [10]:
# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir=folder_vector_db)
# load index
index_loaded = load_index_from_storage(storage_context)
# create query engine
query_engine_from_loaded = index_loaded.as_query_engine(text_qa_template=qa_template, similarity_top_k=3,)

In [11]:
response_from_loaded = query_engine_from_loaded.query("What do you know about the city of Berlin?")

In [12]:
response_from_loaded.response

'Response: Based on the provided context information, there is no direct mention or information about the city of Berlin. The context information primarily focuses on the Merkblatt (information brochure) for unemployed individuals and the services provided by the Agentur für Arbeit (Federal Employment Agency).'

In [13]:
response_from_loaded.metadata

{'2bd21c5c-5fb8-4505-ae7b-511fd989ba4c': {'page_label': '5',
  'file_name': 'merkblatt-fuer-arbeitslose_ba036520.pdf'},
 '04542c52-be10-4102-b72e-d76283a54e63': {'page_label': '2',
  'file_name': 'merkblatt-fuer-arbeitslose_ba036520.pdf'},
 '928942b0-3482-484b-acfe-dd1675b3ef5b': {'page_label': '3',
  'file_name': 'merkblatt-fuer-arbeitslose_ba036520.pdf'}}

# as_query_engine

In [14]:
query_engine_1 = index_loaded.as_query_engine(text_qa_template=qa_template, similarity_top_k=3)

In [15]:
response_1 = query_engine_1.query("What do you know about the city of Berlin?")

In [16]:
response_1.response

'Response: Based on the provided context information, there is no specific information about the city of Berlin. The context information is related to a brochure or leaflet about unemployment benefits and services provided by the German Federal Employment Agency (Agentur für Arbeit).'

In [17]:
response_1.metadata

{'2bd21c5c-5fb8-4505-ae7b-511fd989ba4c': {'page_label': '5',
  'file_name': 'merkblatt-fuer-arbeitslose_ba036520.pdf'},
 '04542c52-be10-4102-b72e-d76283a54e63': {'page_label': '2',
  'file_name': 'merkblatt-fuer-arbeitslose_ba036520.pdf'},
 '928942b0-3482-484b-acfe-dd1675b3ef5b': {'page_label': '3',
  'file_name': 'merkblatt-fuer-arbeitslose_ba036520.pdf'}}

In [18]:
response_text = response_1.response
response_metadata = dict()
response_metadata_message = f'There {len(response_1.metadata)} sources:'
for i, meta_data in enumerate(response_1.metadata):
    key_name = "ref_" + str(i)
    response_metadata[key_name] = {
        "page": response_1.metadata[meta_data]["page_label"],
        "document":response_1.metadata[meta_data]["file_name"]
    }
    response_metadata_message += "\n -" + key_name + f'Page {response_1.metadata[meta_data]["page_label"]} from file {response_1.metadata[meta_data]["file_name"]}'

response_metadata

{'ref_0': {'page': '5', 'document': 'merkblatt-fuer-arbeitslose_ba036520.pdf'},
 'ref_1': {'page': '2', 'document': 'merkblatt-fuer-arbeitslose_ba036520.pdf'},
 'ref_2': {'page': '3', 'document': 'merkblatt-fuer-arbeitslose_ba036520.pdf'}}

# as_chat_engine

In [19]:
query_engine_chat = index_loaded.as_chat_engine(
    text_qa_template=qa_template,
    similarity_top_k=3,
    chat_mode="context")

In [20]:
response_chat = query_engine_chat.chat("What do you know about the city of Berlin?")

In [21]:
response_chat

AgentChatResponse(response="Berlin is the capital and largest city of Germany. It is located in the northeastern part of the country and is known for its rich history, vibrant culture, and diverse population. Here are some key points about Berlin:\n\n1. History: Berlin has a significant historical background, including its role as the capital of Prussia, the German Empire, the Weimar Republic, and Nazi Germany. It was also divided into East and West Berlin during the Cold War, with the Berlin Wall separating the two parts.\n\n2. Landmarks: The city is home to several iconic landmarks, such as the Brandenburg Gate, Berlin Wall Memorial, Reichstag building, Checkpoint Charlie, and the Berlin TV Tower. These landmarks symbolize the city's history and reunification.\n\n3. Cultural Hub: Berlin is renowned for its thriving arts and cultural scene. It has numerous museums, art galleries, theaters, and music venues. The city hosts various festivals, including the Berlin International Film Fest

In [22]:
response_chat.response

"Berlin is the capital and largest city of Germany. It is located in the northeastern part of the country and is known for its rich history, vibrant culture, and diverse population. Here are some key points about Berlin:\n\n1. History: Berlin has a significant historical background, including its role as the capital of Prussia, the German Empire, the Weimar Republic, and Nazi Germany. It was also divided into East and West Berlin during the Cold War, with the Berlin Wall separating the two parts.\n\n2. Landmarks: The city is home to several iconic landmarks, such as the Brandenburg Gate, Berlin Wall Memorial, Reichstag building, Checkpoint Charlie, and the Berlin TV Tower. These landmarks symbolize the city's history and reunification.\n\n3. Cultural Hub: Berlin is renowned for its thriving arts and cultural scene. It has numerous museums, art galleries, theaters, and music venues. The city hosts various festivals, including the Berlin International Film Festival (Berlinale) and the Ca

In [23]:
print(f"There {len(response_chat.source_nodes)} sources.")
for source_node in response_chat.source_nodes:
    print(f'Page {source_node.node.metadata["page_label"]} from file {source_node.node.metadata["file_name"]}')

There 3 sources.
Page 5 from file merkblatt-fuer-arbeitslose_ba036520.pdf
Page 2 from file merkblatt-fuer-arbeitslose_ba036520.pdf
Page 3 from file merkblatt-fuer-arbeitslose_ba036520.pdf


In [24]:
response_metadata_chat = dict()

for i, source_node in enumerate(response_chat.source_nodes):
    key_name = "ref_" + str(i)
    response_metadata_chat[key_name] = {
        "page": source_node.node.metadata["page_label"],
        "document":source_node.node.metadata["file_name"]
    }

response_metadata_chat

{'ref_0': {'page': '5', 'document': 'merkblatt-fuer-arbeitslose_ba036520.pdf'},
 'ref_1': {'page': '2', 'document': 'merkblatt-fuer-arbeitslose_ba036520.pdf'},
 'ref_2': {'page': '3', 'document': 'merkblatt-fuer-arbeitslose_ba036520.pdf'}}