In [8]:
from glm4_rag import ChatGLM, ChatGLMEmbeddings
from llama_index.core import SimpleDirectoryReader
import os
from llama_index.core import VectorStoreIndex, StorageContext, Settings, load_index_from_storage
from llama_index.core.schema import NodeWithScore

import gradio as gr
import time

In [9]:
# The Zhipu API key is a secret: read it from the environment instead of
# committing it with the notebook. Export ZHIPU_API_KEY before starting the kernel.
ZHIPU_API_KEY = os.environ.get('ZHIPU_API_KEY', '')
if not ZHIPU_API_KEY:
  # Warn early; without a key the model clients built later cannot authenticate.
  print('Warning: ZHIPU_API_KEY environment variable is not set; ChatGLM requests will fail.')
FILE_DIR = '../../data/rag'  # knowledge-base directory
STORAGE_DIR = '../../storage'  # on-disk cache of the RAG index
PDF_URL = 'http://10.177.47.31:3330/business.pdf'  # URL of the PDF shown in the viewer

In [10]:
# Install the GLM chat model as the LLM on LlamaIndex's global Settings object.
print('Build Up LLM')
Settings.llm = ChatGLM(
  model='glm-4',
  reuse_client=True,
  api_key=ZHIPU_API_KEY,
)
# Install the GLM embedding model on the same Settings object.
print('Build Up Embed Model')
Settings.embed_model = ChatGLMEmbeddings(
  model='embedding-2',
  reuse_client=True,
  api_key=ZHIPU_API_KEY,
)
print()

Build Up LLM
Build Up Embed Model


In [11]:
# Reuse the persisted index when the cache directory exists; otherwise build
# one from the knowledge-base files and persist it for the next run.
if os.path.exists(STORAGE_DIR):
  print('Loading Index From Storage')
  # Re-open the index that an earlier run wrote to STORAGE_DIR.
  storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR)
  index = load_index_from_storage(storage_context)
else:
  start = time.time()
  print('Build Up Directory', time.time() - start)
  documents = SimpleDirectoryReader(FILE_DIR).load_data()
  print('Build Up Vectory Index', time.time() - start)
  index = VectorStoreIndex.from_documents(documents)
  print('Done', time.time() - start)
  print()
  index.storage_context.persist(persist_dir=STORAGE_DIR)

# Derive the two query-time objects from the index and report how long that took.
start = time.time()
print('Getting response...')
query_engine = index.as_query_engine()
query_retriever = index.as_retriever()
print('Done', time.time() - start)

Getting response ....
Done 0.022955656051635742


In [12]:
# Initial page number for the PDF viewer (1 = first page).
page_label = 1

def get_file_detail(result: "NodeWithScore"):
  """Collect the fields of one retrieved node that the UI displays.

  Args:
    result: Retrieved node; must expose `.text` and a `.metadata` mapping.

  Returns:
    A dict with keys 'text', 'file' and 'page'. 'file' and 'page' are None
    when the metadata has no 'file_path' / 'page_label' entry, so callers
    can test them for truthiness instead of catching KeyError.
  """
  metadata = result.metadata or {}
  return {
    'text': result.text,
    # .get(): not every source document carries these metadata keys.
    'file': metadata.get('file_path'),
    'page': metadata.get('page_label'),
  }


def process_input(history, html_box):
  """Answer the latest user message and point the PDF viewer at a cited page.

  Args:
    history: Chat history as a list of [user, bot] rows; the last row holds
      the pending question in its first slot.
    html_box: Current HTML of the PDF viewer. It is returned unchanged when
      no retrieved passage carries a page number.

  Returns:
    (history, html): the history extended with the retrieved passages and
    the model's answer, and the HTML for the PDF viewer.
  """
  # Nothing to answer yet.
  if not history:
    return history, html_box
  question = history[-1][0]
  # File-only or blank submissions carry no question text; skip the model call.
  if not isinstance(question, str) or not question.strip():
    return history, html_box

  response = query_engine.query(question)

  # Track the page in a local variable. A module-level global would be shared
  # by every Gradio session, letting one user's query move another user's viewer.
  cited_page = None
  info_list = []
  for result in query_retriever.retrieve(question):
    detail = get_file_detail(result)
    # Only print a "Page N" header when the passage actually has a page number.
    header = f'Page {detail["page"]}\n' if detail['page'] else ''
    info_list.append(f'{header}{detail["text"]}')
    if detail['page']:
      # NOTE(review): the last passage with a page wins, as before — confirm
      # whether the first (presumably best-ranked) hit would be the better target.
      cited_page = detail['page']

  if history[-1][1]:
    history.append([None, None])
  # Rebuild the row instead of assigning into it, so tuple rows work too.
  history[-1] = [history[-1][0], '\n\n'.join(info_list)]
  history.append([None, str(response)])

  if cited_page is None:
    if html_box:
      # No page reference found: keep whatever this session is currently viewing.
      return history, html_box
    cited_page = page_label  # module-level default page, read-only here
  return history, f'<embed src="{PDF_URL}#page={cited_page}" width="700" height="900" type="application/pdf">'


def add_message(history, message):
  """Append the submitted files and text to the chat history and lock the input.

  Args:
    history: Chat history list; extended in place.
    message: Mapping with a "files" iterable and a "text" entry.

  Returns:
    (history, textbox): the updated history and a non-interactive
    MultimodalTextbox showing a "Processing..." placeholder.
  """
  # Each uploaded file becomes its own user turn, wrapped in a one-element tuple.
  history.extend(((path,), None) for path in message["files"])
  text = message["text"]
  if text is not None:
    history.append((text, None))
  locked_box = gr.MultimodalTextbox(
    value=None, interactive=False, file_types=None, placeholder="Processing...", show_label=False)
  return history, locked_box


def main():
  """Build the two-column Gradio UI (chat on the left, PDF viewer on the right) and serve it."""
  # Read the stylesheet inside a context manager so the file handle is closed
  # right away instead of being left to the garbage collector.
  with open('./main.css', mode='r', encoding='utf-8') as css_file:
    css = css_file.read()

  with gr.Blocks(css=css) as demo:
    with gr.Row():
      with gr.Column(scale=1):
        chatbot = gr.Chatbot(scale=1)
        chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"],
                                          placeholder="Enter message or upload file...", show_label=False)
      with gr.Column(scale=1):
        # The viewer starts on the first page of the PDF.
        html_box = gr.HTML(value=f'<embed src="{PDF_URL}#page=1" width="700" height="900" type="application/pdf">')
      # Submit pipeline: 1) record the message and lock the input,
      # 2) answer it and update the PDF viewer, 3) unlock the input again.
      chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
      bot_msg = chat_msg.then(process_input, [chatbot, html_box], [chatbot, html_box])
      bot_msg.then(lambda: gr.MultimodalTextbox(
        interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False), None, [chat_input])
  # '0.0.0.0' makes the server listen on all network interfaces, not only localhost.
  demo.launch(server_name='0.0.0.0')


# Launch the app when this code runs as the main module; that includes running
# the cell in the notebook, as the recorded "Running on local URL" output shows.
if __name__ == '__main__':
  main()

Running on local URL:  http://0.0.0.0:7860

To create a public link, set `share=True` in `launch()`.


In [14]:
# question = '上海市今年最重要的政策是什么'
# response = query_engine.query(question)
# print('Response:\n')
# print(response)
# global page_label
# info_list = []
# results = query_retriever.retrieve(question)
# print('\nResult:\n')
# print(results)

Response:

根据提供的上下文信息，上海市今年最重要的政策是《上海市提信心扩需求稳增长促发展行动方案》。这一政策旨在贯彻党的二十精神和中央经济工作会议精神，全面落实市党代会和相关全会部署，抓牢高质量发展的首要任务，通过提振市场预期和信心，推动经济社会发展的良好开局和稳步增长，实现有效的质量提升和合理的数量增长，努力实现全年经济发展的主要预期目标，以新的气象和作为推动高质量发展取得新成效。行动方案中包含了一系列助企纾困行动、促进消费市场创新发展、以及延期还本付息等支持措施。

Result:

[NodeWithScore(node=TextNode(id_='5c670250-c3d7-420d-bce1-2d22b9f846ba', embedding=None, metadata={'page_label': '369', 'file_name': 'business.pdf', 'file_path': 'C:\\Projects\\BusinessPolicyRAG\\data\\rag\\business.pdf', 'file_type': 'application/pdf', 'file_size': 52859635, 'creation_date': '2024-04-12', 'last_modified_date': '2024-04-12'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='8ff6f743-e864-4478-aa9b-26883caa92af', node_type=<ObjectType.DOCUMENT: