In [1]:
import nest_asyncio

nest_asyncio.apply()

In [2]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [6]:
import os

# Input the OpenAI API key
os.environ["OPENAI_API_KEY"] = ""

In [15]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings

In [4]:
reader = SimpleDirectoryReader("./data/")
documents = reader.load_data()

In [5]:
documents

[Document(id_='c1e3dfc1-fbea-4576-a894-fc0954111449', embedding=None, metadata={'page_label': '1', 'file_name': 'NQLD01.pdf', 'file_path': '/Users/tuan.tran/Workspace/PersonalRepo/chatbot/chatbot-core/notebooks/data/NQLD01.pdf', 'file_type': 'application/pdf', 'file_size': 401404, 'creation_date': '2025-01-21', 'last_modified_date': '2024-12-07'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='Trang 1 \nCÔNG TY CẤP NƯỚC SÀI GÒN \nTRÁCH NHIỆM HỮU HẠN MỘT THÀNH VIÊN \nCÔNG TY CỔ PHẦN CẤP NƯỚC TRUNG AN \n \n \nCỘNG HÒA XÃ HỘI CHỦ NGHĨA VIỆT NAM \nĐộc lập – Tự do – Hạnh phúc \n \n \nNỘI QUY LAO ĐỘNG \n(Ban hành kèm theo Quyết định số  099 /QĐ-TA-TCHC ngày  07 / 8 /2018) \n \nCHƯƠNG I \nNHỮNG QUY Đ

In [7]:
from llama_index.core.node_parser import SentenceSplitter

nodes = SentenceSplitter().get_nodes_from_documents(documents)

In [13]:
print(f"There are total {len(nodes)} nodes")

There are total 34 nodes


In [9]:
from app.integrations.llama_index.docstore.mssql import MSSQLDocumentStore

from app.settings import Constants, Secrets

docstore = MSSQLDocumentStore.from_uri(
    uri=Constants.MSSQL_CONNECTOR_URI.format(
        user=Secrets.MSSQL_USER,
        password=Secrets.MSSQL_SA_PASSWORD,
        host="127.0.0.1",
        port=Secrets.MSSQL_PORT,
        db_name=Secrets.MSSQL_DB,
        driver=Constants.MSSQL_DRIVER
    )
)
docstore.add_documents(nodes)

In [10]:
from llama_index.core import StorageContext

storage_context = StorageContext.from_defaults(docstore=docstore)

In [11]:
len(storage_context.docstore.docs)

34

In [14]:
llm = OpenAI(temperature=0, model="gpt-4o-mini")

Settings.llm = llm
Settings.chunk_size = 1024

In [16]:
vector_index = VectorStoreIndex(nodes, storage_context=storage_context)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [17]:
query_engine = vector_index.as_query_engine()
response = query_engine.query("What is a summary of this document?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [18]:
print(response)

The document outlines the labor regulations for employees of the Trung An Water Supply Joint Stock Company in Vietnam. It includes the purpose of the regulations, which is to establish labor discipline and the consequences for violations. The regulations apply to all Vietnamese employees working under various types of labor contracts. It specifies working hours, which are set at 8 hours per day and 40 hours per week, with details on administrative working hours and provisions for direct production workers. Additionally, it addresses circumstances under which the company may be exempt from liability, such as natural disasters or other unforeseen events. The document also includes provisions for the registration, modification, and implementation of the labor regulations, emphasizing the responsibility of all employees to adhere to these rules.
