# 1. Loading

## Import libraries and settings

In [None]:
from app.utils.llm.helpers import get_openai_api_key

# Fetch the OpenAI API key through the project helper; how the key is
# resolved is defined in app.utils.llm.helpers, not in this notebook.
openai_api_key = get_openai_api_key()

In [None]:
from llama_index.core import Settings
from app.utils.llm.helpers import init_llm_configurations
from app.settings import Constants

# Configure the LLM and the embedding model from the project constants.
# NOTE(review): presumably this populates the global llama_index `Settings`
# object used by the later cells — confirm in the helper.
init_llm_configurations(llm_model=Constants.LLM_MODEL, embedding_model=Constants.EMBEDDING_MODEL)

# Bare expression so the notebook displays the current Settings object.
Settings

In [None]:
import nest_asyncio

# Apply nest_asyncio to allow asyncio in Jupyter Notebook. Must run before
# the async ingestion pipeline cell further down.
nest_asyncio.apply()

## Read PDF file

### Behavior

Assume the PDF file already exists.

- Use markitdown to convert pdf to markdown
- Create a Document object of LlamaIndex from the markdown file
- Create an Ingestion Pipeline (cache enabled) and ingest the Document to Node objects
- Save those nodes to the storage context including docstore, vectorstore, and index store

### Chunking method

- Start with the source document, which is written in Vietnamese
- Translate the document to English
- Use [semantic splitter](https://docs.llamaindex.ai/en/stable/examples/node_parsers/semantic_chunking/) to split the document into chunks
  - Threshold: 85
  - Buffer size: 3
  - Why?

References:
- https://youtu.be/8OJC21T2SL4?t=1933

Other methods:
- [Semantic Double Merging Chunking](https://docs.llamaindex.ai/en/stable/examples/node_parsers/semantic_double_merging_chunking/)

## Loading

In [None]:
from app.integrations.llama_index.ingestion_pipelines.readers import MarkitdownReader

# Initialize the MarkitdownReader (project reader used to load the PDF).
markitdown_reader = MarkitdownReader()
filepath = "data/NQLD01.pdf"

# Load the file into a list of documents.
documents = markitdown_reader.load_data(filepath)
# Display the first document's metadata as a quick sanity check.
documents[0].metadata

## Chunking

### Translation from Vietnamese to English

In [None]:
from app.integrations.llama_index.ingestion_pipelines.translators import Translator

# Translate the loaded documents from Vietnamese to English before chunking;
# the later cells operate on `translated_documents`, not on `documents`.
translator = Translator.from_defaults(source_language="vietnamese", target_language="english")
translated_documents = translator.get_translated_documents(documents, show_progress=True)

# Display the translated documents for inspection.
translated_documents

### Node splitting

In [None]:
from llama_index.core.node_parser import SemanticSplitterNodeParser

# Text splitters
# Use the SemanticSplitterNodeParser to split the text into nodes, using the
# globally configured embedding model to compare adjacent sentence groups.
# Per the LlamaIndex docs:
# - breakpoint_percentile_threshold: percentile of embedding dissimilarity
#   that must be exceeded to start a new node (lower value => more nodes).
# - buffer_size: number of sentences grouped together when evaluating
#   semantic similarity.
semantic_splitter = SemanticSplitterNodeParser.from_defaults(
    embed_model=Settings.embed_model,
    breakpoint_percentile_threshold=85,
    buffer_size=3
)

### Metadata extraction

In [None]:
"""
Metadata key: issue_date, is_outdated
Input from user input on frontend.
Purpose: To filter out outdated documents.
"""
issue_data = input("Enter the issue date of the document (YYYY-MM-DD): ") or "2020-01-01"
is_outdated = bool(input("Is the document outdated? (True/False): ")) or False

for document in translated_documents:
    document.metadata["issue_date"] = issue_data
    document.metadata["is_outdated"] = is_outdated

In [None]:
"""
Metadata key: excerpt_keywords
Extract keywords from the text
Purpose: Can be used to do topic/tag or keyword-based search (metadata filter).
"""
from llama_index.core.extractors import KeywordExtractor

keyword_extractor = KeywordExtractor(llm=Settings.llm, keywords=3)

In [None]:
"""
"""
from llama_index.core.extractors import SummaryExtractor

summary_extractor = SummaryExtractor(summaries=["prev", "self", "next"], llm=Settings.llm)

### Ingestion pipeline

In [None]:
from llama_index.core.ingestion import IngestionPipeline

# Ordered transformation chain: split -> keywords -> summaries -> embeddings.
# TODO: add a cleaner to cleanup llm's generated text, e.g. here's a summary of the document: ...
transformations = [
    semantic_splitter,
    keyword_extractor,
    summary_extractor,
    Settings.embed_model,
]

# Build the ingestion pipeline from the chain above.
pipeline = IngestionPipeline(transformations=transformations)

In [None]:
# Run the pipeline asynchronously over the translated documents.
# Top-level `await` works here because this is a notebook cell.
nodes = await pipeline.arun(documents=translated_documents, show_progress=True)
# Display the resulting nodes.
nodes

### Check content

In [None]:
# Dump the text of every node under a header line carrying its id.
print(f"Total nodes: {len(nodes)}")
for node in nodes:
    print(
        f"================== {node.id_} ========================",
        node.text,
        sep="\n",
    )

### Check metadata

In [None]:
# Dump the metadata dict of every node under a header line carrying its id.
print(f"Total nodes: {len(nodes)}")
for node in nodes:
    print(
        f"================== {node.id_} ========================",
        node.metadata,
        sep="\n",
    )

### Check embedding

In [None]:
# For every node, show the embedding dimensionality and its first 50 values.
print(f"Total nodes: {len(nodes)}")
for node in nodes:
    print(
        f"================== {node.id_} ========================",
        f"Dimensions: {len(node.embedding)}",
        node.embedding[:50],
        sep="\n",
    )

# 2. Storing

In [None]:
# Inflate the node list by repeating every node DUPLICATION_FACTOR times
# (the previous comment said "10x" while the loop actually ran 1000 times).
# NOTE(review): the copies are the same objects and therefore share one node
# id — confirm the vector store does not collapse them into a single point.
DUPLICATION_FACTOR = 1000
duplicate_nodes = [node for node in nodes for _ in range(DUPLICATION_FACTOR)]
# Display the inflated size.
len(duplicate_nodes)

In [None]:
import qdrant_client
from qdrant_client.http.models import VectorParams
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.fastembed import FastEmbedEmbedding

# Dense-vector layout for the collection: 768 dimensions, cosine distance.
# NOTE(review): 768 is hard-coded — it has to match the output dimension of
# Constants.EMBEDDING_MODEL; confirm before switching embedding models.
vector_params = VectorParams(size=768, distance="Cosine")

# In-memory Qdrant instance: nothing is kept once the kernel stops.
client = qdrant_client.QdrantClient(location=":memory:") # In-memory qdrant
vector_store = QdrantVectorStore(client=client, collection_name="test_poc1", dense_config=vector_params)

In [None]:
from llama_index.core import StorageContext

# Storage context backed by the Qdrant vector store; the remaining stores are
# left at the `from_defaults` defaults.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# 3. Indexing

## Behavior

There are two ways to create an index:
- Load from transformed nodes (first time)
- Load from vector store - qdrant

### Build index from nodes

In [None]:
from llama_index.core import VectorStoreIndex
from llama_index.core.response.notebook_utils import display_response

# Build the index from the (duplicated) nodes, storing them through the
# Qdrant-backed storage context. `display_response` is used by the querying
# cell further down.
index = VectorStoreIndex(nodes=duplicate_nodes, storage_context=storage_context)

### Load index from storage

In [None]:
# Alternative to building from nodes: create the index on top of the vectors
# already held by the vector store.
loaded_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

In [None]:
# Persist the storage context using the library's default persist location.
# NOTE(review): the Qdrant client above is in-memory, so this presumably does
# not persist the vectors themselves — confirm.
storage_context.persist()

# 4. Querying

In [None]:
# Choose between index or loaded_index
# similarity_top_k=3: retrieve the three most similar nodes per query.
query_engine = index.as_query_engine(similarity_top_k=3)

question = "company"

response = query_engine.query(question)
# Render the answer together with its source nodes and their metadata.
display_response(response, show_source=True, show_metadata=True)

# 5. Evaluation

# Trash

# Retriever

In [None]:
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.response.notebook_utils import display_source_node
import Stemmer

# Build the BM25 corpus directly from the ingested nodes. The previous
# `docstore=docstore` argument referenced a name that is never defined in
# this notebook and therefore raised NameError.
bm25_retriever = BM25Retriever.from_defaults(
    nodes=nodes,
    similarity_top_k=1,  # return only the single best keyword match
    stemmer=Stemmer.Stemmer("english"),
    language="english",
)

# Smoke-test the retriever with a sample query and print each hit.
retrieved_nodes = bm25_retriever.retrieve("What do you know?")
for node in retrieved_nodes:
    print(node)

In [None]:
from llama_index.core.postprocessor import SentenceEmbeddingOptimizer
from llama_index.core.postprocessor import EmbeddingRecencyPostprocessor
from llama_index.core.postprocessor import LLMRerank


# Post-processing chain applied, in order, to the retrieved nodes.
node_postprocessors = [
    # Keep only the sentences whose similarity to the query passes the cutoff.
    SentenceEmbeddingOptimizer(
        embed_model=Settings.embed_model,
        # percentile_cutoff=0.5,
        threshold_cutoff=0.7,
    ),
    # Prefer recent nodes. The date is read from the "issue_date" metadata key
    # written in the metadata-extraction cell; the previous value "date" did
    # not match any key set in this notebook.
    EmbeddingRecencyPostprocessor(date_key="issue_date", similarity_cutoff=0.7),
    # Let the LLM rerank the remaining nodes and keep the best two.
    LLMRerank(top_n=2),
]

In [None]:
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core import get_response_synthesizer
from llama_index.core.response_synthesizers.type import ResponseMode

# Configure response synthesizer
response_synthesizer = get_response_synthesizer(llm=Settings.llm, response_mode=ResponseMode.COMPACT)

# assemble query engine
# NOTE: this rebinds `query_engine`, replacing the index-based engine created
# in the querying cell with a BM25-retriever-based one.
query_engine = RetrieverQueryEngine(
    retriever=bm25_retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=node_postprocessors,
)

In [None]:
from llama_index.core import PromptTemplate
from llama_index.core.llms import ChatMessage, MessageRole

# Prompt template with two placeholders: {chat_history} and {question}.
# It instructs the model to answer in Vietnamese in an HR-specialist tone.
# NOTE(review): nothing in the visible cells consumes CUSTOM_PROMPT yet.
CUSTOM_PROMPT = PromptTemplate(
    """
    Based on the conversation history between the User and the Assistant, along with the User's new question, analyze and understand the question within the context of the conversation.
    Provide a relevant response in Vietnamese, using a professional tone like a Human Resource Specialist.  

    <Conversation History>
    {chat_history}

    <Current Question>
    {question}
    """
)

# Seed conversation: one user turn and one assistant turn.
# NOTE(review): presumably intended to fill {chat_history} — confirm.
custom_chat_history = [
    ChatMessage(
        role=MessageRole.USER,
        content="Hello assistant, we are having a conversation about the company's regulations.",
    ),
    ChatMessage(
        role=MessageRole.ASSISTANT,
        content="Great, would you like to know more information about the company's regulations?",
    ),
]