# Import Modules

In [1]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index.node_parser import SimpleNodeParser
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index import set_global_service_context
from llama_index.prompts import PromptTemplate

# Read documents and Split into nodes

In [2]:
documents = SimpleDirectoryReader("./pdf").load_data()
node_parser = SimpleNodeParser.from_defaults()

In [3]:
# nodes = node_parser.get_nodes_from_documents(documents)


In [4]:
# nodes

# Embeddings

In [5]:
from llama_index.embeddings import HuggingFaceEmbedding

# embed_model_name = 'sentence-transformers/all-MiniLM-L6-v2'
# embed_model_name = "BAAI/bge-base-en-v1.5"

embed_model = HuggingFaceEmbedding(model_name = "BAAI/bge-base-en-v1.5", cache_folder = 's_bert',)

In [6]:
postprocessor = SentenceTransformerRerank(
  model = "sentence-transformers/msmarco-distilbert-base-dot-prod-v3",
  top_n = 3,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/msmarco-distilbert-base-dot-prod-v3 and are newly initialized: ['classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
from llama_index.llms import OpenAI
from dotenv import load_dotenv
import os
import openai
os.environ['OPENAI_API_KEY'] = "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"
openai.api_key = os.environ['OPENAI_API_KEY']
llm = OpenAI(model='gpt-3.5-turbo', max_tokens=512, temperature=0.1)

# ChromaDB

In [8]:
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext
import chromadb

chroma_client = chromadb.PersistentClient(path="./dbb")
# chroma_client.delete_collection(name="qa-pdf")
chroma_collection = chroma_client.get_or_create_collection(name="qa-pdf")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

In [9]:
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# LLMs

In [10]:
# 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

In [11]:
# Load model directly
# from transformers import AutoTokenizer, AutoModelForCausalLM
# 
# tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
# model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

In [12]:
# from llama_index.prompts import PromptTemplate

# system_prompt = """<|SYSTEM|># StableLM Tuned (Alpha version)
# - StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
# - StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
# - StableLM is more than just an information source, StableLM is also able to write poetry, short stories.
# - StableLM will refuse to participate in anything that could harm a human.
# - StableLM Rewrite an answer that combines multiple data sources, semantically unchanged
# """

# template = (
#     "We have provided context information below. \n"
#     "---------------------\n"
#     "{context_str}"
#     "\n---------------------\n"
#     "Given this information, please answer the question: {query_str}\n"
# )
# qa_template = PromptTemplate(template)
# 
# system_prompt = """
# - StableLM Rewrite an answer that combines multiple data sources, semantically unchanged
# """
# # This will wrap the default prompts that are internal to llama-index
# query_wrapper_prompt = PromptTemplate("<|USER|>{query_str}<|ASSISTANT|>")

In [13]:
# from llama_index.prompts import PromptTemplate
# 
# text_qa_template_str = (
#     "Context information is"
#     " below.\n---------------------\n{context_str}\n---------------------\nUsing"
#     " both the context information and also using your own knowledge, answer"
#     " the question: {query_str}\nIf the context isn't helpful, you can also"
#     " answer the question on your own.\n"
# )
# text_qa_template = PromptTemplate(text_qa_template_str)

In [14]:
from llama_index import ServiceContext

service_context = ServiceContext.from_defaults(
    embed_model=embed_model,
    node_parser=node_parser,
    llm=llm,
)

In [15]:
set_global_service_context(service_context)

# SET PROMPT

In [16]:
#Question answering template
text_qa_template_str = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Using both the context information and also using your own knowledge, "
    "answer the question: {query_str}\n"
    "If the context isn't helpful, you can also answer the question on your own. You can choose answer the question by Vietnamese or English\n"
)
text_qa_template = PromptTemplate(text_qa_template_str)

In [17]:
#Refine template
refine_template_str = (
    "The original question is as follows: {query_str}\n"
    "We have provided an existing answer: {existing_answer}\n"
    "We have the opportunity to refine the existing answer "
    "(only if needed) with some more context below.\n"
    "------------\n"
    "{context_msg}\n"
    "------------\n"
    "Using both the new context and your own knowledge, update or repeat the existing answer.\n"
)
refine_template = PromptTemplate(refine_template_str)

# Save to disk

In [18]:
index = VectorStoreIndex.from_documents(
        documents = documents,
        vector_store = vector_store,
        service_context = service_context,
        text_qa_template = text_qa_template,
        refine_template = refine_template,
        postprocessor = postprocessor,
        storage_context=storage_context
    )

# Load from disk

In [19]:
# index = VectorStoreIndex.from_vector_store(
#     vector_store = vector_store,
#     storage_context=storage_context,
# )

# LLM

# Query the index

In [20]:
chat_engine = index.as_chat_engine(
        chat_mode = "condense_question",
        verbose = True,
    )

In [21]:
streaming_response = chat_engine.stream_chat("What is machine learning?")                                          

Querying with: Can you explain what machine learning is?


In [22]:
for token in streaming_response.response_gen:
    print(token, end="")

Machine learning is a discipline of artificial intelligence that enables machines to learn from data and past experiences. It allows computers to automatically identify patterns and make predictions without explicit programming. Machine learning algorithms learn directly from data, rather than relying on predetermined equations, and improve their performance with more available samples. It has become essential in various fields, such as computational finance, computer vision, computational biology, automotive, aerospace, manufacturing, and natural language processing. Machine learning works by training algorithms on a dataset to create a model, which is then used to make predictions on new input data. The accuracy of the predictions is checked, and the algorithm is either deployed or trained repeatedly until the desired accuracy is achieved. Machine learning is broadly categorized into four main types based on different methods and ways of learning.