# Creating a RAG chatbot

By [Purinat Pattanakeaw](<https://www.github.com/Purinat33>)

## Document Preprocessing

* Remove miscellaneous content, such as the document title repeated in the footer of every page, using a tool like `fitz` (PyMuPDF).

## Loading and Reading data

We use `SimpleDirectoryReader`, which reads an entire directory, including files such as images, PDFs, etc.

In [7]:
# https://docs.llamaindex.ai/en/stable/understanding/loading/loading/
from llama_index.core import SimpleDirectoryReader

# We will be reading it in the storage steps
# documents = SimpleDirectoryReader("../docs").load_data()

## Embedding Model

Using `sentence-transformers` Embedding Model via HuggingFace

In [8]:
# https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings/
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from llama_index.core import Settings
from llama_index.embeddings.langchain import LangchainEmbedding

# Sentence-transformers model used to embed both documents and queries.
model_name = 'sentence-transformers/all-MiniLM-L6-v2'

# Pick the device at run time: use the GPU when CUDA is available and fall back
# to the CPU otherwise, so the cell does not fail on machines without a GPU.
import torch  # local import: only needed for the device check

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_kwargs = {'device': device}
encode_kwargs = {'normalize_embeddings': False}
hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

# Caching model
# https://python.langchain.com/v0.2/docs/how_to/caching_embeddings/
# https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.huggingface.HuggingFaceEmbeddings.html
# Wrap the HuggingFace embedder with a cache backed by a local file store in
# the relative `cache` directory; the model name is used as the cache namespace.
store = LocalFileStore('cache')
embedder = CacheBackedEmbeddings.from_bytes_store(
    hf, store, namespace=model_name
)

# Make the cached embedder the global LlamaIndex embedding model.
Settings.embed_model = LangchainEmbedding(embedder)

## Splitting and Chunking

In [9]:
# https://docs.llamaindex.ai/en/stable/understanding/loading/loading/
# https://medium.com/@kofsitho/basic-tutorial-rag-with-llama-index-8927a5716dd1
from llama_index.core.node_parser import SentenceSplitter

# Register the splitter through `Settings.text_splitter`, the property LlamaIndex
# reads when chunking documents. Assigning to `Settings.splitter` alone only
# creates an unused attribute, so the 512/16 chunking would not be applied.
sentence_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=16)
Settings.text_splitter = sentence_splitter
# Kept as an alias in case other cells still read `Settings.splitter`.
Settings.splitter = sentence_splitter

## Vector Storage and Persistent Storage

In [10]:
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext, load_index_from_storage
import os

# Document loading lives in this cell so that it only runs when no persisted
# index is available; otherwise the index is loaded straight from disk.
persist_dir = './persist/'

# Reuse the persisted index only when the directory exists and is non-empty.
# `isdir` (rather than `exists`) keeps `os.listdir` from raising when the path
# happens to be a regular file.
if os.path.isdir(persist_dir) and os.listdir(persist_dir):
    print(f"Storage exists at {persist_dir}: Loading")
    # Use `persist_dir` here as well so the check and the load cannot drift apart.
    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
    index = load_index_from_storage(storage_context=storage_context)
else:
    print(f"{persist_dir} not exists: Performing loading")
    # Build the index from the source documents, then persist it for later runs.
    documents = SimpleDirectoryReader("./docs/").load_data()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=persist_dir)

Storage exists at ./persist/: Loading


## Set default retriever

In [1]:
# https://medium.com/@kofsitho/basic-tutorial-rag-with-llama-index-8927a5716dd1
# Requires `index` from the "Vector Storage and Persistent Storage" cell: run
# that cell first, otherwise this one raises NameError on a fresh kernel.
base_retriever = index.as_retriever(similarity_top_k=3)
source_nodes = base_retriever.retrieve("metabolism")

# Show the first retrieved node; guard against an empty result so the cell
# reports it instead of failing with IndexError.
if source_nodes:
    print(f"Score: {source_nodes[0].score:.3f}")
    print(f"Content:\n {source_nodes[0].get_content()}")
else:
    print("No nodes retrieved for the query")

NameError: name 'index' is not defined