In [111]:
import os
import torch
from dotenv import load_dotenv

import llama_index
from llama_index.core import SimpleDirectoryReader # to load docs
from llama_index.core import Document

from llama_index.core.prompts import PromptTemplate # generate prompt template with role
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core import ChatPromptTemplate

from llama_index.core.response.notebook_utils import display_response # formatting


from llama_index.llms.huggingface import HuggingFaceLLM # llm

from llama_index.embeddings.huggingface import HuggingFaceEmbedding # embedding

from llama_index.core import Settings # pass llm and embedding

from llama_index.core import VectorStoreIndex # store vector store

from huggingface_hub import login

In [62]:
import warnings
# Suppress all Python warnings for the rest of the kernel session.
# NOTE(review): a blanket "ignore" can also hide useful deprecation warnings — consider narrowing it.
warnings.filterwarnings("ignore")

In [63]:
# Load environment variables from ../finetune/.env, then log in to the
# Hugging Face Hub with the value stored under `hf_token`.
load_dotenv("../finetune/.env")
# NOTE(review): os.getenv returns None when `hf_token` is not set — confirm how login() should behave then.
hf_token = os.getenv("hf_token")
login(hf_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/s448780/.cache/huggingface/token
Login successful


## embedding

In [64]:
# Embedding model instance (BAAI/bge-base-en-v1.5); registered on Settings in the next cell.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")

In [65]:
# Register the embedding model on llama_index's global Settings object.
Settings.embed_model = embed_model

## Loading documents

In [81]:
# Load the contents of ./test_doc; the result is iterated below as objects exposing a `metadata` dict.
documents = SimpleDirectoryReader("./test_doc").load_data()

In [82]:
# Collect the distinct source file names across all loaded documents.
books = {doc.metadata["file_name"] for doc in documents}

books

{'ucl_2023.pdf'}

## vector store

In [83]:
# Build a VectorStoreIndex from the loaded documents.
vector_index = VectorStoreIndex.from_documents(documents)

In [84]:
# Persist the index's storage context to ./vector_store (the commented-out snippet in the next cell reloads it).
vector_index.storage_context.persist(persist_dir="./vector_store")

In [85]:
# to load from storage
# from llama_index.core import StorageContext, load_index_from_storage
# storage_context = StorageContext.from_defaults(persist_dir = "./vector_store")
# vector_index = load_index_from_storage(storage_context)

In [86]:
# new_document = SimpleDirectoryReader("./new_test_doc/", filename_as_id=True).load_data()

In [87]:
# isinstance(new_document[0], Document)

In [88]:
# update_kwargs = {
#     "delete_kwargs": {"team" : "Real Madrid"},
#     "insert_kwargs": {"Manchester City"},
# }
# vector_index.delete_ref_doc(documents[0].get_doc_id, delete_kwargs = {"delete_kwargs" : "how many titles does real madrid have?"}, delete_from_docstore=True)

In [90]:
# Start from llama_index's built-in example Document; its text is overwritten in the next cell.
# NOTE(review): the example's metadata (filename README.md, category codebase) stays attached.
test = Document.example()
test

Document(id_='6be5e8c3-04eb-461e-8cf3-26be69b44d2a', embedding=None, metadata={'filename': 'README.md', 'category': 'codebase'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='\nContext\nLLMs are a phenomenal piece of technology for knowledge generation and reasoning.\nThey are pre-trained on large amounts of publicly available data.\nHow do we best augment LLMs with our own private data?\nWe need a comprehensive toolkit to help perform this data augmentation for LLMs.\n\nProposed Solution\nThat\'s where LlamaIndex comes in. LlamaIndex is a "data framework" to help\nyou build LLM  apps. It provides the following tools:\n\nOffers data connectors to ingest your existing data sources and data formats\n(APIs, PDFs, docs, SQL, etc.)\nProvides ways to structure your data (indices, graphs) so that this data can be\neasily used with LLMs.\nProvides an advanced retrieval/query interface over your data:\nFeed in any LLM input prompt, get back retrieved co

In [93]:
# Replace the example text with the fact that will be inserted into the index.
test.text = "Real Madrid won 15 UCL titles as of June 2024"

In [95]:
# Display the document to confirm the text was replaced.
test

Document(id_='6be5e8c3-04eb-461e-8cf3-26be69b44d2a', embedding=None, metadata={'filename': 'README.md', 'category': 'codebase'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Real Madrid won 15 UCL titles as of June 2024', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n')

In [96]:
# Add the hand-made document to the existing index.
vector_index.insert(test)

In [97]:
# Register the LLM, build a query engine on the updated index and ask about the inserted fact.
# NOTE(review): `llm` is only created in the "loading the LLM" section further down, so this cell
# raises NameError on a fresh top-to-bottom run — it relies on an earlier kernel execution.
Settings.llm = llm
query_engine = vector_index.as_query_engine()
response = query_engine.query("how many titles does real madrid have?")
display_response(response)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


**`Final Response:`** As of June 2024, Real Madrid has won 15 UCL titles.

## loading the LLM

In [51]:
# Model identifier; passed below as both `model_name` and `tokenizer_name`.
model_id = "Writer/camel-5b-hf"

In [52]:
# Prompt wrapper placed around each query: a short system-style preamble followed by
# "### Instruction" / "### Response" markers, with {query_str} filled in per query.
query_wrapper_prompt = PromptTemplate(
    # Adjacent string literals are concatenated verbatim, so the space separating the
    # two sentences has to be written explicitly (it was missing: "helpful.Write").
    "Always answer the question, even if the context isn't helpful. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\n{query_str}\n\n### Response:"
)

In [53]:
# Instantiate the LLM through llama_index's HuggingFaceLLM wrapper, using `model_id`
# for both the model and the tokenizer.
llm = HuggingFaceLLM(
    context_window=2048,
    max_new_tokens=256,
    # Sampling is disabled (do_sample=False). NOTE(review): temperature is presumably unused then — confirm.
    generate_kwargs={"temperature": 0, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=model_id,
    model_name=model_id,
    device_map="auto",
    tokenizer_kwargs={"max_length": 2048},
    # NOTE(review): the cell output below reports `load_in_8bit` as deprecated in favour of a
    # `BitsAndBytesConfig` object passed via `quantization_config`.
    model_kwargs={"torch_dtype": torch.float16, "load_in_8bit" : True}
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [54]:
# Register the LLM on llama_index's global Settings object.
Settings.llm = llm

## query

In [34]:
# Create a query engine on top of the vector index.
# Leftover toggle (disabled): clearing the globally registered LLM.
# Settings.llm = None
query_engine = vector_index.as_query_engine()

In [59]:
# Run the question through the query engine and render the answer with the notebook formatter.
response = query_engine.query("how many titles does real madrid have? use the most updated information")
display_response(response)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


**`Final Response:`** Real Madrid won 14 UCL titles, with the most recent being in June 2023.

In [60]:
# Show the raw Response object, including the source nodes attached to the answer.
response

Response(response='Real Madrid won 14 UCL titles, with the most recent being in June 2023.', source_nodes=[NodeWithScore(node=TextNode(id_='266ef5b4-1dec-4bbf-9258-89d8f38b37ec', embedding=None, metadata={'page_label': '1', 'file_name': 'ucl_2023.pdf', 'file_path': '/home/s448780/workspace/cognitive_ai/RAG/test_doc/ucl_2023.pdf', 'file_type': 'application/pdf', 'file_size': 10481, 'creation_date': '2024-06-17', 'last_modified_date': '2024-06-17'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='70ddeb15-3a3a-461a-a272-79d46a795af7', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '1', 'file_name': 'ucl_2023.pdf', 'file_path': '/home/s448780/workspace/cognitive_ai/RAG/test_doc/ucl_2023.pdf', 'f

In [64]:
# Build a second, separate index from the files under ./new/.
# NOTE(review): neither `new_data` nor `new_store` is used anywhere else in the visible notebook.
new_data = SimpleDirectoryReader("./new/").load_data()
new_store = VectorStoreIndex.from_documents(new_data)

## update vector store

In [101]:
from datetime import date
# Preview today's date in the "%b %d, %Y" form that the next cell appends to the update text.
date.today().strftime("%b %d, %Y")

'Jun 18, 2024'

In [106]:
# The update helper acts only when the __update__store__ marker is present in the query.
# Sample update request: marker followed by the fact, stamped with today's date.
update_query = f"__update__store__ Real Madrid has 15 UCL titles as of {date.today().strftime('%b %d, %Y')} "
# Preview the payload: everything after the last marker, with surrounding whitespace removed.
update_query.split("__update__store__")[-1].strip()

'Real Madrid has 15 UCL titles as of Jun 18, 2024'

In [108]:
# Inspect the index's concrete class (it is used as the type annotation of update_vector_store below).
type(vector_index)

llama_index.core.indices.vector_store.base.VectorStoreIndex

In [112]:
def update_vector_store(text_to_add: str, vector_index: "VectorStoreIndex") -> None:
    """Insert the fact that follows the ``__update__store__`` marker into the index.

    The text after the last occurrence of the marker is stripped of surrounding
    whitespace and inserted into ``vector_index`` as a new ``Document``. Input
    without the marker, or with nothing after it, is ignored with a printed notice.

    Args:
        text_to_add: Query text; acted on only when it contains ``__update__store__``.
        vector_index: Index to extend; only its ``insert`` method is called.

    Returns:
        None. The index is modified in place.
    """
    marker = "__update__store__"
    if marker not in text_to_add:
        print("No information to add")
        return
    info = text_to_add.split(marker)[-1].strip()
    if not info:
        # Marker present but no payload: avoid inserting an empty document.
        print("No information to add")
        return
    # Build a fresh Document instead of mutating Document.example(), whose
    # README.md / codebase metadata would otherwise be stored with the new fact.
    vector_index.insert(Document(text=info))