## Load data

In [1]:
from llama_hub.file.pdf.base import PDFReader
from llama_index import SimpleDirectoryReader


# Parse the e-book with llama_hub's PDFReader.
documents = PDFReader().load_data(
    file="LLM_TRAIN/eBook-How-to-Build-a-Career-in-AI.pdf",
)

# Alternative loader for the same file:
# documents = SimpleDirectoryReader(
#     input_files=["LLM_TRAIN/eBook-How-to-Build-a-Career-in-AI.pdf"]
# ).load_data()

# Show how many Document objects were loaded.
len(documents)

41

## Parse Documents into Nodes using SentenceWindowRetrieval

In [2]:
from llama_index.node_parser import SentenceWindowNodeParser

# Build the sentence-window node parser through its `from_defaults` factory.
# A custom `sentence_splitter` argument can be supplied to control how the
# text is split into sentences.
node_parser = SentenceWindowNodeParser.from_defaults(
    # Metadata keys under which each node stores its own sentence and the
    # surrounding window; the "window" key is read back by the postprocessor
    # further down in this notebook.
    original_text_metadata_key="original_text",
    window_metadata_key="window",
    window_size=3,
    include_prev_next_rel=True,
)

## LLM and embeddings

In [3]:
from llama_index.embeddings import resolve_embed_model
# BGE embedding model from HuggingFace.
# Smaller alternative: "local:BAAI/bge-small-en-v1.5"
embed_model = resolve_embed_model("local:BAAI/bge-large-en-v1.5")

# Optional: set the global tokenizer to the HuggingFace tokenizer of the LLM.
# from llama_index import set_global_tokenizer
# from transformers import AutoTokenizer
#
# set_global_tokenizer(
#     AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0").encode
# )

In [4]:
from llama_index.llms import HuggingFaceLLM
from llama_index.prompts import PromptTemplate


# Alternative checkpoints left by the author (set model_name and
# tokenizer_name together):
#   "Deci/DeciLM-6b-instruct"
#   "WeOpenML/Alpaca-7B-v1"  (Stanford Alpaca)
llm = HuggingFaceLLM(
    model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    # Wrap every query in chat markup with an empty system turn.
    query_wrapper_prompt=PromptTemplate(
        "<|system|>\n</s>\n<|user|>\n{query_str}</s>\n<|assistant|>\n"
    ),
    context_window=2048,  # alternative: 4096
    max_new_tokens=256,  # alternative: 512
    # NOTE(review): trust_remote_code=True presumably allows code shipped in
    # the model repository to run at load time -- confirm it is still needed
    # for the TinyLlama checkpoint.
    model_kwargs={"trust_remote_code": True},
    generate_kwargs={"temperature": 0.7, "do_sample": True},
    device_map="auto",
)

In [5]:
from llama_index import ServiceContext

# Bundle the LLM, the embedding model and the node parser into a single
# ServiceContext, the wrapper object that carries everything needed for indexing.
sentence_context = ServiceContext.from_defaults(
    node_parser=node_parser,
    embed_model=embed_model,
    llm=llm,
)

## Building the index

In [6]:
import os

from llama_index import StorageContext, VectorStoreIndex, load_index_from_storage

# Directory where the index is persisted between runs.
SENTENCE_INDEX_DIR = "LLM_TRAIN/sentence_index"

# Building the index embeds every parsed node, so reuse the copy persisted by
# a previous run when there is one instead of recomputing it on every
# Restart & Run All.
# NOTE: delete SENTENCE_INDEX_DIR after changing the PDF, the node parser or
# the embedding model; otherwise the stale index on disk is loaded.
if os.path.isdir(SENTENCE_INDEX_DIR) and os.listdir(SENTENCE_INDEX_DIR):
    sentence_index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir=SENTENCE_INDEX_DIR),
        service_context=sentence_context,
    )
else:
    sentence_index = VectorStoreIndex.from_documents(
        documents, service_context=sentence_context
    )
    # Save index to disk for later loading
    sentence_index.storage_context.persist(persist_dir=SENTENCE_INDEX_DIR)


### Building the postprocessor
Optionally, you can also add a reranker.

In [7]:
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor

# Replaces each node's text with the value stored under its "window" metadata key.
postproc = MetadataReplacementPostProcessor(target_metadata_key="window")

## Query the index

In [8]:
# Fetch the 3 most similar nodes and pass them through `postproc`.
sentence_window_engine = sentence_index.as_query_engine(
    node_postprocessors=[postproc],
    similarity_top_k=3,
)

In [9]:
# Print the engine's answer to a sample question.
print(
    sentence_window_engine.query("What are the keys to building a career in AI?")
)

The keys to building a career in AI are:

1. Learning foundational technical skills - this includes topics such as programming, machine learning, and data analysis.
2. Finding a job - this involves pursuing projects, building a portfolio, and creating impact.
3. Finding a job in a specific field, such as AI engineering or data science.
4. Focusing on learning foundational skills over time to build a solid technical foundation.
5. Tackling large projects and working in teams to improve collaboration and communication skills.
6. Building a solid portfolio that demonstrates your skills and experience.
7. Building a network of industry professionals and potential employers.
8. Seeking out mentorship and certifications to improve your skills further.


## Obtain the documents from which the context is extracted

In [10]:
# Keep both the question and the response object; later cells inspect them.
question = "how can I start learning on the AI way?"
result = sentence_window_engine.query(question)
# Display the response text.
result.response

"The best way to start learning on the AI way is to start small and succeed. Instead of trying to exercise for 30 minutes a day, aim for doing just one push-up. This approach may be helpful for those who want to spend more time studying. Even if you learn nothing in that 10 seconds, you're building the habit of studying daily. On some days, you may end up studying for an hour or longer. To maintain a steady pace of learning for years, cultivate the habit of learning a little bit every week. Learning technical skills for a promising AI career involves learning foundational technical skills, working on projects, and finding a job supported by being part of a community."

In [11]:

# `source_nodes` returns a list of NodeWithScore objects that hold the
# top_k chunks most relevant to the query.
result.source_nodes

# result.metadata  # also contains data relevant to the context used

[NodeWithScore(node=TextNode(id_='f16683cc-d3c5-4dd1-9897-a3166a2402ef', embedding=None, metadata={'window': 'Fogg explains that the best way to build a new habit is to start small \nand succeed, rather than start  too big and fail.  For example, rather than trying to \nexercise for 30 minutes a day, he recommends aspiring to do just one push-up, and \ndoing it consistently.\n This approach may be helpful to those of you who want to spend more time studying. \n If you start by holding yourself accountable for watching, say, 10 seconds of an \neducational video every day — and you do so consistently — the habit of studying daily \nwill grow naturally.  Even if you learn nothing in that 10 seconds, you’re establishing the \nhabit of studying a little every day.  On some days, maybe you’ll end up studying for an \nhour or longer.', 'original_text': 'If you start by holding yourself accountable for watching, say, 10 seconds of an \neducational video every day — and you do so consistently —

In [12]:
# Summarise the answer together with the file name and page label recorded in
# the metadata of each source node.
out_dict = dict(
    query=question,
    response=result.response,
    documents=[src.metadata['file_name'] for src in result.source_nodes],
    pages=[src.metadata['page_label'] for src in result.source_nodes],
)
out_dict


{'query': 'how can I start learning on the AI way?',
 'response': "The best way to start learning on the AI way is to start small and succeed. Instead of trying to exercise for 30 minutes a day, aim for doing just one push-up. This approach may be helpful for those who want to spend more time studying. Even if you learn nothing in that 10 seconds, you're building the habit of studying daily. On some days, you may end up studying for an hour or longer. To maintain a steady pace of learning for years, cultivate the habit of learning a little bit every week. Learning technical skills for a promising AI career involves learning foundational technical skills, working on projects, and finding a job supported by being part of a community.",
 'documents': ['eBook-How-to-Build-a-Career-in-AI.pdf',
  'eBook-How-to-Build-a-Career-in-AI.pdf',
  'eBook-How-to-Build-a-Career-in-AI.pdf'],
 'pages': ['11', '10', '9']}

In [13]:
# Original sentence and page label of the first source node, shown as a tuple.
tuple(
    result.source_nodes[0].metadata[key]
    for key in ('original_text', 'page_label')
)

('If you start by holding yourself accountable for watching, say, 10 seconds of an \neducational video every day — and you do so consistently — the habit of studying daily \nwill grow naturally. ',
 '11')

## View the resources used

In [14]:
import GPUtil
import psutil

# Divisor used to express the psutil memory figures in the "GB" labels below.
BYTES_PER_GIB = 1024 ** 3

gpus = GPUtil.getGPUs()
ram_info = psutil.virtual_memory()

print(f"Total RAM: {ram_info.total / BYTES_PER_GIB:.2f} GB")
print(f"Available RAM: {ram_info.available / BYTES_PER_GIB:.2f} GB\n")
if gpus:
    # Only the first GPU returned by GPUtil is reported.
    print(f"Total GPU: {gpus[0].memoryTotal} MB")

    # Print the amount of free GPU memory
    print(f"Available GPU: {gpus[0].memoryFree} MB")
else:
    print("There is no available GPU.")

Total RAM: 31.24 GB
Aviable RAM: 27.32 GB

Total GPU: 12288.0 MB
Aviable GPU: 2791.0 MB


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
