## Load data

In [8]:
from llama_hub.file.pdf.base import PDFReader
from llama_index import SimpleDirectoryReader


documents = PDFReader().load_data(
    file='LLM_TRAIN/eBook-How-to-Build-a-Career-in-AI.pdf')

# alternative

# documents = SimpleDirectoryReader(
#     input_files=['LLM_TRAIN/eBook-How-to-Build-a-Career-in-AI.pdf']
# ).load_data()


len(documents)

41

## Parse Documents into Nodes using SentenceWindowRetrieval

In [10]:
from llama_index.node_parser import SentenceWindowNodeParser

# create the sentence window node parser w/ default settings
# we can use sentence_splitter args to say how splits text into sentences

node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    include_prev_next_rel=True,
    original_text_metadata_key="original_text",
)

## LLM and embeddings

In [9]:
from llama_index.embeddings import resolve_embed_model
#  BGE embedder from HuggingFace
embed_model = resolve_embed_model("local:BAAI/bge-large-en-v1.5") # "local:BAAI/bge-small-en-v1.5"

from llama_index import set_global_tokenizer
# tokenizer for huggingface
from transformers import AutoTokenizer

set_global_tokenizer(
    AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0").encode
)

In [11]:
from llama_index.llms import HuggingFaceLLM
from llama_index.prompts import PromptTemplate


llm = HuggingFaceLLM(
     # model_name="Deci/DeciLM-6b-instruct",
    # tokenizer_name="Deci/DeciLM-6b-instruct",

    # model_name="WeOpenML/Alpaca-7B-v1",  # alapca of stanford
    # tokenizer_name="WeOpenML/Alpaca-7B-v1",
    
    model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    query_wrapper_prompt=PromptTemplate(
        "<|system|>\n</s>\n<|user|>\n{query_str}</s>\n<|assistant|>\n"),
    # query_wrapper_prompt=PromptTemplate(template),
    
    context_window=2048, # 4096
    max_new_tokens=256,  # 512
    model_kwargs={'trust_remote_code': True},
    generate_kwargs={"temperature": 0.7,"do_sample":True},
    device_map="auto",
)

In [12]:
from llama_index import ServiceContext

# service context is a wrapper object that contains all the context needed for indexing
sentence_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    node_parser=node_parser,
)

## Building the index

In [14]:
from llama_index import VectorStoreIndex

sentence_index = VectorStoreIndex.from_documents(
    documents, service_context=sentence_context
)
# Save index to disk for later loading
sentence_index.storage_context.persist(persist_dir="LLM_TRAIN/sentence_index")


### Building the postprocessor
Optionality you can add a reranker

In [15]:
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor

# This takes a value stored in the metadata and replaces a node text
postproc = MetadataReplacementPostProcessor(
    target_metadata_key="window")

## Query the index

In [16]:
sentence_window_engine = sentence_index.as_query_engine(
    similarity_top_k=6,  # fetch the six most similarity
    node_postprocessors=[postproc])

In [20]:
print(sentence_window_engine.query( "What are the keys to building a career in AI?"))

The following are the keys to building a career in AI:

1. Learning foundational technical skills: Learning fundamental technical skills such as programming, data analysis, and machine learning is crucial for building a successful career in AI. These skills can be acquired through courses, web resources, and industry blogs.

2. Work on projects: Work on projects that require AI to solve real-world problems. This will help you gain practical experience and build a portfolio that can be used to showcase your skills.

3. Find a job: Finding a job in AI is challenging, but it is possible through networking, interviewing, and job boards. Make sure to research the company and the role you are applying for to ensure that the job description and requirements align with your skills and experience.

4. Build a career: Build a career in AI by continuing to learn and work on meaningful projects. This can help you gain more technical skills and advance your career in AI.

5. Tackle complex projects

In [25]:
print(sentence_window_engine.query( 'how can I start learning on the AI way?'))


To start learning on the AI way, here are some tips:

1. Start small and succeed: Instead of trying to achieve a large goal right away, start with a push-up exercise or a small task. This approach can help build a habit gradually and help you succeed more quickly.

2. Try small projects: Instead of tackling a large project right away, try working on smaller projects that require less experience or effort. This can help you build a foundation of technical skills, which can be applied to larger projects later.

3. Learn from others: Join a community of individuals working in AI, attend conferences or online meetups, and learn from their experiences and insights. This can help you stay up-to-date with the latest technologies and trends in the field.

4. Focus on foundational skills: Learning technical skills such as programming, data analysis, and machine learning is a critical step in building a career in AI. However, it's critical to prioritize topic selection for your AI coursework.

5