In [1]:
import os

import torch
from transformers import BitsAndBytesConfig

from llama_index import Document, VectorStoreIndex
from llama_index import ServiceContext # configuration files for llama_index
from llama_index.llms import HuggingFaceLLM
from llama_index.prompts import PromptTemplate
from llama_index.readers import StringIterableReader  # transform str into documents
# from llama_index.response.notebook_utils import display_response



In [3]:
# Quantization settings handed to the model loader: 4-bit NF4 weights with
# double quantization, using bfloat16 as the compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# Read the Hugging Face access token from the environment instead of embedding
# it in the notebook. Any token that was ever saved in a notebook must be
# treated as leaked and revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "HF_TOKEN is not set; export a Hugging Face access token that can read "
        "meta-llama/Llama-2-7b-chat-hf before running this cell."
    )

# Wrap the quantized Llama-2 chat model as a llama_index LLM. The query wrapper
# puts every query inside the [INST] ... [/INST] template given below.
llm = HuggingFaceLLM(
    model_name="meta-llama/Llama-2-7b-chat-hf",
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    query_wrapper_prompt=PromptTemplate("<s> [INST] {query_str} [/INST] "),
    context_window=3900,
    model_kwargs={"token": hf_token, "quantization_config": quantization_config},
    tokenizer_kwargs={"token": hf_token},
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
# Bundle the LLM with a locally loaded BGE embedding model; this context is
# passed to the index builder below.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
)

# Placeholder corpus of raw strings to index.
texts = ['a', "a", 'b', 'c']

# StringIterableReader turns each string into a document; per the original
# author's note this is the same as [Document(text=t) for t in texts].
documents = StringIterableReader().load_data(texts=texts)

# Build the vector index over the documents using the service context above.
vector_index = VectorStoreIndex.from_documents(documents, service_context=service_context)


In [8]:
# Inspect the configured ServiceContext; as the cell's last expression, its repr is shown as the cell output.
service_context

ServiceContext(llm_predictor=LLMPredictor(system_prompt=None, query_wrapper_prompt=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>), prompt_helper=PromptHelper(context_window=3900, num_output=256, chunk_overlap_ratio=0.1, chunk_size_limit=None, separator=' '), embed_model=HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5', embed_batch_size=10, callback_manager=<llama_index.callbacks.base.CallbackManager object at 0x7fb1a4b46d30>, tokenizer_name='BAAI/bge-small-en-v1.5', max_length=512, pooling=<Pooling.CLS: 'cls'>, normalize=True, query_instruction=None, text_instruction=None, cache_folder=None), transformations=[SentenceSplitter(include_metadata=True, include_prev_next_rel=True, callback_manager=<llama_index.callbacks.base.CallbackManager object at 0x7fb1a4b46d30>, id_func=<function default_id_func at 0x7fb125d1ea60>, chunk_size=1024, chunk_overlap=200, separator=' ', paragraph_separator='\n\n\n', secondary_chunking_regex='[^,.;。？！]+[,.;。？！]?')], llama_logge

In [5]:

# Build a query engine on top of the vector index using its default settings.
# NOTE(review): the original author left response_mode="compact" here as an option to try.
query_engine = vector_index.as_query_engine()

# Run a natural-language question through the query engine.
response = query_engine.query("Who is tom")

# `response.response` holds the raw answer text; the bare expression below shows the whole Response object.
response


Response(response='Based on the provided context information, the answer to the query "Who is Tom?" is not possible to determine with certainty. The context information provided is:\n\n"a"\n\nWithout any additional information or prior knowledge, it is not possible to determine who "Tom" is or what he might be referring to. The term "Tom" could refer to any person, place, thing, or concept, and without more context or information, it is impossible to provide a definitive answer to the query.', source_nodes=[NodeWithScore(node=TextNode(id_='83c513f8-5765-499e-afaa-93accf907fca', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='ed352519-7735-44ab-b7e8-7f4ec0aa2267', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='5f546eb4606b5c2b7d2a449a5cc2bbb477ed5a246c7051ce871b12f2dbfc8419'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='e120fdac-5daf-4d64-8665-d2c

In [54]:
# Show the Response object's instance attributes as a plain dict.
vars(response)

{'response': 'Based on the context information provided, the answer to the query "Who is Tom?" is:\n\nTom is a school teacher who lives in the village and is in his late thirties. He is kind, dedicated, and has a passion for teaching and helping others. He is well-respected in the community and loved by his students for his patience and understanding.',
 'source_nodes': [NodeWithScore(node=TextNode(id_='6fd73eb3-a697-45df-b7ec-227b633b6fdd', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='009f3113-30bd-4262-8331-d0a4183138e1', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='556efb9aefdd2bb995bb01fbd0514f44c3bd5b9fb7946545b0f3f743a8093e3d')}, text='Tom is a kind and dedicated school teacher who has been living in the village his entire life. He is in his late thirties and has a passion for teaching and helping others. Tom is well-respected in the community and is

In [None]:
# Inspect the index's `ref_doc_info` attribute; presumably a mapping from
# source-document ids to their bookkeeping info — TODO confirm against the llama_index docs.
vector_index.ref_doc_info