In [1]:
import torch
import transformers
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import PromptTemplate
import chromadb








In [2]:
# Pick the compute device: the currently selected CUDA device when one is
# available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = f'cuda:{torch.cuda.current_device()}'
else:
    device = 'cpu'

# Bare expression so the notebook displays the chosen device.
device

'cuda:0'

In [3]:
# Hub identifier shared by the model weights and the tokenizer.
path = 'meta-llama/Llama-3.2-1B-Instruct'

# LlamaIndex wrapper around the Hugging Face causal LM.
# Placement is requested through device_map="auto"; the `device` string
# computed earlier in the notebook is not passed here.
llm = HuggingFaceLLM(
    model_name=path,
    tokenizer_name=path,
    device_map="auto",
    # Prompt budget and cap on generated tokens per call.
    context_window=5000,
    max_new_tokens=512,
    # Sampling is enabled with a low temperature.
    generate_kwargs={
        "do_sample": True,
        "temperature": 0.2,
    },
)

In [4]:
# Sentence-embedding model used to vectorise document chunks and queries.
#
# The repository id is fully qualified so it cannot be confused with a local
# folder or a same-named repo under another namespace. `trust_remote_code` is
# deliberately left at its default (disabled): enabling it lets Python code
# shipped inside the model repository run on this machine.
# NOTE(review): this stock sentence-transformers model is not expected to need
# custom code — confirm it still loads with the flag off.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [5]:
# Load the source documents: every ".pdf" under the relative "docs" folder,
# descending into sub-directories (recursive=True).
loader = SimpleDirectoryReader(
    input_dir="docs",
    recursive=True,
    required_exts=[".pdf"],
)

# Parse the matched files into LlamaIndex document objects.
docs = loader.load_data()

In [6]:
# Vector storage backed by Chroma.
# No path is passed, so the persistent client uses its default on-disk location.
chroma_client = chromadb.PersistentClient()

# Reuse the 'mydata' collection when it already exists; create it otherwise.
chroma_collection = chroma_client.get_or_create_collection(
    'mydata',
)

# Expose the collection to LlamaIndex and make it the storage target for
# anything built with this storage context.
vector_store = ChromaVectorStore(
    chroma_collection=chroma_collection,
)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

In [7]:
# Register the global LlamaIndex defaults used by the indexing and query
# steps that follow: the embedding model, the LLM, and the chunk size.
Settings.embed_model = embed_model
Settings.llm = llm
# NOTE(review): the MiniLM embedder is understood to truncate long inputs
# (its model card mentions a 256 word-piece limit) — confirm that 1024-token
# chunks are intended.
Settings.chunk_size = 1024

In [8]:
# Build the index once, then reuse it.
#
# The Chroma collection is persistent, so calling `from_documents` on every
# run would re-embed and re-insert the same PDFs, and retrieval would start
# returning duplicate chunks. Only ingest when the collection is empty;
# otherwise attach to the vectors that are already stored.
#
# To re-index after the source documents change, clear the 'mydata'
# collection first so this cell takes the ingestion branch again.
if chroma_collection.count() == 0:
    # First run: chunk, embed and store the loaded documents.
    index = VectorStoreIndex.from_documents(docs, storage_context=storage_context)
else:
    # Later runs: reuse the stored embeddings (uses Settings.embed_model for queries).
    index = VectorStoreIndex.from_vector_store(vector_store)

In [9]:
# Question-answering prompt: the retrieved context goes into {context_str},
# the user question into {query_str}; the model is told to admit when it does
# not know the answer.
qa_prompt_tmpl_str = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information above I want you to answer the query in a crisp manner, in case you don't know the answer say 'I don't know!'\n"
    "Query: {query_str}\n"
    "Answer: "
)
qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

# Create a query engine over the index and swap in the custom QA template
# used by its response synthesizer.
query_engine = index.as_query_engine()
query_engine.update_prompts(
    {
        "response_synthesizer:text_qa_template": qa_prompt_tmpl,
    }
)

# Run a sample question end to end and show the generated answer.
response = query_engine.query("What is Understand Myself?")
print(response)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


 Understand Myself is a personality assessment and report tool that uses the Big Five Aspects model to describe personality traits and aspects. It is based on the Big Five personality model, which describes personality through the (Big Five) factors and each of their two aspects. The tool is designed to provide a comprehensive understanding of an individual's personality, including their agreeableness, extraversion, conscientiousness, openness, and neuroticism. It is prepared for Mihai Farcas and is intended to be used for general population comparisons.
