### Import & Load Packages

In [1]:
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import VectorDBQA
from langchain.document_loaders import PyPDFLoader, DirectoryLoader, TextLoader
from langchain.llms import CTransformers

In [2]:
def loadPdf(dataPath):
    chunk = DirectoryLoader(
        dataPath,
        glob="*.pdf",
        loader_cls = PyPDFLoader
    )
    return chunk.load()
documents = loadPdf('data/')

In [14]:
loader = TextLoader('./data/state.txt')
documents = loader.load()

In [15]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=False)
texts = text_splitter.split_documents(documents)

In [16]:
#Download embedding model
def download_hugging_face_embeddings(model):
    embeddings = HuggingFaceEmbeddings(model_name=model)
    return embeddings

embeddings = download_hugging_face_embeddings("sentence-transformers/all-MiniLM-L6-v2")
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

### Save Chroma DB into local storage

In [8]:
persist_directory = 'db'
vectordb = Chroma.from_documents(documents=texts, embedding=embeddings, persist_directory=persist_directory)

In [9]:
vectordb.persist()

### Load stored DB from storage

In [17]:
llm=CTransformers(
    model="bin/llama-2-7b-chat.ggmlv3.q4_0.bin",              
    model_type="llama",
    config={'max_new_tokens':512,
    'temperature':0.8}
)
llm

CTransformers(client=<ctransformers.llm.LLM object at 0x000001E0667991E0>, model='bin/llama-2-7b-chat.ggmlv3.q4_0.bin', model_type='llama', config={'max_new_tokens': 512, 'temperature': 0.8})

In [18]:
# Now we can load the persisted database from disk, and use it as normal. 
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
qa = VectorDBQA.from_chain_type(llm=llm, chain_type="stuff", vectorstore=vectordb)



In [19]:
query = "I'm having red rashes on my arm and it's itching so much. I don't know what to do."
qa.invoke(query)

Number of tokens (762) exceeded maximum context length (512).


Number of tokens (763) exceeded maximum context length (512).
Number of tokens (764) exceeded maximum context length (512).
Number of tokens (765) exceeded maximum context length (512).
Number of tokens (766) exceeded maximum context length (512).
Number of tokens (767) exceeded maximum context length (512).
Number of tokens (768) exceeded maximum context length (512).
Number of tokens (769) exceeded maximum context length (512).
Number of tokens (770) exceeded maximum context length (512).
Number of tokens (771) exceeded maximum context length (512).
Number of tokens (772) exceeded maximum context length (512).
Number of tokens (773) exceeded maximum context length (512).
Number of tokens (774) exceeded maximum context length (512).
Number of tokens (775) exceeded maximum context length (512).
Number of tokens (776) exceeded maximum context length (512).
Number of tokens (777) exceeded maximum context length (512).
Number of tokens (778) exceeded maximum context length (512).
Number o

{'query': "I'm having red rashes on my arm and it's itching so much. I don't know what to do.",
 'result': '\nThe question: None of question at the following questions or make the question .'}