### Import & Load Packages

In [1]:
from langchain.chains import RetrievalQA, VectorDBQA
from langchain.document_loaders import DirectoryLoader, PyPDFLoader, TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [2]:
def loadPdf(dataPath):
    """Load the files matching the ``*.pdf`` glob under ``dataPath``.

    A ``DirectoryLoader`` is configured to open each matched file with
    ``PyPDFLoader``; the result of its ``load()`` call is returned as-is.
    """
    pdf_loader = DirectoryLoader(dataPath, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()
# Load the PDFs found under 'data/'.
# NOTE(review): the next cell assigns `documents` again from a TextLoader, so
# this result is replaced before anything is split or indexed — confirm
# whether the PDFs are meant to be part of the corpus.
documents = loadPdf('data/')

In [3]:
# Read the plain-text source file. After this cell `documents` refers to this
# loader's output, replacing whatever the previous cell assigned to it.
loader = TextLoader('./data/state.txt')
documents = loader.load()

In [4]:
# Split the loaded documents into chunks of size 500 with no overlap between
# consecutive chunks. `chunk_overlap` is an integer count, so pass 0
# explicitly instead of the boolean False (same value, correct type).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

In [5]:
# Build the embedding model wrapper
def download_hugging_face_embeddings(model):
    """Return a ``HuggingFaceEmbeddings`` instance for the given model name.

    ``model`` is forwarded unchanged as the ``model_name`` argument.
    """
    return HuggingFaceEmbeddings(model_name=model)

# Model id handed to the embedding helper; the resulting object is later
# passed to Chroma as the embedding function.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = download_hugging_face_embeddings(embedding_model_name)
embeddings  # last expression: display the configured embedding object

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

### Save Chroma DB into local storage

In [6]:
# Directory name passed to Chroma as its persist location.
persist_directory = 'db'

# Build the vector store from the split text chunks and the embedding object.
vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    persist_directory=persist_directory,
)

In [7]:
# Ask the Chroma store (created with `persist_directory` above) to persist itself.
# NOTE(review): newer Chroma releases reportedly persist automatically —
# confirm whether this explicit call is still needed for the installed version.
vectordb.persist()

### Load stored DB from storage

In [8]:
# Generation settings forwarded to CTransformers via its `config` argument.
llm_config = {'max_new_tokens': 512, 'temperature': 0.8}

# Load the local Llama-2 chat weights from bin/ through CTransformers.
llm = CTransformers(
    model="bin/llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config=llm_config,
)
llm

CTransformers(client=<ctransformers.llm.LLM object at 0x000002B0B9A29ED0>, model='bin/llama-2-7b-chat.ggmlv3.q4_0.bin', model_type='llama', config={'max_new_tokens': 512, 'temperature': 0.8})

In [9]:
# Re-open the persisted Chroma store from disk and use it as normal.
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

# VectorDBQA is deprecated in LangChain in favour of RetrievalQA, which takes
# a retriever instead of the vector store itself. The chain still accepts the
# question under 'query' and returns the answer under 'result'.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectordb.as_retriever(),
)



In [10]:
# Ask the QA chain a question about the indexed text. The cell output is a
# dict holding the original 'query' and the generated 'result'.
query = "What did the president say about Ketanji Brown Jackson"
qa.invoke(query)

{'query': 'What did the president say about Ketanji Brown Jackson',
 'result': " The President said that he nominated Judge Ketanji Brown Jackson for Supreme Court Justice 4 days ago, describing her as one of our nation's top legal minds who will continue the legacy of excellence of retiring Justice Breyer."}