In [8]:
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain_community.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from dotenv import load_dotenv
from huggingface_hub import login
import os

import shutil

load_dotenv()
login(token=os.getenv("HF_TOKEN"))

CHROMA_PATH = "chroma"
DATA_PATH = "pdfs"


def main():
    generate_data_store()


def generate_data_store():
    documents = load_documents()
    chunks = split_text(documents)
    save_to_chroma(chunks)


def load_documents():
    files = [
        os.path.join(DATA_PATH, file)
        for file in os.listdir(DATA_PATH)
        if file.endswith(".pdf")
    ]
    documents = []
    for file in files:
        loader = PyPDFLoader(file)
        documents.extend(loader.load())
    return documents


def split_text(documents: list[Document]):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=300,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True,
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

    document = chunks[10]
    print(document.page_content)
    print(document.metadata)

    return chunks


def save_to_chroma(chunks: list[Document]):
    # Clear out the database first.
    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)

    # Create a new DB from the documents.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2"
    )
    db = Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_PATH)
    db.persist()
    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")


if __name__ == "__main__":
    main()

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
Ignoring wrong pointing object 69 0 (offset 0)
Ignoring wrong pointing object 72 0 (offset 0)
Ignoring wrong pointing object 657 0 (offset 0)
Ignoring wrong pointing object 862 0 (offset 0)
Ignoring wrong pointing object 976 0 (offset 0)
Ignoring wrong pointing object 981 0 (offset 0)
Ignoring wrong pointing object 1132 0 (offset 0)
Ignoring wrong pointing object 1555 0 (offset 0)
Ignoring wrong pointing object 1675 0 (offset 0)
Ignoring wrong pointing object 1720 0 (offset 0)
Ignoring wrong pointing object 2094 0 (offset 0)
Ignoring wrong pointing object 2215 0 (offset 0)
Ignoring wrong pointing object 2268 0 (offset 0)
Ignoring wrong pointing object 2295 0 (offset 0)
Ignoring wrong pointing object 2413 0 (offset 0)
Ignoring wrong pointing object 2782 0 (offset 0)
Ignoring wrong pointing object 2784 0 (offset 0)
Ignoring wrong pointing object 3240 0 (offset 

Split 1404 documents into 19874 chunks.
Art Director: Vernon Boes
Manufacturing Planner: Becky Cross
Production Service: TECHarts
Photo and Text Researcher: Lumina Datamatics
Copy Editor: Kathi Townes, TECHarts
Illustrator: TECHarts
Text Designer: Diane Beasley
Cover Designer: Irene Morris, Morris Design
{'source': 'pdfs/Cálculo-nicolas-MacBook.pdf', 'page': 3, 'start_index': 392}
Saved 19874 chunks to chroma.


  db.persist()


In [10]:
import argparse
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

CHROMA_PATH = "chroma"

PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""


def main():
    # Create CLI.
    parser = argparse.ArgumentParser()
    parser.add_argument("query_text", type=str, help="The query text.")
    args = parser.parse_args()
    query_text = args.query_text

    # Initialize Hugging Face embeddings.
    embedding_function = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

    # Search the DB.
    results = db.similarity_search_with_relevance_scores(query_text, k=3)
    if len(results) == 0 or results[0][1] < 0.7:
        print(f"Unable to find matching results.")
        return

    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)
    print(prompt)

    # Load LLaMA model and tokenizer.
    model_name = (
        "meta-llama/Llama-2-7b-chat-hf"  # Replace with your preferred LLaMA model
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Generate response using the LLaMA model.
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
    response = generator(prompt, max_length=500, do_sample=True, temperature=0.7)

    response_text = response[0]["generated_text"]
    sources = [doc.metadata.get("source", None) for doc, _score in results]
    formatted_response = f"Response: {response_text}\nSources: {sources}"
    print(formatted_response)


if __name__ == "__main__":
    main()

usage: ipykernel_launcher.py [-h] query_text
ipykernel_launcher.py: error: the following arguments are required: query_text


SystemExit: 2

In [None]:
import argparse
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import ChatPromptTemplate

CHROMA_PATH = "chroma"

PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""


def main():
    query_text = "what is a integral"

    # Prepare the DB.
    embedding_function = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

    # Search the DB.
    results = db.similarity_search_with_relevance_scores(query_text, k=3)
    if len(results) == 0 or results[0][1] < 0.7:
        print(f"Unable to find matching results.")
        return

    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)
    print(prompt)

    model = pipeline("text-generation", model="meta-llama/Llama-2-7b-chat-hf")
    response_text = model.predict(prompt)

    sources = [doc.metadata.get("source", None) for doc, _score in results]
    formatted_response = f"Response: {response_text}\nSources: {sources}"
    print(formatted_response)


if __name__ == "__main__":
    main()

  db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)


InvalidDimensionException: Embedding dimension 384 does not match collection dimensionality 768