In [1]:
import argparse
import os
import shutil
from langchain_community.document_loaders.pdf import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from langchain_community.vectorstores import Chroma
from tqdm import tqdm

# Embeedings using for create the Vector HuggingFaceEmbeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

In [2]:
CHROMA_PATH = "chroma"
DATA_PATH = "./data"

In [3]:
def get_embedding_function():
    embedding_model = "mixedbread-ai/mxbai-embed-large-v1"
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model,)
    return embeddings

In [4]:
def load_documents():
    document_loader = PyPDFDirectoryLoader(DATA_PATH)
    return document_loader.load()

In [5]:
def split_documents(documents: list[Document]):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=80,
        length_function=len,
        is_separator_regex=False,
    )
    return text_splitter.split_documents(documents)

In [6]:
def calculate_chunk_ids(chunks):

    # This will create IDs like "data/monopoly.pdf:6:2"
    # Page Source : Page Number : Chunk Index

    last_page_id = None
    current_chunk_index = 0

    for chunk in tqdm(chunks, desc="Processing chunks"):
        source = chunk.metadata.get("source")
        page = chunk.metadata.get("page")
        current_page_id = f"{source}:{page}"

        # If the page ID is the same as the last one, increment the index.
        if current_page_id == last_page_id:
            current_chunk_index += 1
        else:
            current_chunk_index = 0

        # Calculate the chunk ID.
        chunk_id = f"{current_page_id}:{current_chunk_index}"
        last_page_id = current_page_id

        # Add it to the page meta-data.
        chunk.metadata["id"] = chunk_id

    return chunks

In [7]:
def add_to_chroma(chunks: list[Document]):
    # Load the existing database.
    db = Chroma(
        persist_directory=CHROMA_PATH, embedding_function=get_embedding_function()
    )

    # Calculate Page IDs.
    chunks_with_ids = calculate_chunk_ids(chunks)

    # Add or Update the documents.
    existing_items = db.get(include=[])  # IDs are always included by default
    existing_ids = set(existing_items["ids"])
    print(f"Number of existing documents in DB: {len(existing_ids)}")

    # Only add documents that don't exist in the DB.
    new_chunks = []
    for chunk in tqdm(chunks_with_ids, desc="Processing chunks"):
        if chunk.metadata["id"] not in existing_ids:
            new_chunks.append(chunk)

    if len(new_chunks):
        print(f"ðŸ‘‰ Adding new documents: {len(new_chunks)}")
        new_chunk_ids = [chunk.metadata["id"] for chunk in new_chunks]
        db.add_documents(new_chunks, ids=new_chunk_ids)
        db.persist()
    else:
        print("âœ… No new documents to add")

In [8]:
def clear_database():
    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)

In [9]:
def main():

    # Check if the database should be cleared (using the --clear flag).
    # parser = argparse.ArgumentParser()
    # parser.add_argument("--reset", action="store_true", help="Reset the database.")
    # args = parser.parse_args()
    # if args.reset:
    #     print("âœ¨ Clearing Database")
    #     clear_database()

    # Create (or update) the data store.
    documents = load_documents()
    chunks = split_documents(documents)
    add_to_chroma(chunks)

In [10]:
main()

  from tqdm.autonotebook import tqdm, trange
  warn_deprecated(
Processing chunks: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 620/620 [00:00<00:00, 2951723.59it/s]


Number of existing documents in DB: 0


Processing chunks: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 620/620 [00:00<00:00, 3386026.67it/s]


ðŸ‘‰ Adding new documents: 620


  warn_deprecated(


In [11]:
import time

def time_duration_check():
    bot_response = "My name is Shathis Kannan Vijayakumar and Also I want to talk with you"
    words = bot_response.split()
    
    start_time = time.time()
    
    for word in words:
        yield word + " "
        time.sleep(0.05)

    
    end_time = time.time()
    creation_time = end_time - start_time
    print(f"Text creation time: {creation_time:.6f} seconds")
    yield creation_time

# Run the function and iterate through the generator
for item in time_duration_check():
    if isinstance(item, str):
        print(item, end='', flush=True)
    else:
        creation_time = item

print()  # Add a newline at the end
print(f"Total creation time: {creation_time:.6f} seconds")

My name is Shathis Kannan Vijayakumar and Also I want to talk with you Text creation time: 0.759769 seconds

Total creation time: 0.759769 seconds


In [9]:
from IPython.display import display, Markdown, Latex
display(Markdown("""My friend, let\'s dive into the world of financial literacy with "Rich Dad Poor Dad" by Robert Kiyosaki. Here\'s a step-by-step breakdown of the book\'s key takeaways:\n\n**Step 1: Understand the difference between assets and liabilities**\nKiyosaki explains that assets generate income, while liabilities cost you money. He emphasizes the importance of building assets, such as real estate, stocks, or businesses, to create wealth.\n\n**Step 2: Don\'t work for money, make money work for you**\nThe author stresses that traditional employment is not the key to financial freedom. Instead, focus on creating passive income streams that can generate wealth without requiring your direct involvement.\n\n**Step 3: Mind your own business**\nKiyosaki encourages readers to focus on building their own wealth, rather than relying on a salary or someone else\'s business. This means taking control of your financial education and making informed decisions about your money.\n\n**Step 4: Taxes and accounting**\nThe book highlights the importance of understanding taxes and accounting to minimize your tax liability and maximize your wealth. Kiyosaki advocates for seeking professional advice to optimize your financial strategy.\n\n**Step 5: The power of financial education**\nKiyosaki emphasizes the importance of financial literacy and education in achieving financial freedom. He encourages readers to continuously learn and improve their financial knowledge to make informed decisions.\n\n**Step 6: Overcome fear and take action**\nThe author stresses that fear and doubt can hold you back from achieving financial success. He encourages readers to take calculated risks and take action towards their financial goals.\n\n**Step 7: Build multiple income streams**\nKiyosaki advocates for diversifying your income streams to reduce financial risk. This can include investing in real estate, stocks, or starting a side business.\n\n**Step 8: Give back and build wealth**\nThe book concludes by emphasizing the importance of giving back to your community and building wealth that benefits others. Kiyosaki encourages readers to use their wealth to make a positive impact on the world.\n\nThat\'s a summary of the key takeaways from "Rich Dad Poor Dad"! I hope this helps you on your financial journey, my friend"""))

display(Latex('\pi'))

  display(Latex('\pi'))


My friend, let's dive into the world of financial literacy with "Rich Dad Poor Dad" by Robert Kiyosaki. Here's a step-by-step breakdown of the book's key takeaways:

**Step 1: Understand the difference between assets and liabilities**
Kiyosaki explains that assets generate income, while liabilities cost you money. He emphasizes the importance of building assets, such as real estate, stocks, or businesses, to create wealth.

**Step 2: Don't work for money, make money work for you**
The author stresses that traditional employment is not the key to financial freedom. Instead, focus on creating passive income streams that can generate wealth without requiring your direct involvement.

**Step 3: Mind your own business**
Kiyosaki encourages readers to focus on building their own wealth, rather than relying on a salary or someone else's business. This means taking control of your financial education and making informed decisions about your money.

**Step 4: Taxes and accounting**
The book highlights the importance of understanding taxes and accounting to minimize your tax liability and maximize your wealth. Kiyosaki advocates for seeking professional advice to optimize your financial strategy.

**Step 5: The power of financial education**
Kiyosaki emphasizes the importance of financial literacy and education in achieving financial freedom. He encourages readers to continuously learn and improve their financial knowledge to make informed decisions.

**Step 6: Overcome fear and take action**
The author stresses that fear and doubt can hold you back from achieving financial success. He encourages readers to take calculated risks and take action towards their financial goals.

**Step 7: Build multiple income streams**
Kiyosaki advocates for diversifying your income streams to reduce financial risk. This can include investing in real estate, stocks, or starting a side business.

**Step 8: Give back and build wealth**
The book concludes by emphasizing the importance of giving back to your community and building wealth that benefits others. Kiyosaki encourages readers to use their wealth to make a positive impact on the world.

That's a summary of the key takeaways from "Rich Dad Poor Dad"! I hope this helps you on your financial journey, my friend

<IPython.core.display.Latex object>