In [1]:
import sys

# Register pysqlite3 under the name "sqlite3" so that every later
# `import sqlite3` in this kernel (including imports made inside libraries)
# receives pysqlite3 instead of the interpreter's bundled module.
# NOTE(review): presumably needed because the vector store used below requires
# a newer SQLite than some Python builds ship with - confirm for your platform.
# This cell must run before anything else imports sqlite3.
try:
    import pysqlite3  # noqa: F401  (imported only to populate sys.modules)
except ImportError:
    # pysqlite3 is not installed here; keep the standard-library sqlite3
    # instead of failing in the very first cell of the notebook.
    pass
else:
    sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

In [2]:
import os
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

In [3]:
# All paths are resolved against the directory the kernel was started in.
current_dir = os.getcwd()

# Source text that gets split into chunks and embedded.
file_path = os.path.join(
    os.path.join(current_dir, "documents"),
    "lord_of_the_rings.txt",
)

# Directory in which the Chroma vector store is persisted between runs.
persistent_directory = os.path.join(
    os.path.join(current_dir, "db"),
    "chroma_db",
)

In [4]:
# Build the Chroma vector store on the first run; later runs skip the work.
# Only the existence of the directory is checked, so a directory left behind by
# an interrupted run is treated as a finished store - delete it to force a
# rebuild.
if not os.path.exists(persistent_directory):
    print("Persistent directory does not exist. Initializing vector store...")
    if not os.path.exists(file_path):
        raise FileNotFoundError(
            f"The file {file_path} does not exist. Please check the path."
        )

    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # encoding, which differs between operating systems and locales.
    loader = TextLoader(file_path, encoding="utf-8")
    documents = loader.load()

    # chunk_size is a target, not a hard limit: the splitter may emit longer
    # chunks and logs a "Created a chunk of size ..." warning when it does.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    docs = text_splitter.split_documents(documents)

    # Fail with a clear message instead of an IndexError on docs[0] below
    # (and before any embedding work is attempted) when nothing was produced.
    if not docs:
        raise ValueError(
            f"No text chunks were produced from {file_path}; nothing to embed."
        )

    print("\n--- Document Chunks Information ---")
    print(f"Number of document chunks: {len(docs)}")
    print(f"Sample chunk:\n{docs[0].page_content}\n")

    # Configure the embedding model that the vector store will use.
    print("\n--- Creating embeddings ---")
    embeddings = OpenAIEmbeddings(
        model="text-embedding-3-small"
    )
    print("\n--- Finished creating embeddings ---")

    # Embed the chunks and persist the resulting store to persistent_directory.
    print("\n--- Creating vector store ---")
    db = Chroma.from_documents(
        docs, embeddings, persist_directory=persistent_directory)
    print("\n--- Finished creating vector store ---")

else:
    # Note: `db`, `docs` and `embeddings` are NOT defined on this path.
    print("Vector store already exists. No need to initialize.")

Created a chunk of size 1619, which is longer than the specified 1000
Created a chunk of size 1315, which is longer than the specified 1000
Created a chunk of size 1058, which is longer than the specified 1000
Created a chunk of size 1343, which is longer than the specified 1000
Created a chunk of size 1329, which is longer than the specified 1000
Created a chunk of size 1991, which is longer than the specified 1000
Created a chunk of size 1414, which is longer than the specified 1000
Created a chunk of size 1103, which is longer than the specified 1000
Created a chunk of size 1198, which is longer than the specified 1000
Created a chunk of size 1232, which is longer than the specified 1000
Created a chunk of size 1195, which is longer than the specified 1000
Created a chunk of size 1045, which is longer than the specified 1000
Created a chunk of size 1503, which is longer than the specified 1000
Created a chunk of size 1349, which is longer than the specified 1000
Created a chunk of s

Persistent directory does not exist. Initializing vector store...

--- Document Chunks Information ---
Number of document chunks: 43
Sample chunk:
This book is largely concerned with Hobbits, and from its pages a
reader may discover much of their character and a little of their
history. Further information will also be found in the selection from
the Red Book of Westmarch that has already been published, under
the title of The Hobbit. That story was derived from the earlier chapters of the Red Book, composed by Bilbo himself, the first Hobbit
to become famous in the world at large, and called by him There and
Back Again, since they told of his journey into the East and his return:
an adventure which later involved all the Hobbits in the great events
of that Age that are here related.

The story of the Ring is, of course, known to many, but what most do not realize is that the power of the Ring is subtle and its evil grows with time. That is why the burden falls upon Frodo Baggins, a si