In [3]:
import os
from django.conf import settings
from django.contrib.auth.models import User
from langchain_openai import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.vectorstores import Chroma

In [7]:
# Define the path for the Chroma database
persist_directory = 'db'

def create_and_persist_db(user, document_path):
    """Create and persist a vector store for a specific user."""
    persist_directory = get_user_db_path(user)
    os.makedirs(persist_directory, exist_ok=True)

    # Load and process the documents
    loader = TextLoader(document_path, encoding='utf-8')
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)

    # Create the vector store
    embeddings = OpenAIEmbeddings()
    vectorstore = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
    vectorstore.persist()
    print(f"Database created and persisted for user {user.username}.")


In [6]:
create_and_persist_db()

Created a chunk of size 4579, which is longer than the specified 1000
Created a chunk of size 10601, which is longer than the specified 1000
Created a chunk of size 4132, which is longer than the specified 1000
Created a chunk of size 1440, which is longer than the specified 1000
Created a chunk of size 4505, which is longer than the specified 1000
Created a chunk of size 6090, which is longer than the specified 1000
Created a chunk of size 3659, which is longer than the specified 1000


Database created and persisted.


  warn_deprecated(


In [8]:
# Function to load the existing database and query it
def query_db(user, query):
    """Query the vector store for a specific user."""
    persist_directory = get_user_db_path(user)

    # Check if the user's database exists
    if not os.path.exists(persist_directory):
        raise ValueError(f"No database found for user {user.username}")

    # Load the persisted database
    embeddings = OpenAIEmbeddings()
    vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

    # Create the RAG chain
    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(),
        chain_type="stuff",
        retriever=vectorstore.as_retriever()
    )

    # Use the RAG system
    result = qa.invoke(query)
    return result["result"]
# Example usage
query = "Donne moi 3 citations aléatoire de Bouveresse"
result = query_db(query)
print(result)



1) "La pensée philosophique contribue à engendrer les formes les plus caractéristiques et les plus tenaces de la mythologie et de l’erreur, qu’elle devrait avoir en même temps pour fonction de dénoncer et de combattre."
2) "Les philosophes qui appellent aujourd’hui à l’insurrection contre tout espèce d’ordre, de logique ou de méthode font songer irrésistiblement à certains programmes politiques dans lesquels il est question de réaliser le bonheur de tout le monde en supprimant purement et simplement le gouvernement, les lois, les impôts, la police, les tribunaux et tous les règlements en usage."
3) "Les conventions et les règles qui gouvernent le monde de l’esprit deviennent automatiquement répressives, inacceptables et paralysantes, lorsqu’on a décidé d’oublier entièrement la raison d’être qui a pu les inciter et la fonction qu’elles peuvent remplir."
