In [None]:
import sys
from pathlib import Path

# Resolve the directory two levels above the current working directory and put
# it at the front of sys.path (presumably so the project-local imports that
# follow can be found). Adjust the relative path if the notebook is moved.
parent_path = Path('../../').resolve()
module_parent_dir = str(parent_path)
if module_parent_dir not in sys.path:
    sys.path.insert(0, module_parent_dir)

from datetime import datetime
from db_models import Conversation, Message, Embedding, db
from app import app

conversation_id = "2e17180c-c5d9-4ae5-8275-81ee8cbdbe1b"
messages_to_embed = ""

# Every message becomes one entry that starts with a newline:
#   "\n<YYYY-MM-DD HH:MM> - <sender>: <content>"
entry_template = "\n{timestamp} - {sender}: {content}"

with app.app_context():
    conv = Conversation.query.get_or_404(conversation_id)

    entries = []
    for msg in conv.messages:
        # msg.timestamp is handled as an ISO-format string; any "Z" is removed
        # before parsing (presumably because datetime.fromisoformat on older
        # Python versions does not accept it).
        sent_at = datetime.fromisoformat(msg.timestamp.replace("Z", ""))
        entries.append(
            entry_template.format(
                timestamp=sent_at.strftime("%Y-%m-%d %H:%M"),
                sender=msg.sender,
                content=msg.content,
            )
        )

    # Concatenate once instead of growing the string inside the loop.
    messages_to_embed = "".join(entries)

print(messages_to_embed)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from typing import Any, List


# Break the transcript into chunks using a target chunk size of 500 and no
# overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)
messages_to_embed_chunks = text_splitter.split_text(messages_to_embed)

# Show the chunks themselves, then how many were produced (cell output).
print(messages_to_embed_chunks)
len(messages_to_embed_chunks)


In [None]:
import chromadb
from chromadb.config import Settings
from chromadb.utils import embedding_functions

# Directory (relative to the working directory) where Chroma keeps its data.
persist_directory = 'vectorstores/long_term_memory_chroma'

chroma_settings = Settings(
    chroma_db_impl="duckdb+parquet",
    # Optional, defaults to .chromadb/ in the current directory
    persist_directory=persist_directory,
)
client = chromadb.Client(chroma_settings)

client.persist()

# Embedding function backed by the "all-MiniLM-L6-v2" sentence-transformers model.
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2",
)

# Open the collection, creating it on first use. To open it without creating
# it, client.get_collection(...) takes the same name/embedding_function arguments.
collection = client.get_or_create_collection(
    name="long_term_memory_collection",
    embedding_function=sentence_transformer_ef,
)


In [None]:
def process_embeddings(conversation_id, vectorstore_collection, embedding_strings):
    """Replace a conversation's embeddings in the database and the vector store.

    Any embeddings already attached to the conversation are removed from both
    stores, one new ``Embedding`` row is created per string, the strings are
    added to the vector store under the new row ids, and the conversation is
    flagged as embedded.

    All database changes are made in a single transaction that is committed
    only after the vector store calls have succeeded. If anything raises, the
    transaction is rolled back, so the ``embedded`` flag is never set for a
    conversation whose embeddings were not fully written. Vector store changes
    made before the failure are not undone.

    Args:
        conversation_id: Primary key of the conversation to process.
        vectorstore_collection: Object exposing ``delete(ids=...)`` and
            ``add(documents=..., ids=...)`` (a Chroma collection in this
            notebook).
        embedding_strings: Iterable of text chunks to embed. May be empty, in
            which case nothing is added to the vector store.

    Returns:
        A list of ``{"id": <embedding row id>, "content": <chunk>}`` dicts, in
        the same order as ``embedding_strings``.

    Raises:
        Whatever ``Conversation.query.get_or_404`` raises for an unknown id,
        and any exception from the database session or the vector store
        (re-raised after the rollback).
    """
    # Materialise once: the chunks are iterated more than once below, which a
    # one-shot iterable (e.g. a generator) would not survive.
    contents = list(embedding_strings)
    new_embeddings = []

    with app.app_context():
        # Get the conversation by ID
        conversation = Conversation.query.get_or_404(conversation_id)

        try:
            # Drop whatever was embedded for this conversation before.
            existing_embedding_ids = [
                embedding.id for embedding in conversation.embeddings]
            if existing_embedding_ids:
                vectorstore_collection.delete(ids=existing_embedding_ids)
                Embedding.query.filter_by(conversation_id=conversation_id).delete()

            # Create one row per chunk; flush() sends the INSERTs so the rows
            # receive their primary keys without committing yet.
            rows = [Embedding(conversation_id=conversation_id) for _ in contents]
            db.session.add_all(rows)
            db.session.flush()

            new_embeddings = [
                {"id": row.id, "content": content}
                for row, content in zip(rows, contents)]

            # Skip the call entirely for empty input rather than handing the
            # vector store empty lists.
            if new_embeddings:
                vectorstore_collection.add(
                    documents=[item["content"] for item in new_embeddings],
                    ids=[item["id"] for item in new_embeddings])

            # Flag the conversation only once everything above has succeeded.
            conversation.embedded = True
            db.session.commit()
        except Exception:
            db.session.rollback()
            raise

    return new_embeddings


# Store the chunks for the selected conversation and show the id/content
# pairs that process_embeddings returns.
result = process_embeddings(
    conversation_id=conversation_id,
    vectorstore_collection=collection,
    embedding_strings=messages_to_embed_chunks,
)
print(result)


In [5]:
# Query the collection with the probe text "condition", asking for one result.
results = collection.query(query_texts=["condition"], n_results=1)
# To inspect the returned document text:
# print(results["documents"][0][0])

In [None]:
# A notebook cell renders only its last expression, so the count has to be
# printed explicitly or its value is silently discarded.
print(collection.count())
# Sample of the stored items, shown as the cell output.
collection.peek()

In [9]:

# Reset cell: undo all embedding work in the database, then reset the Chroma client.
with app.app_context():
    session = db.session

    # Mark every conversation as not embedded.
    Conversation.query.update({Conversation.embedded: False})
    session.commit()

    # Delete every row of the Embedding table.
    Embedding.query.delete()
    session.commit()

# Kept as the final expression so its return value is shown as the cell output.
client.reset()

True