In [1]:
import chromadb

# Connect to ChromaDB
chroma_client = chromadb.PersistentClient(path="./chroma_diary_db")
collection = chroma_client.get_collection(name="diary_entries")

# Check all stored dates
all_entries = collection.peek()  # View some stored entries
print("Sample entries:", all_entries)

InvalidCollectionException: Collection diary_entries does not exist.

In [1]:
import os
import torch
import numpy as np
from dotenv import load_dotenv
from pymongo import MongoClient
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain.schema import Document

In [1]:
pip install flair

Collecting flair
  Downloading flair-0.15.1-py3-none-any.whl.metadata (12 kB)
Collecting boto3>=1.20.27 (from flair)
  Downloading boto3-1.37.19-py3-none-any.whl.metadata (6.7 kB)
Collecting conllu<5.0.0,>=4.0 (from flair)
  Downloading conllu-4.5.3-py2.py3-none-any.whl.metadata (19 kB)
Collecting ftfy>=6.1.0 (from flair)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting gdown>=4.4.0 (from flair)
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Collecting more-itertools>=8.13.0 (from flair)
  Downloading more_itertools-10.6.0-py3-none-any.whl.metadata (37 kB)
Collecting mpld3>=0.3 (from flair)
  Downloading mpld3-0.5.10-py3-none-any.whl.metadata (5.1 kB)
Collecting pptree>=3.1 (from flair)
  Downloading pptree-3.1.tar.gz (3.0 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting pytorch-revgrad>=0.2.0 (from flair)
  Downloading pytorch_revgrad-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting segtok>=1.5.11 (from flair)
  Downloading segtok-1.

In [2]:
# Load environment variables
load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")

In [4]:
# Connect to MongoDB
client = MongoClient(MONGO_URI)
db = client["diary_database"]
collection = db["diary_entries"]

# Detect device (GPU, MPS, or CPU)
device = "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using device: {device}")

Using device: mps


In [5]:
# Initialize LangChain embedding model on the detected device
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")

# MongoDB Atlas Vector Store
vector_store = MongoDBAtlasVectorSearch(
    mongo_uri=MONGO_URI,
    db_name="diary_database",
    collection="diary_entries",
    embedding=embedding_model
)

  embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")
  from .autonotebook import tqdm as notebook_tqdm


In [6]:
def store_entries(processed_entries):
    """
    Store diary entries with embeddings in MongoDB using LangChain.
    """
    docs = []
    texts = [entry["text"] for entry in processed_entries]
    embeddings = embedding_model.embed_documents(texts)

    for entry, embedding in zip(processed_entries, embeddings):
        doc = Document(
            page_content=entry["text"],
            metadata={
                "date": entry["date"],
                "sentiment": entry["sentiment"],
                "emotion": entry["emotion"],  # Fixed KeyError
                "embedding": embedding
            }
        )
        docs.append(doc)

    # Add documents to MongoDB
    vector_store.add_documents(docs)
    print("Data successfully inserted into MongoDB!")

In [1]:
Ensure 'processed_entries' is defined before calling store_entries
if "processed_entries" in globals():
    store_entries(processed_entries)
else:
    print("Error: processed_entries is not defined!")

SyntaxError: invalid syntax (121415724.py, line 1)