In [1]:
import chromadb

# Connect to ChromaDB
chroma_client = chromadb.PersistentClient(path="./chroma_diary_db")
collection = chroma_client.get_collection(name="diary_entries")

# Check all stored dates
all_entries = collection.peek()  # View some stored entries
print("Sample entries:", all_entries)

InvalidCollectionException: Collection diary_entries does not exist.

In [1]:
import os
import torch
import numpy as np
from dotenv import load_dotenv
from pymongo import MongoClient
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain.schema import Document

In [2]:
pip install langchain-cohere

Collecting langchain-cohere
  Using cached langchain_cohere-0.4.3-py3-none-any.whl.metadata (6.6 kB)
Collecting cohere<6.0,>=5.12.0 (from langchain-cohere)
  Using cached cohere-5.14.0-py3-none-any.whl.metadata (3.4 kB)
Collecting types-pyyaml<7.0.0.0,>=6.0.12.20240917 (from langchain-cohere)
  Using cached types_PyYAML-6.0.12.20241230-py3-none-any.whl.metadata (1.8 kB)
Collecting fastavro<2.0.0,>=1.9.4 (from cohere<6.0,>=5.12.0->langchain-cohere)
  Using cached fastavro-1.10.0-cp311-cp311-macosx_10_9_universal2.whl.metadata (5.5 kB)
Using cached langchain_cohere-0.4.3-py3-none-any.whl (42 kB)
Using cached cohere-5.14.0-py3-none-any.whl (253 kB)
Using cached types_PyYAML-6.0.12.20241230-py3-none-any.whl (20 kB)
Using cached fastavro-1.10.0-cp311-cp311-macosx_10_9_universal2.whl (1.0 MB)
Installing collected packages: types-pyyaml, fastavro, cohere, langchain-cohere
Successfully installed cohere-5.14.0 fastavro-1.10.0 langchain-cohere-0.4.3 types-pyyaml-6.0.12.20241230
Note: you may nee

In [2]:
# Load environment variables
load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")

In [4]:
# Connect to MongoDB
client = MongoClient(MONGO_URI)
db = client["diary_database"]
collection = db["diary_entries"]

# Detect device (GPU, MPS, or CPU)
device = "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using device: {device}")

Using device: mps


In [5]:
# Initialize LangChain embedding model on the detected device
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")

# MongoDB Atlas Vector Store
vector_store = MongoDBAtlasVectorSearch(
    mongo_uri=MONGO_URI,
    db_name="diary_database",
    collection="diary_entries",
    embedding=embedding_model
)

  embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")
  from .autonotebook import tqdm as notebook_tqdm


In [6]:
def store_entries(processed_entries):
    """
    Store diary entries with embeddings in MongoDB using LangChain.
    """
    docs = []
    texts = [entry["text"] for entry in processed_entries]
    embeddings = embedding_model.embed_documents(texts)

    for entry, embedding in zip(processed_entries, embeddings):
        doc = Document(
            page_content=entry["text"],
            metadata={
                "date": entry["date"],
                "sentiment": entry["sentiment"],
                "emotion": entry["emotion"],  # Fixed KeyError
                "embedding": embedding
            }
        )
        docs.append(doc)

    # Add documents to MongoDB
    vector_store.add_documents(docs)
    print("Data successfully inserted into MongoDB!")

In [1]:
Ensure 'processed_entries' is defined before calling store_entries
if "processed_entries" in globals():
    store_entries(processed_entries)
else:
    print("Error: processed_entries is not defined!")

SyntaxError: invalid syntax (121415724.py, line 1)