In [1]:
import os
import pandas as pd

# Read the synthetic financial transactions CSV from the sibling data folder.
path = os.path.join(
    '../data',
    'financial_synthetic.csv',
)
df = pd.read_csv(filepath_or_buffer=path)

In [2]:
# Preview the first three rows of the loaded frame.
df.head(n=3)

Unnamed: 0,text,amount,label,metadata
0,Purchase at Netflix for shopping. Date: 2025-0...,208.07,0,merchant:Netflix
1,Purchase at Starbucks for groceries. Date: 202...,151.34,0,merchant:Starbucks
2,Purchase at Whole Foods for dining. Date: 2025...,40.57,0,merchant:Whole Foods


In [3]:
from langchain_core.documents import Document

# Wrap every row of `df` in a LangChain Document: the `text` column becomes the
# page content and the amount / label / metadata columns travel along as
# document metadata.
# The columns are zipped together instead of calling DataFrame.iterrows(),
# which builds a throwaway Series for every row.
docs = [
    Document(
        page_content=text,
        metadata={
            'amount': amount,
            'label': label,
            'other_info': other_info,
        },
    )
    for text, amount, label, other_info in zip(
        df['text'], df['amount'], df['label'], df['metadata']
    )
]

# Show the first three documents as a sanity check.
docs[:3]

[Document(metadata={'amount': 208.07, 'label': 0, 'other_info': 'merchant:Netflix'}, page_content='Purchase at Netflix for shopping. Date: 2025-02-15. Status: Cleared.'),
 Document(metadata={'amount': 151.34, 'label': 0, 'other_info': 'merchant:Starbucks'}, page_content='Purchase at Starbucks for groceries. Date: 2025-07-26. Status: Cleared.'),
 Document(metadata={'amount': 40.57, 'label': 0, 'other_info': 'merchant:Whole Foods'}, page_content='Purchase at Whole Foods for dining. Date: 2025-07-28. Status: Cleared.')]

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Embedding model used for every vector store in this notebook.
# `model_name` is the keyword the later setup cell also uses; sticking to one
# spelling keeps the two definitions of `embedding_model` consistent.
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from langchain_community.vectorstores import Chroma,FAISS

# Build a Chroma vector store from `docs`, passing `embedding_model` as the
# embedding and ../data/chroma as the persist directory.
vdb1 = Chroma.from_documents(
    docs,
    persist_directory=os.path.join('../data','chroma'),
    embedding=embedding_model,
)

# Build a FAISS vector store from the same documents and embedding, then save
# it locally under ../data/faiss.
vdb2 = FAISS.from_documents(docs, embedding_model)
vdb2.save_local(
    os.path.join('../data', 'faiss')
)

AttributeError: module 'chromadb' has no attribute 'config'

In [None]:
!pip install cyborgdb[langchain]
!pip install cyborgdb-lite[langchain]

In [13]:
from cyborgdb_core.integrations.langchain import CyborgVectorStore
from cyborgdb_core import DBConfig

# Smoke test of the CyborgDB LangChain vector store: index three toy sentences
# with DBConfig('memory') index/config locations and run one similarity query.
#
# The API key is read from the CYBORGDB_API_KEY environment variable (for
# example from a .env file loaded with load_dotenv()) instead of being
# committed in the notebook. A missing variable raises KeyError right here
# rather than failing later inside the client. The key that used to be
# hardcoded in this cell should be treated as leaked and rotated.
store = CyborgVectorStore.from_texts(
    texts=["hello world", "goodbye world","hello hello hello"],
    embedding="all-MiniLM-L6-v2",  # sentence-transformer name
    index_key=CyborgVectorStore.generate_key(),
    api_key=os.environ["CYBORGDB_API_KEY"],
    index_location=DBConfig('memory'),
    config_location=DBConfig('memory'),
    index_type="ivfflat",
    metric="cosine"
)

# NOTE: this rebinds `docs`, which earlier in the notebook holds the Documents
# built from the CSV; rerun that cell before building the Chroma/FAISS stores.
docs = store.similarity_search("hello")



In [14]:
# Display the results returned by store.similarity_search("hello").
docs

[Document(metadata={}, page_content='hello hello hello'),
 Document(metadata={}, page_content='hello world'),
 Document(metadata={}, page_content='goodbye world')]

In [15]:
import time
import sys
import os
from dataclasses import dataclass
from langchain_community.vectorstores import Chroma, FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from dotenv import load_dotenv
load_dotenv()

# Embedding model shared by the vector stores opened below.
embedding_model = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
)

# Paths of the Chroma and FAISS stores under ../data.
faiss_db_path = os.path.join('../data', 'faiss_index')
chroma_db_path = os.path.join('../data', 'chroma_db')

In [14]:
# List what is currently present in the data folder.
os.listdir(path='../data')

['faiss_index', 'financial_synthetic.csv', 'chroma_db']

In [16]:
# Create a Chroma store object on the chroma_db_path directory, with
# `embedding_model` as its embedding function.
chroma_db = Chroma(
    embedding_function=embedding_model,
    persist_directory=chroma_db_path,
)