In [1]:
from sentence_transformers import SentenceTransformer
from typing import List

class MyEmbeddings:
    """Thin adapter around a ``SentenceTransformer`` model.

    Exposes the ``embed_documents`` / ``embed_query`` method pair so an
    instance can be handed to a vector store as its embedding function.
    """

    def __init__(self, model, trust_remote_code: bool = True):
        """Load the underlying sentence-transformers model.

        Args:
            model: Model name or local path forwarded to ``SentenceTransformer``.
            trust_remote_code: Forwarded to ``SentenceTransformer``. Defaults to
                ``True`` to preserve the existing behaviour.
                NOTE(review): enabling this lets a model repository ship custom
                Python code that runs locally; pass ``False`` for models that do
                not need it.
        """
        self.model = SentenceTransformer(model, trust_remote_code=trust_remote_code)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a collection of texts.

        Args:
            texts: The strings to embed.

        Returns:
            One embedding (a list of floats) per input text, in input order.
            An empty input yields an empty list without calling the model.
        """
        texts = list(texts)
        if not texts:
            return []
        # A single encode() call lets the model batch the inputs instead of
        # running one forward pass per text.
        return self.model.encode(texts).tolist()

    def embed_query(self, query: str) -> List[float]:
        """Embed a single query string and return it as a flat list of floats."""
        return self.model.encode(query).tolist()

  from tqdm.autonotebook import tqdm, trange


In [2]:
import os

from langchain_chroma import Chroma
from langchain_community.document_loaders import DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

dir_path = os.getcwd()

# Load every .txt file found (recursively) under ./texts, resolved against
# the current working directory.
loader = DirectoryLoader(os.path.join(dir_path, "texts"), glob="**/*.txt", show_progress=True)
data = loader.load()
print("finished loading data")

# Split the loaded documents into chunks of at most 500 characters, no overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

embeddings = MyEmbeddings("sentence-transformers/all-MiniLM-L6-v2")

# persist_directory takes a path string or None; None (not False) is the
# way to request a non-persisted collection.
# NOTE(review): the variable name `chromadb` shadows the `chromadb` package if
# it is ever imported in this notebook; kept because later cells use it.
chromadb = Chroma.from_documents(
    documents=all_splits,
    embedding=embeddings,
    persist_directory=None,
)

100%|██████████| 6/6 [00:00<00:00,  7.08it/s]


finished loading data


In [3]:
# Retrieve the 10 chunks most similar to the question from the vector store.
question = "lee hsien loong wife"
docs = chromadb.similarity_search(question, k=10)

# Keep a separate list of the hits and show it as the cell output.
relevant_texts = list(docs)
relevant_texts

[Document(metadata={'source': '/Users/shaoyang/Desktop/Agent/langgraph/rag/texts/leehsienloong.txt'}, page_content='Personal life Lee married his first wife, Wong Ming Yang, a Malaysian-born physician, on 20 May 1978. They have a daughter and a son, Li Xiuqi, born in 1981, and Li Yipeng, born in 1982. Three weeks after giving birth to their son, Wong died of a heart attack on 28 October 1982, at the age of 31. [201]'),
 Document(metadata={'source': '/Users/shaoyang/Desktop/Agent/langgraph/rag/texts/leehsienloong.txt'}, page_content='Lee remarried to Ho Ching in 1985, a promising civil servant who subsequently became the executive director and chief executive officer of Temasek Holdings. [202] They have two sons, Li Hongyi and Li Haoyi. [203] Their elder son, Li Hongyi, was a commissioned officer in the Singapore Armed Forces (SAF),[204] and is the deputy director of the Government Technology Agency. [205][206] Their younger son, Li Haoyi, is a software engineer who authors books on the