In [59]:
from dotenv import load_dotenv
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

import chromadb
from chromadb.config import Settings

# Load environment variables from a local .env file.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

# Init
embeddings = OpenAIEmbeddings()
# Split the text on newlines and pack the pieces into chunks of up to 200
# (length-function units, characters by default) with no overlap.
textSplitter = CharacterTextSplitter(separator="\n", chunk_size=200, chunk_overlap=0)

# Name of the server-side collection that holds the embedded facts.
COLLECTION_NAME = "stephen"

# Talk to a Chroma server over HTTP; allow_reset=True is required for reset().
client = chromadb.HttpClient(host="localhost", port="3005", settings=Settings(allow_reset=True))
client.heartbeat()  # round-trip to the server to confirm it is reachable
client.reset()  # DESTRUCTIVE: resets the whole database so every run starts clean
collection = client.create_collection(COLLECTION_NAME)

# Bind the LangChain wrapper to that collection through its public
# `collection_name` argument. Without it the wrapper get-or-creates its own
# default collection on the server, and the only way to retarget it is to
# overwrite the private `_collection` attribute.
# `persist_directory` is intentionally not passed: with an external HTTP client
# the server owns storage, so a local directory would not be used.
chromaLocal = Chroma(
    client=client,
    collection_name=COLLECTION_NAME,
    embedding_function=embeddings,
)

# Alternative: use Chroma's embedded (in-process) client instead of a server:
#   chromaLocal = Chroma(embedding_function=embeddings, persist_directory="emb")


# Data
loader = TextLoader("./facts.txt")
docs = loader.load_and_split(textSplitter)

# Add data to chroma. No persist() call is needed here: documents are sent to
# the server, which is responsible for storing them.
chromaLocal.add_documents(docs)

# Similarity lookup: ask the store for the 5 best-matching chunks together
# with their scores, then print each hit.
# NOTE(review): scores look like distances (smaller = closer match) — confirm.
query = "Zeus was the king of the Greek gods according to ancient Greek myth?"
result = chromaLocal.similarity_search_with_score(query, k=5)

for res in result:
    # Each hit is a (document, score) pair.
    doc, score = res
    # One print with sep="\n" emits the same bytes as the three separate
    # prints it replaces: two blank lines, the score, then the chunk text.
    print("\n", score, doc.page_content, sep="\n")




0.33766710405730715
23. The Pacific Ocean is the largest ocean on Earth, covering more than 60 million square miles.
24. Zeus was the king of the Greek gods according to ancient Greek myth.


0.4365785596869199
30. The first Olympic Games were held in Ancient Greece in 776 B.C.
31. The Mona Lisa has no visible eyebrows.
32. A ball of glass will bounce higher than a ball of rubber.


0.4769557954255885
35. Dolphins sleep with one eye open.  
36. There are 1,792 steps to the top of the Eiffel Tower.
37. Tug-of-war was an Olympic event between 1900 and 1920.


0.4906903990429799
10. The Great Wall of China is approximately 13,171 miles long.
11. Bananas are berries, but strawberries aren't.
12. The Sphinx of Giza has the body of a lion and the head of a human.


0.4951776118465587
40. Charlie Chaplin once won third place in a Charlie Chaplin look-alike contest.
41. Mount Everest is the tallest mountain in the world.
42. No two zebras have the same pattern of stripes.
