In [1]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [2]:
# Load the speech transcript from a file in the working directory.
# The loader returns a list of Document objects; here it holds a single
# Document whose metadata records the source file name.
loader = TextLoader(file_path="longSpeech.txt")
data = loader.load()
data

[Document(metadata={'source': 'longSpeech.txt'}, page_content='I am happy to join with you today in what will go down in history as the greatest demonstration for freedom in the history of our nation.\n\nFive score years ago, a great American, in whose symbolic shadow we stand today, signed the Emancipation Proclamation. This momentous decree came as a great beacon light of hope to millions of Negro slaves who had been seared in the flames of withering injustice. It came as a joyous daybreak to end the long night of their captivity.\n\nBut one hundred years later, the Negro still is not free. One hundred years later, the life of the Negro is still sadly crippled by the manacles of segregation and the chains of discrimination. One hundred years later, the Negro lives on a lonely island of poverty in the midst of a vast ocean of material prosperity. One hundred years later, the Negro is still languished in the corners of American society and finds himself an exile in his own land. And so

In [3]:
# Split the loaded document into small, slightly overlapping chunks so that
# each chunk can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,    # upper bound on the size of each chunk
    chunk_overlap=20,  # amount of text shared between neighbouring chunks
)
splits = text_splitter.split_documents(documents=data)
splits

[Document(metadata={'source': 'longSpeech.txt'}, page_content='I am happy to join with you today in what will go down in history as the greatest demonstration for freedom in the history of our nation.'),
 Document(metadata={'source': 'longSpeech.txt'}, page_content='Five score years ago, a great American, in whose symbolic shadow we stand today, signed the Emancipation Proclamation. This momentous decree came as a great beacon light of hope to millions of Negro'),
 Document(metadata={'source': 'longSpeech.txt'}, page_content='millions of Negro slaves who had been seared in the flames of withering injustice. It came as a joyous daybreak to end the long night of their captivity.'),
 Document(metadata={'source': 'longSpeech.txt'}, page_content='But one hundred years later, the Negro still is not free. One hundred years later, the life of the Negro is still sadly crippled by the manacles of segregation and the chains of discrimination. One'),
 Document(metadata={'source': 'longSpeech.txt'}

In [4]:
# Embed every chunk with OpenAI embeddings and index the result in a Chroma
# vector store. No persist_directory is given here, so nothing is written to
# an explicit on-disk location by this cell.
# NOTE(review): OpenAIEmbeddings() is built with no arguments, so credentials
# presumably come from the environment - confirm before sharing the notebook.
embedding = OpenAIEmbeddings()
vectorDB = Chroma.from_documents(documents=splits, embedding=embedding)
vectorDB

<langchain_chroma.vectorstores.Chroma at 0x22790c15160>

In [5]:
# Example questions to run against the vector store.
# query1 is defined for experimentation but is not used by the visible cells.
query1 = "What is the speaker trying to say?"
query2 = "Why is the speaker not satisfied?"

# Fetch the chunks most similar to query2 and show the text of the top hit.
docs_result = vectorDB.similarity_search(query=query2)
docs_result[0].page_content

'There are those who are asking the devotees of civil rights, "When will you be satisfied?" We can never be satisfied as long as the Negro is the victim of the unspeakable horrors of police brutality.'

In [6]:
# Wrap the vector store in a retriever and run the same question through it.
# With the default settings used here, the top hit matches the one returned
# by the direct similarity search on vectorDB.

retriever = vectorDB.as_retriever()
retriever_result = retriever.invoke(input=query2)
retriever_result[0].page_content

'There are those who are asking the devotees of civil rights, "When will you be satisfied?" We can never be satisfied as long as the Negro is the victim of the unspeakable horrors of police brutality.'

In [13]:
# Same search as before, but each hit comes back as a (Document, score) tuple.
# NOTE(review): the scores look like distances (the best match is listed first
# with the smallest value) - confirm against the Chroma documentation.

docs_and_score = vectorDB.similarity_search_with_score(query=query2)
docs_and_score

[(Document(id='f0952cc9-3820-41a8-b40d-ebc7f7bb0689', metadata={'source': 'longSpeech.txt'}, page_content='There are those who are asking the devotees of civil rights, "When will you be satisfied?" We can never be satisfied as long as the Negro is the victim of the unspeakable horrors of police brutality.'),
  0.39089635014533997),
 (Document(id='ae6c17e2-e9f2-4f6e-aede-5b68d2f0dc44', metadata={'source': 'longSpeech.txt'}, page_content='for which to vote. No, no, we are not satisfied, and we will not be satisfied until "justice rolls down like waters, and righteousness like a mighty stream."'),
  0.39409753680229187),
 (Document(id='06337ff7-d75b-4601-880b-2f026027c3b5', metadata={'source': 'longSpeech.txt'}, page_content='Instead of honoring this sacred obligation, America has given the Negro people a bad check, a check which has come back marked "insufficient funds."'),
  0.4316622316837311),
 (Document(id='e5245681-2467-4db5-b5c3-5ef441252e68', metadata={'source': 'longSpeech.txt'},

In [8]:
# Embed the question explicitly, then search the store with the raw vector
# instead of the query text.
embeddings_vector = embedding.embed_query(text=query2)
resultByVector = vectorDB.similarity_search_by_vector(embedding=embeddings_vector)
resultByVector[0].page_content

'There are those who are asking the devotees of civil rights, "When will you be satisfied?" We can never be satisfied as long as the Negro is the victim of the unspeakable horrors of police brutality.'

In [10]:
# Saving and Loading
# Build a second Chroma store from the same chunks, persisted under ./chroma.db.
#
# Explicit, position-based ids keep this cell idempotent. Without ids, every
# re-run stores the same chunks again under fresh random ids, so the persisted
# collection grows and later searches return duplicated hits. With stable ids,
# a re-run overwrites the existing entries instead of appending new ones.
# Entries written by earlier runs that did not pass ids are not removed;
# delete ./chroma.db once to start from a clean collection.
vectorDB_local = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    ids=[f"longSpeech-{position}" for position in range(len(splits))],
    persist_directory="./chroma.db",
)

In [11]:
# Re-open the collection persisted under ./chroma.db.
# The same embedding object is passed so that query text is embedded the same
# way as the stored chunks were.
vectorDb_fromLocal = Chroma(
    persist_directory="./chroma.db",
    embedding_function=embedding,
)

In [12]:
# Check the reloaded store by running the same question against it and
# printing the text of the best match.
docs = vectorDb_fromLocal.similarity_search(query=query2)
print(docs[0].page_content)

There are those who are asking the devotees of civil rights, "When will you be satisfied?" We can never be satisfied as long as the Negro is the victim of the unspeakable horrors of police brutality.
