In [1]:
import chromadb

In [3]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader

# Open document

In [11]:
loader = TextLoader("data/Lincoln_State_of_Union_1862.txt")

In [17]:
lincoln_speech_doc = loader.load()[0]

# Split document

In [16]:
splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=500)

In [18]:
chunked_docs = splitter.split_documents([lincoln_speech_doc])

Created a chunk of size 608, which is longer than the specified 500
Created a chunk of size 539, which is longer than the specified 500
Created a chunk of size 686, which is longer than the specified 500


# Create and store embeddings

In [20]:
embedding_function = OpenAIEmbeddings()

In [22]:
db = Chroma.from_documents(chunked_docs, embedding_function, persist_directory=".speech_db")

In [23]:
db.persist()

# query database

In [25]:
db_connection = Chroma(persist_directory=".speech_db", embedding_function=embedding_function)

In [27]:
db_connection.similarity_search("What did Lincoln say about slavery?")

[Document(metadata={'source': 'data/Lincoln_State_of_Union_1862.txt'}, page_content='Among the friends of the Union there is great diversity of sentiment and of policy in regard to slavery and the African race amongst us. Some would perpetuate slavery; some would abolish it suddenly and without compensation; some would abolish it gradually and with compensation: some would remove the freed people from us, and some would retain them with us; and there are yet other minor diversities. Because of these diversities we waste much strength in struggles among ourselves. By mutual concession we should harmonize and act together. This would be compromise, but it would be compromise among the friends and not with the enemies of the Union. These articles are intended to embody a plan of such mutual concessions. if the plan shall be adopted, it is assumed that emancipation will follow, at least in several of the States.\n\nAs to the first article, the main points are, first, the emancipation; seco

# Creating a retriever out of the connection

In [28]:
retriever = db_connection.as_retriever()

In [29]:
retriever.get_relevant_documents("slavery")

[Document(metadata={'source': 'data/Lincoln_State_of_Union_1862.txt'}, page_content='As to the second article, I think it would be impracticable to return to bondage the class of persons therein contemplated. Some of them, doubtless, in the property sense belong to loyal owners, and hence provision is made in this article for compensating such. The third article relates to the future of the freed people. It does not oblige, but merely authorizes Congress to aid in colonizing such as may consent. This ought not to be regarded as objectionable on the one hand or on the other, insomuch as it comes to nothing unless by the mutual consent of the people to be deported and the American voters, through their representatives in Congress.\n\nI can not make it better known than it already is that I strongly favor colonization; and yet I wish to say there is an objection urged against free colored persons remaining in the country which is largely imaginary, if not sometimes malicious.\n\nIt is ins