In [2]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS


In [4]:
# Read the raw text file into LangChain Document objects.
loader = TextLoader(file_path="speech.txt")
documents = loader.load()

# Break the loaded documents into small chunks for embedding.
# NOTE: chunk_size is not a hard cap here -- the last chunk in the output
# below is well over 100 characters.
text_splitter = CharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=10,
)
texts = text_splitter.split_documents(documents)
texts

[Document(metadata={'source': 'speech.txt'}, page_content='THE MARVELLOUS THING IS THAT IT’S painless," he said. "That\'s how you know when it starts."'),
 Document(metadata={'source': 'speech.txt'}, page_content='"Is it really?"\n\n"Absolutely. I\'m awfully sorry about the odor though. That must bother you."'),
 Document(metadata={'source': 'speech.txt'}, page_content='"Don\'t! Please don\'t."'),
 Document(metadata={'source': 'speech.txt'}, page_content='"Look at them," he said. "Now is it sight or is it scent that brings them like that?"'),
 Document(metadata={'source': 'speech.txt'}, page_content='The cot the man lay on was in the wide shade of a mimosa tree and as he looked out past the shade onto the glare of the plain there were three of the big birds squatted obscenely, while in the sky a dozen more sailed, making quick-moving shadows as they passed.')]

In [6]:
# Embedding model used for both indexing the chunks and embedding queries.
# The repr below shows it talks to an Ollama server on localhost.
embeddings = OllamaEmbeddings(model="nomic-embed-text")

embeddings

OllamaEmbeddings(base_url='http://localhost:11434', model='nomic-embed-text', embed_instruction='passage: ', query_instruction='query: ', mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None, show_progress=False, headers=None, model_kwargs=None)

In [7]:
# Embed every chunk in `texts` and build an in-memory FAISS index over them.
db = FAISS.from_documents(documents=texts, embedding=embeddings)
db

<langchain_community.vectorstores.faiss.FAISS at 0x233a545bec0>

In [9]:
# Run a similarity search against the index and show the text of the top hit.
query = "What is marvelous thing?"
results = db.similarity_search(query=query)
results[0].page_content

'THE MARVELLOUS THING IS THAT IT’S painless," he said. "That\'s how you know when it starts."'

In [10]:
# Search with `query` again, but get each hit back as a (Document, score) pair.
# In the output the scores increase with rank, so they read as distances:
# the smaller the score, the closer the match.
result_with_score = db.similarity_search_with_score(query=query)
result_with_score

[(Document(id='1199b226-097a-4749-b03b-cb38dacb25af', metadata={'source': 'speech.txt'}, page_content='THE MARVELLOUS THING IS THAT IT’S painless," he said. "That\'s how you know when it starts."'),
  np.float32(517.1804)),
 (Document(id='95720c6b-a58b-46d2-aeb4-a5d0df7cde8b', metadata={'source': 'speech.txt'}, page_content='"Is it really?"\n\n"Absolutely. I\'m awfully sorry about the odor though. That must bother you."'),
  np.float32(541.8161)),
 (Document(id='3c42c319-1cab-4f5d-b7be-120853adde4e', metadata={'source': 'speech.txt'}, page_content='"Look at them," he said. "Now is it sight or is it scent that brings them like that?"'),
  np.float32(559.803)),
 (Document(id='3fe36996-e9b7-406c-a6dc-2b392a235369', metadata={'source': 'speech.txt'}, page_content='The cot the man lay on was in the wide shade of a mimosa tree and as he looked out past the shade onto the glare of the plain there were three of the big birds squatted obscenely, while in the sky a dozen more sailed, making quic

In [12]:
# Persist the index to the "faiss_index" folder so it can be reloaded later
# without re-embedding the documents.
db.save_local(folder_path="faiss_index")


In [13]:
# Reload the index saved under "faiss_index" and rerun the earlier query
# against it to confirm the save/load round trip returns the same documents.
# SECURITY: allow_dangerous_deserialization=True opts in to unpickling the
# saved docstore, which can execute arbitrary code. Only load index folders
# that you created yourself and trust.
new_db = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Reuse `query` from the search cell instead of repeating the literal, so the
# in-memory and reloaded searches are guaranteed to ask the same question.
result1 = new_db.similarity_search(query)

In [14]:
# Display the hits returned by the reloaded index (`new_db`).
result1

[Document(id='1199b226-097a-4749-b03b-cb38dacb25af', metadata={'source': 'speech.txt'}, page_content='THE MARVELLOUS THING IS THAT IT’S painless," he said. "That\'s how you know when it starts."'),
 Document(id='95720c6b-a58b-46d2-aeb4-a5d0df7cde8b', metadata={'source': 'speech.txt'}, page_content='"Is it really?"\n\n"Absolutely. I\'m awfully sorry about the odor though. That must bother you."'),
 Document(id='3c42c319-1cab-4f5d-b7be-120853adde4e', metadata={'source': 'speech.txt'}, page_content='"Look at them," he said. "Now is it sight or is it scent that brings them like that?"'),
 Document(id='3fe36996-e9b7-406c-a6dc-2b392a235369', metadata={'source': 'speech.txt'}, page_content='The cot the man lay on was in the wide shade of a mimosa tree and as he looked out past the shade onto the glare of the plain there were three of the big birds squatted obscenely, while in the sky a dozen more sailed, making quick-moving shadows as they passed.')]