In [None]:
from langchain_community.document_loaders       import TextLoader
from langchain_community.vectorstores           import FAISS
from langchain_community.embeddings             import OllamaEmbeddings
from langchain.text_splitter                    import CharacterTextSplitter

## Text Splitting

In [None]:
# Load the speech transcript from disk through LangChain's TextLoader.
loader = TextLoader(file_path='data/speech.txt')
documents = loader.load()

In [None]:
# Splitter configured with chunk_size=1000 and chunk_overlap=30
# (presumably measured in characters -- confirm against the splitter's
# length function).
textSplitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=30)

# NOTE: despite the singular name, `document` holds the result of splitting
# everything in `documents`.
document = textSplitter.split_documents(documents)

In [None]:
# Notebook display: show the output of the text splitter.
document

## Embeddings

In [None]:
# Embedding model accessed through Ollama (model tag "gemma:2b").
embeddings = OllamaEmbeddings(model="gemma:2b")

# Build the FAISS index from the split chunks (`document`), not from the raw
# `documents` list. Indexing the unsplit input would discard the work done by
# the text splitter and embed the whole file as a single entry, so every
# similarity search would return the full text instead of the relevant chunk.
database = FAISS.from_documents(
    document,
    embeddings,
)

In [None]:
# Notebook display: show the vector store object that was just built.
database

In [None]:
# Natural-language question reused by every search cell below.
query = "How does the speaker describe the desired outcome of the war?"

# Text query against the vector store; `result` is indexed and read via
# `.page_content` in the following cell.
result = database.similarity_search(query)

In [None]:
# Print the full list of hits, then only the text of the first hit.
# NOTE: `result[0]` raises IndexError if the search returned nothing.
print(f'Result:         {result}')
print(f'First Result:   {result[0].page_content}')

## Retriever

In [None]:
# Wrap the vector store in a retriever object and run the same query
# through its `invoke` entry point.
retriever = database.as_retriever()

# Rebinds `result` (previously set by the direct similarity search).
result = retriever.invoke(query)
print(f'First Result:   {result[0].page_content}')

## Similarity Search with Score

In [None]:
# Same text query, but using the variant that also returns a score per hit.
# NOTE(review): with a default FAISS index the score is presumably a distance
# (lower = more similar) -- confirm against the LangChain FAISS docs.
similarityScoreForDocument = database.similarity_search_with_score(query)
similarityScoreForDocument

## Passing Vectors instead of Sentences

In [None]:
# Embed the query text once with the same embedding model used for the index,
# then display the resulting vector.
embeddingVector = embeddings.embed_query(query)
embeddingVector

In [None]:
# Search with the pre-computed embedding instead of the query text.
# `similarity_search_with_score` expects a text query and embeds it itself,
# so handing it a vector is wrong; the `_by_vector` variant takes the
# embedding directly and still returns the hits together with their scores.
result = database.similarity_search_with_score_by_vector(embeddingVector)
result

## Save and Load the Database

In [None]:
# Persist the vector store under the local path 'faiss_index' so it can be
# reloaded without re-embedding.
database.save_local('faiss_index')

In [None]:
# Reload the index saved under 'faiss_index', using the same embedding model
# that built it.
# SECURITY NOTE: `allow_dangerous_deserialization=True` opts in to loading
# serialized data from disk (pickle-based per the LangChain docs -- confirm).
# Only enable it for index folders you created yourself; never for files from
# an untrusted source.
newDatabase = FAISS.load_local(
    folder_path='faiss_index',
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [None]:
# Run the same query against the reloaded index to confirm the round trip.
# The hits get their own name rather than being assigned to `documents`:
# rebinding `documents` would overwrite the raw loaded documents, so
# re-running the splitting cell afterwards would silently operate on search
# hits instead of the source text.
reloadedResult = newDatabase.similarity_search(query)
reloadedResult