In [5]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

loader = TextLoader("speech.txt")
documents = loader.load()

text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=30)
docs = text_splitter.split_documents(documents)


Created a chunk of size 604, which is longer than the specified 500


In [6]:
docs

[Document(metadata={'source': 'speech.txt'}, page_content="Speech is the use of the human voice as a medium for language. \nSpoken language combines vowel and consonant sounds to form units of meaning like words, which belong to a language's lexicon. \nThere are many different intentional speech acts, such as informing, declaring, asking, persuading, directing; acts may vary in various \naspects like enunciation, intonation, loudness, and tempo to convey meaning. \nIndividuals may also unintentionally communicate aspects of their social position through speech, such as sex, age, place of origin,\n physiological and mental condition, education, and experiences."),
 Document(metadata={'source': 'speech.txt'}, page_content="While normally used to facilitate communication with others, people may also use speech without the intent to communicate. \nSpeech may nevertheless express emotions or desires; people talk to themselves sometimes in acts that are a development of what\nsome psychologi

In [10]:
embeddings = OllamaEmbeddings(model = "gemma:2b")
db = FAISS.from_documents(docs,embeddings)
db

<langchain_community.vectorstores.faiss.FAISS at 0x2a6773b7520>

In [15]:
##querying

query = "There are many different intentional speech acts, such as informing, declaring, asking, persuading, directing; acts may vary in various /" \
"aspects like enunciation, intonation, loudness, and tempo to convey meaning."

docs = db.similarity_search(query)
docs[0].page_content

"Speech is the use of the human voice as a medium for language. \nSpoken language combines vowel and consonant sounds to form units of meaning like words, which belong to a language's lexicon. \nThere are many different intentional speech acts, such as informing, declaring, asking, persuading, directing; acts may vary in various \naspects like enunciation, intonation, loudness, and tempo to convey meaning. \nIndividuals may also unintentionally communicate aspects of their social position through speech, such as sex, age, place of origin,\n physiological and mental condition, education, and experiences."

In [17]:
retriver = db.as_retriever()
docs = retriver.invoke(query)
docs[0].page_content

"Speech is the use of the human voice as a medium for language. \nSpoken language combines vowel and consonant sounds to form units of meaning like words, which belong to a language's lexicon. \nThere are many different intentional speech acts, such as informing, declaring, asking, persuading, directing; acts may vary in various \naspects like enunciation, intonation, loudness, and tempo to convey meaning. \nIndividuals may also unintentionally communicate aspects of their social position through speech, such as sex, age, place of origin,\n physiological and mental condition, education, and experiences."

In [18]:
docs_score = db.similarity_search_with_score(query)
docs_score

[(Document(id='49f0a7a8-e953-4529-9861-3b05af0f1044', metadata={'source': 'speech.txt'}, page_content="Speech is the use of the human voice as a medium for language. \nSpoken language combines vowel and consonant sounds to form units of meaning like words, which belong to a language's lexicon. \nThere are many different intentional speech acts, such as informing, declaring, asking, persuading, directing; acts may vary in various \naspects like enunciation, intonation, loudness, and tempo to convey meaning. \nIndividuals may also unintentionally communicate aspects of their social position through speech, such as sex, age, place of origin,\n physiological and mental condition, education, and experiences."),
  np.float32(1983.5215)),
 (Document(id='82013291-4527-4649-a040-5361a9ca6054', metadata={'source': 'speech.txt'}, page_content="While normally used to facilitate communication with others, people may also use speech without the intent to communicate. \nSpeech may nevertheless expres

In [22]:
embeddings_vec = embeddings.embed_query(query)
embeddings_vec

docs_score = db.similarity_search_by_vector(embeddings_vec)
docs_score[0].page_content

"Speech is the use of the human voice as a medium for language. \nSpoken language combines vowel and consonant sounds to form units of meaning like words, which belong to a language's lexicon. \nThere are many different intentional speech acts, such as informing, declaring, asking, persuading, directing; acts may vary in various \naspects like enunciation, intonation, loudness, and tempo to convey meaning. \nIndividuals may also unintentionally communicate aspects of their social position through speech, such as sex, age, place of origin,\n physiological and mental condition, education, and experiences."

In [23]:
## Saving and Loading 

db.save_local("Faiss_index")

In [28]:
new_db = FAISS.load_local("Faiss_index",embeddings, allow_dangerous_deserialization=True)
doc = new_db.similarity_search(query)
doc[0].page_content

"Speech is the use of the human voice as a medium for language. \nSpoken language combines vowel and consonant sounds to form units of meaning like words, which belong to a language's lexicon. \nThere are many different intentional speech acts, such as informing, declaring, asking, persuading, directing; acts may vary in various \naspects like enunciation, intonation, loudness, and tempo to convey meaning. \nIndividuals may also unintentionally communicate aspects of their social position through speech, such as sex, age, place of origin,\n physiological and mental condition, education, and experiences."