In [3]:
from langchain_chroma import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader


In [5]:
# Read speech.txt from the working directory into LangChain Document objects.
loader = TextLoader(file_path="speech.txt")
docs = loader.load()

# Bare last expression so the notebook renders the loaded documents.
docs

[Document(metadata={'source': 'speech.txt'}, page_content="Speech is the use of the human voice as a medium for language. \nSpoken language combines vowel and consonant sounds to form units of meaning like words, which belong to a language's lexicon. \nThere are many different intentional speech acts, such as informing, declaring, asking, persuading, directing; acts may vary in various \naspects like enunciation, intonation, loudness, and tempo to convey meaning. \nIndividuals may also unintentionally communicate aspects of their social position through speech, such as sex, age, place of origin,\n physiological and mental condition, education, and experiences.\n\nWhile normally used to facilitate communication with others, people may also use speech without the intent to communicate. \nSpeech may nevertheless express emotions or desires; people talk to themselves sometimes in acts that are a development of what\nsome psychologists (e.g., Lev Vygotsky) have maintained is the use of sile

In [6]:
# Break the loaded documents into chunks capped at 500 characters, with a
# 30-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=30,
)
splits = text_splitter.split_documents(documents=docs)

In [7]:
# Show the chunks returned by text_splitter.split_documents(docs).
splits

[Document(metadata={'source': 'speech.txt'}, page_content="Speech is the use of the human voice as a medium for language. \nSpoken language combines vowel and consonant sounds to form units of meaning like words, which belong to a language's lexicon. \nThere are many different intentional speech acts, such as informing, declaring, asking, persuading, directing; acts may vary in various \naspects like enunciation, intonation, loudness, and tempo to convey meaning."),
 Document(metadata={'source': 'speech.txt'}, page_content='Individuals may also unintentionally communicate aspects of their social position through speech, such as sex, age, place of origin,\n physiological and mental condition, education, and experiences.'),
 Document(metadata={'source': 'speech.txt'}, page_content='While normally used to facilitate communication with others, people may also use speech without the intent to communicate. \nSpeech may nevertheless express emotions or desires; people talk to themselves somet

In [8]:
# Embedding function backed by the Ollama model "gemma:2b".
# NOTE(review): the cell output echoes this line, which looks like a
# deprecation warning for this import path -- confirm and migrate if so.
embeddings = OllamaEmbeddings(model="gemma:2b")

# Embed every chunk and index it in a Chroma vector store; no
# persist_directory is passed at this point.
vector = Chroma.from_documents(documents=splits, embedding=embeddings)

# Display the store object's repr.
vector

  embeddings = OllamaEmbeddings(model="gemma:2b")


<langchain_chroma.vectorstores.Chroma at 0x1f87735f7f0>

In [9]:
## Querying

# The query is assembled from adjacent string literals inside parentheses
# (implicit concatenation). No separator character belongs between
# "various " and "aspects": the text must read "...vary in various aspects...".
query = (
    "There are many different intentional speech acts, such as informing, declaring, asking, persuading, directing; acts may vary in various "
    "aspects like enunciation, intonation, loudness, and tempo to convey meaning."
)

# Ask the vector store for the chunks most similar to the query and show the
# text of the first result.
q = vector.similarity_search(query)
q[0].page_content

"Speech is the use of the human voice as a medium for language. \nSpoken language combines vowel and consonant sounds to form units of meaning like words, which belong to a language's lexicon. \nThere are many different intentional speech acts, such as informing, declaring, asking, persuading, directing; acts may vary in various \naspects like enunciation, intonation, loudness, and tempo to convey meaning."

In [11]:
## Saving to disk

# Raw string literal: the Windows path contains backslash sequences (\L, \V, \c)
# that are not valid Python escapes. The raw form yields the exact same path
# without an invalid-escape warning and stays correct if a folder name ever
# begins with a character such as "n" or "t".
# NOTE: this rebinds `vector` to a store created with persist_directory set.
# NOTE(review): re-running this cell presumably adds the same chunks to the
# persisted collection again -- confirm before re-executing.
vector = Chroma.from_documents(
    splits,
    embeddings,
    persist_directory=r"D:\Langchain\Vectorstore\chormadb",
)

In [12]:
# Load from disk

# Raw string literal so the backslashes in the Windows path are taken
# literally (\L, \V and \c are not valid escape sequences); the resulting path
# is unchanged.
vec = Chroma(
    persist_directory=r"D:\Langchain\Vectorstore\chormadb",
    embedding_function=embeddings,
)

# NOTE: `docs` was first bound to the output of loader.load(); from here on it
# holds the search results instead, so re-running the splitter cell afterwards
# would operate on these results rather than on the loaded file.
docs = vec.similarity_search(query)
docs[0].page_content

"Speech is the use of the human voice as a medium for language. \nSpoken language combines vowel and consonant sounds to form units of meaning like words, which belong to a language's lexicon. \nThere are many different intentional speech acts, such as informing, declaring, asking, persuading, directing; acts may vary in various \naspects like enunciation, intonation, loudness, and tempo to convey meaning."

In [13]:
### Retriever
# Wrap the current `vector` store in a retriever, run the same query through
# it, and display the text of the first returned document.
retriver = vector.as_retriever()
top_hit = retriver.invoke(query)[0]
top_hit.page_content

"Speech is the use of the human voice as a medium for language. \nSpoken language combines vowel and consonant sounds to form units of meaning like words, which belong to a language's lexicon. \nThere are many different intentional speech acts, such as informing, declaring, asking, persuading, directing; acts may vary in various \naspects like enunciation, intonation, loudness, and tempo to convey meaning."