In [40]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA
import pickle
import os


In [41]:
# Source corpus: a plain-text file sitting next to this notebook.
data_path = "data.txt"

# Read the file as UTF-8; load() returns a list of Document objects.
loader = TextLoader(file_path=data_path, encoding="utf-8")
text = loader.load()

# Preview only the first Document so the cell output stays short.
text[:1]


[Document(metadata={'source': 'data.txt'}, page_content='Dr. A. P. J. Abdul Kalam was born on 15 October 1931 in Rameswaram, Tamil Nadu.\nHe came from a simple family, and his father was a boat owner while his mother was a homemaker.\nDespite financial struggles, Kalam was a bright and hardworking student from his childhood.\nHe sold newspapers as a boy to support his education and family.\nHis interest in science and flight began at an early age.\nHe studied physics at St. Joseph’s College in Tiruchirappalli.\nLater, he pursued aerospace engineering at the Madras Institute of Technology.\nHis teachers noticed his sharp mind and encouraged him to pursue research.\nAfter graduation, he joined the Defence Research and Development Organisation (DRDO).\nSoon, he moved to the Indian Space Research Organisation (ISRO).\nAt ISRO, Kalam played a major role in India’s first Satellite Launch Vehicle (SLV-III).\nThe SLV-III successfully launched the Rohini satellite in 1980.\nThis made India one 

In [42]:
# Cut the loaded documents into chunks of at most 300 characters, with a
# 50-character overlap between consecutive chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
)

# NOTE: despite its name, `text_splitter` holds the resulting chunk Documents,
# not the splitter object itself.
text_splitter = splitter.split_documents(text)

# Sanity check: number of chunks produced and a look at the first one.
(len(text_splitter), text_splitter[0])


(13,
 Document(metadata={'source': 'data.txt'}, page_content='Dr. A. P. J. Abdul Kalam was born on 15 October 1931 in Rameswaram, Tamil Nadu.\nHe came from a simple family, and his father was a boat owner while his mother was a homemaker.\nDespite financial struggles, Kalam was a bright and hardworking student from his childhood.'))

In [43]:
# Embedding model served by Ollama; it is used to vectorise the chunks when the
# FAISS index is built below.
embedding = OllamaEmbeddings(model="nomic-embed-text:latest")

# Build a FAISS vector store from the chunk Documents, then expose it as a
# retriever (default retriever settings) for the QA chain.
db = FAISS.from_documents(text_splitter, embedding=embedding)
retriever = db.as_retriever()


In [44]:
# Local LLM (served by Ollama) that will write the answers.
model = Ollama(model="mistral:latest")

# Retrieval-augmented QA chain: the retriever supplies context chunks and the
# LLM answers from them. chain_type is left at the library default.
qa_chain = RetrievalQA.from_chain_type(llm=model, retriever=retriever)


In [45]:
# Ask a single question through the retrieval QA chain.
question = "who is apj abdual kalam give short in 2 lines"
response = qa_chain.invoke(question)
# The chain output is indexed by key; the generated answer is under "result".
response["result"]


' Dr. A.P.J. Abdul Kalam was a renowned scientist and politician from India, known as the "Missile Man" for his work on India\'s missile development programs, particularly Agni and Prithvi. He served as the 11th President of India from 2002 to 2007 and was also an inspirational author whose books continue to motivate students worldwide.'

In [46]:
# Serialise the whole vector store object to vectorstore.pkl in the current folder.
with open("vectorstore.pkl", mode="wb") as pkl_file:
    pickle.dump(db, pkl_file)

# Report the size in bytes of the file just written, as a quick sanity check.
os.stat("vectorstore.pkl").st_size


45533

In [47]:
# Restore the vector store from disk.
# NOTE(security): pickle.load can execute arbitrary code while deserialising,
# so only unpickle files that this notebook (or another trusted source) wrote.
with open("vectorstore.pkl", mode="rb") as pkl_file:
    db_loaded = pickle.load(pkl_file)

retriever_loaded = db_loaded.as_retriever()

# Rebuild the QA chain on top of the restored store, with its own LLM handle.
llm_loaded = Ollama(model="mistral:latest")
qa_chain_loaded = RetrievalQA.from_chain_type(
    llm=llm_loaded,
    retriever=retriever_loaded,
)

# Query the rebuilt chain to confirm the reloaded store still answers questions.
answer_loaded = qa_chain_loaded.invoke("give 2 lines about apj abdul kalam")
answer_loaded["result"]


' Dr. A. P. J. Abdul Kalam was a renowned scientist who played a significant role in India\'s missile development program, earning him the title "Missile Man of India." He served as the 11th President of India and is widely respected for his leadership, vision, and inspiring life story. His books continue to motivate students across the globe.'