In [8]:
# Load every plain-text conversation export (*.txt) found in the data directory
# into a single list of LangChain documents.
from langchain.document_loaders import TextLoader

# os is used to enumerate the directory and to build file paths
import os

# Accumulates the documents returned by each per-file loader
documents = []

data_dir = "data"  # replace with your directory

# os.listdir returns entries in arbitrary order; sorting keeps the document
# (and therefore chunk) order stable across runs and machines.
for filename in sorted(os.listdir(data_dir)):
    if filename.endswith(".txt"):
        file_path = os.path.join(data_dir, filename)
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default encoding; fall back to encoding detection if that fails.
        loader = TextLoader(file_path, encoding="utf-8", autodetect_encoding=True)
        documents.extend(loader.load())  # append loaded documents

# Fail fast with a clear message instead of handing an empty corpus to the
# splitting / indexing cells further down.
if not documents:
    raise FileNotFoundError(f"No .txt documents could be loaded from {data_dir!r}")


In [10]:
# Break the loaded documents into smaller, overlapping pieces

from langchain.text_splitter import RecursiveCharacterTextSplitter

# chunk_size caps the length of each piece; chunk_overlap is how much
# neighbouring pieces share so content cut at a boundary appears in both.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
chunks = text_splitter.split_documents(documents)

In [11]:
# Embedding model used to turn text chunks into vectors

from langchain.embeddings import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)



In [12]:
from langchain.vectorstores import FAISS

# Embed the chunks and index them in a FAISS vector store, then expose the
# store through a retriever (default search settings) for the QA chain.
vectorstore = FAISS.from_documents(
    chunks,
    embedding=embedding_model,
)
retriever = vectorstore.as_retriever()


In [13]:
from langchain.llms import Ollama

# LLM served through Ollama, using the gemma3 model.
# Change the model name below to load a different model.
llm = Ollama(
    model="gemma3",
)
  

In [14]:
from langchain.chains import RetrievalQA

# Step 6: RAG chain — combines the retriever with the LLM. The response dict
# also carries the retrieved source documents (return_source_documents=True).
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

# Step 7: Ask a question
query = "What is the summary of the text?"
# Calling the chain object directly (rag_chain(query)) is deprecated in
# LangChain; invoke() is the supported entry point. "query" is RetrievalQA's
# input key.
response = rag_chain.invoke({"query": query})

print("Answer:", response['result'])

Answer: Here's a summary of the text:

The text consists of four SMS messages sent on March 29, 2025. The messages were sent from two different phone numbers (+918765060836 and +919984625856) and contain varying messages including "Test," "Action," "Ye test msg main using python automated way me bheja h..." and "Boobiee!!!!!".
