In [2]:
!pip install -U langchain langchain-community langchain-google-genai chromadb pypdf python-docx sentence-transformers




In [102]:
import os
from getpass import getpass

# Never store credentials in the notebook itself: saved notebooks are routinely
# shared and committed. Read the key from the environment and fall back to a
# hidden interactive prompt. Any key that was previously pasted into this cell
# must be treated as leaked and revoked/rotated in the Google console.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Google API key: ")


In [104]:
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
import os

documents = []
data_path = "data"

# os.listdir() returns entries in arbitrary order; sorting keeps the order of
# loaded documents (and of everything derived from them) stable between runs.
for file in sorted(os.listdir(data_path)):
    path = os.path.join(data_path, file)
    # Compare extensions case-insensitively so "Rules.PDF" / "Guide.DOCX" are
    # not silently skipped.
    lowered_name = file.lower()

    if lowered_name.endswith(".pdf"):
        documents.extend(PyPDFLoader(path).load())

    elif lowered_name.endswith(".docx"):
        documents.extend(Docx2txtLoader(path).load())

print("Documents loaded:", len(documents))


Documents loaded: 4


In [106]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings kept in one place so they are easy to tune.
splitter_options = {"chunk_size": 800, "chunk_overlap": 150}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break every loaded document into chunks for embedding.
chunks = text_splitter.split_documents(documents)

chunk_count = len(chunks)
print("Total chunks created:", chunk_count)


Total chunks created: 4


In [108]:
# Sanity check: report the text length of (at most) the first five documents.
preview_docs = documents[:5]
for i, loaded_doc in enumerate(preview_docs):
    content_length = len(loaded_doc.page_content)
    print(f"Doc {i} length:", content_length)


Doc 0 length: 607
Doc 1 length: 430
Doc 2 length: 434
Doc 3 length: 409


In [110]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Name of the sentence-transformers model used to embed the chunks.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

print("Local embeddings ready")


Local embeddings ready


In [111]:
import hashlib

from langchain_community.vectorstores import Chroma

# Give every chunk a deterministic ID (position + digest of its text). Without
# explicit IDs a fresh random ID is generated on each run, so re-executing this
# cell keeps appending the same chunks to the persistent "db" directory and the
# retriever starts returning duplicates. With stable IDs a re-run overwrites the
# existing entries instead.
# NOTE(review): a "db" directory already populated by earlier runs may still hold
# entries stored under the old random IDs; delete the directory once to clear them.
chunk_ids = [
    f"{index}-{hashlib.sha256(chunk.page_content.encode('utf-8')).hexdigest()}"
    for index, chunk in enumerate(chunks)
]

vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory="db",
)

# No explicit vector_db.persist() call: it is deprecated in langchain-community,
# and with Chroma >= 0.4 data written to persist_directory is saved automatically.
print("Vector DB created")


Vector DB created


In [112]:
# Expose the vector store as a retriever, passing k=4 as a search keyword argument.
retriever = vector_db.as_retriever(
    search_kwargs={"k": 4},
)
print("Retriever ready")


Retriever ready


In [130]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model configuration: model identifier and sampling temperature.
gemini_settings = {"model": "gemini-3-flash-preview", "temperature": 0.2}
llm = ChatGoogleGenerativeAI(**gemini_settings)

print("Gemini LLM ready")


Gemini LLM ready


In [132]:
from langchain_core.prompts import PromptTemplate

# Prompt text for the bot. It restricts answers to the supplied context and
# defines the fallback sentence; {context} and {question} are filled in per call.
rag_template = """
You are a Railway Passenger Process Explainer Bot.

Answer ONLY using the provided context.
Do NOT book tickets.
Do NOT give live schedules.
If not found, say:
"Information not available in the provided documents."

Context:
{context}

Question:
{question}

Answer:
"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=rag_template,
)


In [134]:
# Rebuilds the retriever with the same k=4 setting used when it was first created
# earlier in the notebook.
retriever_search_kwargs = {"k": 4}
retriever = vector_db.as_retriever(search_kwargs=retriever_search_kwargs)
print("Retriever ready")


Retriever ready


In [136]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


In [138]:
def format_docs(docs):
    """Merge retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined in order, separated by a blank line.
        An empty iterable yields an empty string.
    """
    separator = "\n\n"
    contents = [item.page_content for item in docs]
    return separator.join(contents)


In [140]:
# Inputs for the prompt: "context" comes from the retriever piped through
# format_docs, while "question" is the invoke() input passed along unchanged.
context_chain = retriever | format_docs
prompt_inputs = {
    "context": context_chain,
    "question": RunnablePassthrough(),
}

# Full pipeline: assemble prompt inputs -> render prompt -> call the LLM -> plain string.
rag_chain = prompt_inputs | prompt | llm | StrOutputParser()

print("RAG pipeline ready")


RAG pipeline ready


In [144]:
# Run one sample question through the RAG pipeline and show the answer.
question = "What are the rules for changing boarding point?"
answer = rag_chain.invoke(question)

print(answer)


Based on the provided guidelines, the rules for changing the boarding point are:

1. **Refund Policy:** If the boarding station is changed within 24 hours of departure, no refund is allowed except in exceptional cases such as train cancellation, coach non-attachment, or a delay beyond 3 hours.
2. **Boarding Rights:** After changing the boarding station, the passenger loses the right to board from the original station. Unauthorized boarding from the original station attracts fare and penalty.
3. **Restrictions:** A boarding point change is not allowed if:
   - The ticket is seized.
   - The PNR has the VIKALP option.
   - The ticket is an I-Ticket.
   - The ticket is a current booking.
4. **Additional Changes:** If the boarding station was changed at the time of booking, one more change is allowed via the "Booked Ticket History" section.
