In [2]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

In [3]:
# Wikipedia lookup tool: keep only the top search hit and cap the returned
# text at 200 characters.
api_wrapper = WikipediaAPIWrapper(
    top_k_results=1,
    doc_content_chars_max=200,
)
wiki = WikipediaQueryRun(api_wrapper=api_wrapper)

In [4]:
# Smoke-test the Wikipedia tool; the returned string is shown as the cell output.
wiki.run("railways")  


'Page: Rail transport\nSummary: Rail transport (also known as train transport) is a means of transport using wheeled vehicles running in tracks, which usually consist of two parallel steel rails. Rail t'

In [5]:
import os

# The recorded run logged "USER_AGENT environment variable not set"; give the
# web requests an identifiable agent. This is done before importing the loader
# because the check appears to run when the loader module is imported
# (TODO confirm). setdefault leaves an already-exported value untouched.
os.environ.setdefault("USER_AGENT", "krcl-rules-notebook/0.1")

from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# FAISS is implemented in langchain_community (see the retriever repr in this
# cell's output); the `langchain.vectorstores` path is only a deprecated alias.
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# Step 1: Load the page from the web.
loader = WebBaseLoader("https://docs.smith.langchain.com/")
docs = loader.load()

# Step 2: Split the documents into 1000-character chunks with 200 characters
# of overlap between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = splitter.split_documents(docs)

# Step 3: Embed with the BAAI/bge-small-en-v1.5 sentence-embedding model.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

# Step 4: Build an in-memory FAISS vector store from the chunks.
vectordb = FAISS.from_documents(documents, embeddings)

# Step 5: Expose the store as a retriever (default search settings).
retriever = vectordb.as_retriever()

# NOTE(review): `embeddings` and `retriever` are rebound by the next cell, which
# loads the KRCL index, so this web-based retriever is not the one the agent uses.
retriever


USER_AGENT environment variable not set, consider setting it to identify your requests.
  embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")


VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001CCFBF881D0>, search_kwargs={})

In [6]:
# Replace the web-docs retriever with one backed by the saved KRCL FAISS index.
# Both `embeddings` and `retriever` from the previous cell are rebound here.
# NOTE(review): the embedding model presumably has to be the one the index was
# built with -- confirm against the script that created "faiss_index_krcl".
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# NOTE(review): allow_dangerous_deserialization=True opts in to unpickling the
# stored index metadata; only load index folders produced by a trusted source.
db = FAISS.load_local(
    "faiss_index_krcl",
    embeddings,
    allow_dangerous_deserialization=True,
)

retriever = db.as_retriever()

In [7]:
# First draft of the tool list: the raw KRCL retriever plus the Wikipedia tool.
# NOTE(review): `retriever` is a VectorStoreRetriever, not a Tool; a later cell
# rebuilds `tools` with the retriever wrapped in a Tool.
tools = [retriever, wiki]

In [8]:
# Display the tool list to inspect what was registered.
tools

[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001CCCECFCD90>, search_kwargs={}),
 WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from 'c:\\Users\\swanu\\anaconda3\\Lib\\site-packages\\wikipedia\\__init__.py'>, top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=200))]

In [9]:
from langchain_community.llms import Ollama

# Mistral model accessed through the Ollama wrapper, sampling temperature 0.
llm = Ollama(
    model="mistral",
    temperature=0,
)


  llm = Ollama(model="mistral", temperature=0)


In [10]:
from langchain.prompts import ChatPromptTemplate

# Single-turn question-answering template with two variables:
#   {context} - the retrieved rule text, wrapped in <context> tags
#   {input}   - the user's question
# NOTE(review): the template tells the model to "use wiki tool", but a plain
# prompt template carries no tools; that instruction only makes sense inside an
# agent. `prompt` is also rebound in a later cell (In [18]), and no visible code
# uses this version.
prompt = ChatPromptTemplate.from_template("""
You are an expert assistant answering questions about railway operations and safety rules from the *KRCL General and Subsidiary Rules 2020* manual.

Your task is to provide accurate, section-based answers **only** from the provided context. Do not guess. Do not invent rules. If the answer cannot be found in the context, clearly state: "The information is not available in the provided rules."

Warning: Your response may be used by railway staff in real operational scenarios. Wrong or misleading answers may lead to **accidents or disciplinary action**. Respond carefully and precisely.

Instructions:
- Identify the correct rule number and section from the context.
- Provide the **exact wording** or an accurate summary.
- Format the rule reference like this: *Rule 2.03 — Knowledge of Rules*.
- If multiple rules apply, cite each.
- If the answer is not in the context, **say so clearly**.

I'll tip you $1000 dollars everytime you give correct answer.
When you can't find anything use wiki tool to give answer from wikipedia but explicitly mentions it is not from the *KRCL General and Subsidiary Rules 2020* manual.

<context>
{context}
</context>

Question: {input}

Answer (with rule reference):
""")

In [18]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import Tool, AgentExecutor, create_tool_calling_agent
from langchain_community.llms import Ollama
from langchain.tools import WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper

# 1. Wrap your retriever in a function that returns a string
def krcl_retriever_tool(query: str) -> str:
    """Return KRCL rule passages relevant to ``query`` as one string.

    Args:
        query: Free-text question or keyword passed to the module-level
            ``retriever``.

    Returns:
        The ``page_content`` of every retrieved document, joined with blank
        lines, or a fixed "not available" sentence when nothing is retrieved.
    """
    # `invoke` is the Runnable entry point for retrievers;
    # `get_relevant_documents` is deprecated in langchain-core.
    results = retriever.invoke(query)
    if not results:
        return "The information is not available in the provided rules."
    return "\n\n".join(doc.page_content for doc in results)


# 2. Define tools: the KRCL retriever wrapper plus the Wikipedia fallback.
tools = [
    Tool(
        name="KRCLRulesRetriever",
        func=krcl_retriever_tool,
        description="Retrieves accurate rule-based answers from the KRCL G&SR 2020 manual. Use for railway safety and operations questions."
    ),
    wiki
]

# 3. Define the prompt.
# Message order is: system -> prior chat turns -> user question -> agent
# scratchpad, so the agent's intermediate tool calls/results follow the
# question they belong to.
prompt = ChatPromptTemplate.from_messages([
    ("system", """You are an expert assistant answering questions about railway operations and safety rules from the *KRCL General and Subsidiary Rules 2020* manual.

Your task is to provide accurate, section-based answers **only** from the provided context. Do not guess. Do not invent rules. If the answer cannot be found in the context, clearly state: "The information is not available in the provided rules."

Warning: Your response may be used by railway staff in real operational scenarios. Wrong or misleading answers may lead to **accidents or disciplinary action**. Respond carefully and precisely.

Instructions:
- Identify the correct rule number and section from the context.
- Provide the **exact wording** or an accurate summary.
- Format the rule reference like this: *Rule 2.03 — Knowledge of Rules*.
- If multiple rules apply, cite each.
- If the answer is not in the context, **say so clearly**.

When you can't find anything use the Wikipedia tool, but **explicitly mention** that it's not from the KRCL manual."""),
    # optional=True: callers in this notebook only pass {"input": ...}, so the
    # history slot must be allowed to be absent.
    MessagesPlaceholder(variable_name="chat_history", optional=True),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])
# NOTE(review): the agent created in the next cell uses initialize_agent with a
# ReAct agent type and does not receive this `prompt`; wire it in (or drop it)
# when the agent construction is revisited.

# 4. Load Ollama model (Mistral)
llm = Ollama(model="mistral", temperature=0)


In [19]:
# 6. Create the agent
from langchain.agents import initialize_agent
from langchain.agents import AgentType
# Build a zero-shot ReAct agent over `tools` with the `llm` defined above.
# NOTE(review): this agent type brings its own ReAct prompt; the `prompt`
# object built in the previous cell is not passed in here.
agent_executor = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    # Send unparseable model output back to the LLM as an observation instead
    # of raising, matching the agent built later in this notebook.
    handle_parsing_errors=True,
)



In [21]:

# Ask the agent one sample question and print the returned response object.
agent_input = {"input": "What is Gangman?"}
response = agent_executor.invoke(agent_input)
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m This question seems to be a typo or misunderstanding. The correct term might be "Gangnam," which is a district in Seoul, South Korea. However, since I don't have access to Wikipedia for this specific task, I can't provide the detailed information about Gangnam that Wikipedia would offer.

To answer this question accurately, I would need to use Wikipedia. But since I don't have direct access to it, I will instead use KRCLRulesRetriever to find if there is any rule or information related to "Gangman" in the KRCL G&SR 2020 manual.

Action: KRCLRulesRetriever
Action Input: "Gangman"[0m
Observation: [36;1m[1;3mGateman,

S.R. 15.17 (1) Note: In Konkan Railway gangman is designated as Trackman and gangmate
is designated as PWS/APWS.
B. THE WORKING OF LORRIES, TROLLIES AND MOTOR TROLLIES

S.R.15.12(2) NOTE : In Konkan Railway Inspector way is designated as JE/SE (P.Way) and gangman is designated as Trackman and gangmate is design

In [22]:
from langchain.agents import Tool, initialize_agent, AgentType
from langchain_community.llms import Ollama
from langchain_core.prompts import MessagesPlaceholder, ChatPromptTemplate

# Define retriever tool
def krcl_retriever_tool(query: str) -> str:
    """Return KRCL rule passages relevant to ``query`` as one string.

    Args:
        query: Free-text question or keyword passed to the module-level
            ``retriever``.

    Returns:
        The ``page_content`` of every retrieved document, joined with blank
        lines, or a fixed "not available" sentence when nothing is retrieved.
    """
    # `invoke` is the Runnable entry point for retrievers;
    # `get_relevant_documents` is deprecated in langchain-core.
    results = retriever.invoke(query)
    if not results:
        return "The information is not available in the provided rules."
    return "\n\n".join(doc.page_content for doc in results)

# Single-tool setup: only the KRCL retriever wrapper is exposed to the agent
# (the Wikipedia tool is not included in this cell).
krcl_rules_tool = Tool(
    name="KRCLRulesRetriever",
    func=krcl_retriever_tool,
    description="Use this tool to retrieve rule-based answers from the KRCL G&SR 2020 manual. Use for railway safety and operations questions.",
)
tools = [krcl_rules_tool]

# Mistral via the Ollama wrapper, temperature 0.
llm = Ollama(model="mistral", temperature=0)

# Zero-shot ReAct agent; parsing errors are handled by the executor rather
# than raised.
agent_executor = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True,
)

# No custom prompt is passed in; the agent type supplies its own, so answer
# quality depends on what the retriever tool returns.
response = agent_executor.invoke({"input": "What is Gangman?"})
print(response)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m To find the definition of a term in the KRCL G&SR 2020 manual, I should use the `KRCLRulesRetriever` tool.

Action: KRCLRulesRetriever
Action Input: "Gangman"[0m
Observation: [36;1m[1;3mGateman,

S.R. 15.17 (1) Note: In Konkan Railway gangman is designated as Trackman and gangmate
is designated as PWS/APWS.
B. THE WORKING OF LORRIES, TROLLIES AND MOTOR TROLLIES

S.R.15.12(2) NOTE : In Konkan Railway Inspector way is designated as JE/SE (P.Way) and gangman is designated as Trackman and gangmate is designated as PWS/APWS.

15.11 GANGMATE IN EACH GANG:-
Each Inspector of Way or Works shall see that in every gang employed in his length of line there is a competent Gangmate.[0m
Thought:[32;1m[1;3m The term "Gangman" in the context of Konkan Railway is referred to as "Trackman" and "PWS/APWS". This person works alongside an Inspector of Way or Works, ensuring that each gang employed in their length of line has a competent Ga

In [None]:
# --- Agent Creation ---
# Instruction text for the KRCL agent, kept as one module-level string.
# It sets out the source priority (KRCL rules first; Wikipedia/public sources
# only under the listed conditions), the required response format, and the
# prohibitions. The tool named in its last line, `KRCLRulesRetriever`, matches
# the Tool name registered in the earlier cells.
# NOTE(review): presumably passed to the agent as its prompt prefix further
# down -- the usage is not visible in this section; confirm.
prefix = """
# KRCL Rules Expert System v3.1
You are a Railway Safety-Critical Knowledge Assistant responsible for providing rule-based answers based strictly on the KRCL General and Subsidiary Rules 2020.

## 🚨 Operational Mandate:
- Precision is mandatory. Responses must avoid errors that could lead to accidents or disciplinary action.
- If uncertain or no relevant rule exists: respond with **"Not specified in KRCL rules."**

## 📚 Knowledge Protocol:
1. **Primary Source (Required):**
   - Source: KRCL General and Subsidiary Rules 2020
   - Always begin answers with **exact rule references**
   - Use full rule identifiers (e.g., GR 5.01(3), SR 2.14)
   - Quote verbatim whenever possible

2. **Secondary Source (Restricted Use – Wikipedia/Public)**
   - Only use if:
     a) It's a terminology/definition query  
     b) No KRCL rule applies  
     c) Answer has **no operational impact**  
   - Must include notice:
     🔍 [External Reference]  
     "This information is not in KRCL rules but based on public sources..."

## 🧭 Response Format:
### 1. Operational Answer:
[Direct, precise response]

### 2. Rule Evidence:
#### [Rule Number] [Rule Title]
• (1) [Exact wording]
• (2) [Next clause]
  - (a) [Sub-clause if any]

Separate multiple rules with `---`

### 3. External Reference (if applicable):
🔍 [External Source]
• [Brief, factual note]
• "KRCL rules govern actual implementation"

## ❌ Prohibitions:
- Do **not** mix rule content with external knowledge
- Do **not** paraphrase safety-critical rules
- Do **not** give procedures unless cited directly from rules

## 🔎 Special Cases:
- Definitions: “As defined in GR X.Y: [exact text]”
- Procedures: “Per SR X.Y(2): [step-by-step]”
- If no info exists: “Not specified in KRCL rules”
- If conflicting: “Rule GR X.Y takes precedence”

You must use the `KRCLRulesRetriever` tool to retrieve all KRCL rules. Do not guess.
"""
