# Road Safety Intervention GPT

# 1. Install dependencies
# %pip install langchain langchain-community sentence-transformers faiss-cpu transformers torch accelerate huggingface_hub

In [1]:
import os
import json
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# ==============================================
# Load Road Safety Knowledge Base
# ==============================================
FILE_PATH = "knowledge_base.json"

with open(FILE_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

# Create text chunks from full_text fields
all_texts = [item["full_text"] for item in data]
all_metadata = [item["metadata"] for item in data]

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=600,
    chunk_overlap=120,
    length_function=len
)

chunks = []
chunk_meta = []
for txt, meta in zip(all_texts, all_metadata):
    split = text_splitter.split_text(txt)
    for c in split:
        chunks.append(c)
        chunk_meta.append(meta)

In [3]:
# ==============================================
# Build Vector Store
# ==============================================
embeddings = SentenceTransformerEmbeddings(model_name='BAAI/bge-large-en-v1.5')
vector_store = FAISS.from_texts(chunks, embeddings, metadatas=chunk_meta)

  embeddings = SentenceTransformerEmbeddings(model_name='BAAI/bge-large-en-v1.5')


In [None]:
# ==============================================
# Load Llama 3.2 3B
# ==============================================
HF_TOKEN = ""
model_name = "meta-llama/Llama-3.2-3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    token=HF_TOKEN
)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

llm_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=900
)

`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:03<00:00,  1.72s/it]
Some parameters are on the meta device because they were offloaded to the disk.
Device set to use mps


In [5]:
# ==============================================
# Road Safety Intent Detection
# ==============================================
def detect_intent(query):
    q = query.lower()
    if "cost" in q or "price" in q or "estimate" in q:
        return "cost_estimate"
    if "fix" in q or "intervention" in q or "solution" in q:
        return "recommend_fix"
    if "clause" in q or "standard" in q or "compliance" in q:
        return "compliance_check"
    return "general_query"

In [6]:
# ==============================================
# RAG Generation
# ==============================================
# New: Structured Answer Format (Topics + Detailed Explanation)
# The bot will now output:
# 1. Problem Summary
# 2. Applicable IRC Clauses
# 3. Recommended Interventions
# 4. Step-by-Step Fix Guide
# 5. Cost Estimate (if intent matches)
# 6. Final Answer

# ==============================================
def generate_answer(context, query, intent):
    """
    Generates a **structured IRC-compliant answer** with topics:
    - Problem Interpretation
    - Relevant IRC Clauses
    - Recommended Intervention
    - Why This Works
    - Step-by-Step Fix Guide
    - Cost Estimate (if applicable)
    - Compliance Check (if applicable)
    """

    context_text = "\n".join(context)

    # === Base System Prompt ===
    STRUCTURE_TEMPLATE = """
### üîç Problem Interpretation
Provide a clear explanation of what the user's query means.

### üìò Relevant IRC Clauses
List all related IRC clauses available in the context.
(cite exactly from metadata, e.g., IRC:67-2022 - Clause 14.4)

### üõ†Ô∏è Recommended Intervention
Explain the precise intervention required.
Base entirely on IRC standards.

### üß© Why This Works
Explain engineering justification according to IRC.

### üìù Step-by-Step Fix Guide
Provide clear numbered steps for execution (3‚Äì7 steps).

### üí∞ Estimated Cost (if applicable)
Provide a cost range in ‚Çπ using Indian engineering rates.
If user didn't ask, include only when the issue implies a repair.

### ‚ö†Ô∏è Compliance Check (if intent == compliance_check)
State: "Compliant" or "Not Compliant" and cite violated clauses.

### ‚úÖ Final Answer
Summarize everything in 3‚Äì5 lines.
"""

    # === Intent-specific prompt injection ===
    if intent == "general_query":
        role_prompt = "You are Road Safety Intervention GPT. Answer strictly based on IRC standards."
    elif intent == "recommend_fix":
        role_prompt = "You are an IRC-certified Highway Safety Engineer. Recommend the correct intervention using IRC clauses."
    elif intent == "cost_estimate":
        role_prompt = "You are an IRC-based Indian Highway Cost Estimation Expert. Provide fix + cost ranges."
    elif intent == "compliance_check":
        role_prompt = "You are an IRC Compliance Auditor. Evaluate compliance based on standards in context."

    system_prompt = f"{role_prompt}\nFollow the structured format below strictly.\n\n{STRUCTURE_TEMPLATE}"

    user_prompt = f"Context:\n{context_text}\n\nUser Query: {query}\nGenerate a structured IRC-compliant answer only using the context."

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    response = llm_generator(prompt)[0]['generated_text']
    answer = response[len(prompt):].strip()
    return answer

In [7]:
# ==============================================
# Complete Query Handler
# ==============================================
def ask_bot(query):
    intent = detect_intent(query)
    retrieved = vector_store.similarity_search(query, k=4)
    context = [doc.page_content for doc in retrieved]

    result = generate_answer(context, query, intent)
    return result

In [None]:

print(ask_bot("What is the correct intervention for a faded STOP sign?"))

### üîç Problem Interpretation
The user is seeking guidance on the correct intervention for a faded STOP sign, which is a type of stop line marking at intersections.

### üìò Relevant IRC Clauses
IRC:67-2022 - Clause 11.3.3.3, "Stop Line Markings", states: "Stop line markings shall be applied at every stop-control junction or signalized junction, and at every point where vehicles are required to stop for the safety of pedestrians, other road users or to avoid obstruction."

### üõ†Ô∏è Recommended Intervention
The correct intervention is to repaint the faded STOP sign.

### üß© Why This Works
According to IRC:67-2022 - Clause 11.3.3.3, stop line markings are required at every stop-control junction or signalized junction, and repainting the faded sign ensures clarity and maintains the safety of pedestrians and other road users.

### üìù Step-by-Step Fix Guide
1. Inspect the faded STOP sign and assess its condition.
2. Clean the surface to ensure a smooth paint application.
3. Remove

In [10]:
print(ask_bot("My road markings are faded and not retro-reflective. What's the rule for that?"))



### üîç Problem Interpretation
The user is asking about the rule for refilling or replacing faded and non-retro-reflective road markings, specifically the conditions under which such markings should be refurbished or replaced.

### üìò Relevant IRC Clauses
IRC: 13.3 - "The retro-reflective sheeting used shall be replaced either at the end of the warranty period of the retro-reflective sheeting or if its reflectivity falls below 80 percent of the initial reflectivity."

### üõ†Ô∏è Recommended Intervention
The road markings should be repainted or refurbished with new retro-reflective sheeting to maintain visibility and ensure drivers can detect the markings at least two seconds ahead of their speed.

### üß© Why This Works
According to IRC: 14.4, "Road markings must be clearly visible day and night, providing essential guidance, especially on unlit roads. Drivers shall detect markings at least two seconds ahead and that minimum preview distance with respect to speed is as follows:...

In [14]:
print(ask_bot("My road markings are faded and not retro-reflective. What's the rule for that?"))



<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 15 Nov 2025

You are Road Safety Intervention GPT. Answer strictly based on IRC standards.
Follow the structured format below strictly.


### üîç Problem Interpretation
Provide a clear explanation of what the user's query means.

### üìò Relevant IRC Clauses
List all related IRC clauses available in the context.
(cite exactly from metadata, e.g., IRC:67-2022 - Clause 14.4)

### üõ†Ô∏è Recommended Intervention
Explain the precise intervention required.
Base entirely on IRC standards.

### üß© Why This Works
Explain engineering justification according to IRC.

### üìù Step-by-Step Fix Guide
Provide clear numbered steps for execution (3‚Äì7 steps).

### üí∞ Estimated Cost (if applicable)
Provide a cost range in ‚Çπ using Indian engineering rates.
If user didn't ask, include only when the issue implies a repair.

### ‚ö†Ô∏è Compliance Check (if intent == compliance_check)
St

In [8]:
print(ask_bot("What are effective speed reduction measures?"))



### üîç Problem Interpretation
The user is seeking effective speed reduction measures to address speeding-related crashes, particularly in high-risk spots.

### üìò Relevant IRC Clauses
IRC:67-2022 - Clause 14.4 states: "Where the speed of traffic is found to be excessive, the Authority shall take necessary measures to reduce the speed of traffic, such as the use of signs, signals, or other traffic control devices."
IRC:67-2022 - Clause 14.6 states: "Where the Authority considers it necessary, it shall take measures to reduce the risk of accidents, including the use of traffic calming devices."

### üõ†Ô∏è Recommended Intervention
Effective speed reduction measures include:
1. Curve Approach Speed Reduction Signs
2. Speed Limit 20/30 kmph Zones
3. Traffic Calming Devices

### üß© Why This Works
These measures work by:
- Speed Limit 20/30 kmph Zones: Reducing crash severity for children and vulnerable road users
- Traffic Calming Devices: Forcing drivers to slow down

### üìù Step-