#  Load and Preprocess Traffic Data

In [1]:
import os
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Sentence-embedding model shared by the indexing and query steps.
# The checkpoint id is named so it is easy to spot and swap.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Load traffic data from text file
def load_traffic_data(file_path):
    """Return the full contents of a UTF-8 encoded text file as one string.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The file's entire text.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        return handle.read()

# Split text into chunks
def split_text(text, chunk_size=500, overlap=0):
    """Split text into fixed-size character chunks, optionally overlapping.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk. Must be positive.
        overlap: Number of characters from the end of each chunk that are
            repeated at the start of the next one. Must satisfy
            0 <= overlap < chunk_size. The default of 0 produces disjoint
            chunks, exactly as before this parameter existed.

    Returns:
        A list of chunks in document order. The final chunk may be shorter
        than chunk_size. An empty text yields an empty list.

    Raises:
        ValueError: If chunk_size is not positive, or overlap is negative or
            not smaller than chunk_size.
    """
    # A negative size used to return [] silently and zero raised an opaque
    # range() error; reject both up front with a clear message.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, got {overlap!r}"
        )

    # Each new chunk starts `step` characters after the previous one.
    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]

# Convert text chunks to vector embeddings
def create_vector_database(chunks):
    """Embed text chunks and store the vectors in a FAISS IndexFlatL2 index.

    Args:
        chunks: Non-empty sequence of text chunks to embed.

    Returns:
        A tuple (index, chunks): the populated FAISS index and the same
        chunks object that was passed in. Vectors are added in the order of
        chunks, so position i in the index corresponds to chunks[i].

    Raises:
        ValueError: If chunks is empty.
    """
    # With no chunks there is no embedding dimension to size the index with;
    # fail early with a clear message instead of an obscure error further down.
    if len(chunks) == 0:
        raise ValueError("Cannot build a vector database from an empty list of chunks.")

    vectors = embedding_model.encode(chunks, convert_to_numpy=True)
    vector_dim = vectors.shape[1]

    # Initialize the FAISS index and add every chunk embedding to it
    index = faiss.IndexFlatL2(vector_dim)
    index.add(vectors)

    return index, chunks

# Load data and process it.
# The corpus location can be overridden with the ACCIDENT_REPORTS_PATH
# environment variable so the notebook is not tied to one machine's drive
# layout; the original local path is kept as the fallback.
DATA_FILE = os.environ.get(
    "ACCIDENT_REPORTS_PATH",
    "D:\\C\\DEVELOPER\\Conversational-AI-for-Traffic-Enforcement-Assistance\\Dataset\\accident_reports.txt",
)
text_data = load_traffic_data(DATA_FILE)
chunks = split_text(text_data)
index, chunk_store = create_vector_database(chunks)

print("Vector database created successfully.")


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Vector database created successfully.


# Retrieve Relevant Data

In [6]:
def retrieve_info(query, top_k=10):
    """Return the stored chunks closest to the query, joined by newlines.

    The query is embedded with the module-level embedding_model and searched
    against the module-level FAISS index; hits are looked up in chunk_store.

    Args:
        query: Natural-language question to search for.
        top_k: Maximum number of chunks to return.

    Returns:
        The matching chunks in the order FAISS returns them, joined with
        newlines. An empty string if the index holds no vectors or top_k is
        not positive.
    """
    # Never request more neighbours than the index contains. FAISS pads the
    # missing slots with label -1, and chunk_store[-1] would then silently
    # return the last chunk as a bogus hit.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return ""

    query_vector = embedding_model.encode([query], convert_to_numpy=True)
    _, indices = index.search(query_vector, k)

    # Defensive filter: skip any remaining "no result" placeholders.
    results = [chunk_store[idx] for idx in indices[0] if idx >= 0]
    return "\n".join(results)

# Example usage: run the retriever on one sample question and print the
# newline-joined chunks it returns.
query = "Which district has highest death due to accidents in 2023?"
retrieved_info = retrieve_info(query)
print("Retrieved Data:\n", retrieved_info)


Retrieved Data:
 D district (Hosdurg police station), an accident was reported under FIR No. 7691/2021. The accident occurred at ARUMALOOR in a Rural area, specifically in Village, involving a Motor Cycle and a Motor Cycle. The incident was categorized as Fatal with 1 fatalities, 0 grievous injuries, and 0 minor injuries. Pedestrians affected: 0, Cyclists affected: 0. Geographically, the accident occurred at coordinates (8.47786, 77.07411). The road conditions included Single lanes, no divider, and a Others feat
 district (Chirayinkil police station), an accident was reported under FIR No. 1151/2000. The accident occurred at HIGHCOURT JN in a Urban area, specifically in City, involving a Auto rickshaw and a Scooter. The incident was categorized as Non Injury with 0 fatalities, 0 grievous injuries, and 0 minor injuries. Pedestrians affected: 0, Cyclists affected: 0. Geographically, the accident occurred at coordinates (9.98141, 76.27628). The road conditions included Single lanes, no di

# LLM-Based Response Generation

In [2]:
import os
from getpass import getpass

from huggingface_hub import login

# SECURITY: never commit an access token in a notebook. The token that was
# previously hard-coded here is exposed and must be revoked in the Hugging Face
# account settings. Read the token from the HF_TOKEN environment variable, or
# prompt for it interactively when the variable is not set.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(hf_token)

In [1]:
from transformers import pipeline

# Load the free LLM used for answer generation: Mistral-7B-Instruct v0.1,
# wrapped in a Hugging Face text-generation pipeline.
llm = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1")

def generate_response(query, max_new_tokens=200):
    """Answer a query with the LLM, grounded on retrieved context.

    The chunks returned by retrieve_info(query) are placed in a prompt ahead of
    the user's question, and the module-level llm pipeline completes it.

    Args:
        query: The user's natural-language question.
        max_new_tokens: Upper bound on the number of tokens generated for the
            answer, not counting the prompt.

    Returns:
        The generated answer text only, without the prompt echoed back.
    """
    retrieved_info = retrieve_info(query)

    prompt = (
        "Use the following information to answer the query:\n"
        f"{retrieved_info}\n\n"
        f"User Query: {query}\n"
        "Answer:"
    )

    # max_length counts the prompt tokens too, and the retrieved context alone
    # can exceed 200 tokens, leaving no room for an answer. max_new_tokens
    # budgets only the generated part. return_full_text=False stops the
    # pipeline from prepending the whole prompt to the returned text.
    outputs = llm(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        return_full_text=False,
    )

    return outputs[0]["generated_text"]

# Example usage: run the full retrieve-then-generate flow for one sample
# question and print what the model returns.
query = "Which district has highest death due to accidents in 2023?"
response = generate_response(query)
print("Chatbot Response:\n", response)


  from .autonotebook import tqdm as notebook_tqdm


: 