<a href="https://colab.research.google.com/github/Atulkhiyani0909/ByteVerse_NayaSetu/blob/main/Chatbot_RAG/NyayaSetu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Dependencies

In [None]:
%pip install --quiet --upgrade \
    langchain langchain-google-genai pymongo langchain_community \
    langchain_pinecone pinecone-client langchain_huggingface \
    numpy transformers --force-reinstall

## Environment

In [7]:
from google.colab import userdata
import os

In [8]:
# Copy every secret the notebook relies on from the Colab secret store into
# the process environment, under the same name. The order matches the
# original cell: LangChain tracing settings first, then the model-provider
# keys, then the Pinecone and MongoDB settings.
for _secret_name in (
    'LANGCHAIN_TRACING_V2',
    'LANGCHAIN_ENDPOINT',
    'LANGCHAIN_API_KEY',
    'GOOGLE_API_KEY',
    'HUGGING_FACE_API_KEY',
    'PINECONE_API_KEY',
    'MONGO_URL',
):
    os.environ[_secret_name] = userdata.get(_secret_name)

## Importing Libraries

In [9]:
import os
from typing import List, Dict, Any, Optional,Literal,Union
from bson import ObjectId

# Core ML and RAG Libraries
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnableLambda
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.vectorstores import Pinecone
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from langchain_huggingface import HuggingFaceEmbeddings
from pymongo import MongoClient
from pydantic import BaseModel,Field
from functools import lru_cache
from langchain.load import dumps,loads

In [10]:
@lru_cache(maxsize=1)
def get_llm():
    """Return the notebook's Gemini chat model.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    model object is constructed on the first call and the same instance is
    handed back on every later call.
    """
    chat_model = ChatGoogleGenerativeAI(model='gemini-2.0-flash', temperature=0)
    return chat_model


# Module-level handle on the cached model instance.
llm = get_llm()

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from pydantic import BaseModel, Field

# Structured-output schema that query_to_english binds to the LLM through
# with_structured_output(); it carries the detected language and the English
# translation of the user's query.
# NOTE(review): documented with `#` comments rather than a class docstring,
# because pydantic presumably copies a docstring into the schema description
# that is sent to the model — confirm before adding one.
class language_detector(BaseModel):
    # Name of the language the original query was written in (required field).
    language: str = Field(..., description="Detected Language")
    # The query rendered in English (required field).
    translated: str = Field(..., description="Translated to English")

def query_to_english(query: str,memory: List) -> dict:
    """Translate *query* into English and report the language it was written in.

    Args:
        query: The raw user query, in any language.
        memory: Chat history; it is interpolated into the prompt so the model
            can use it to rewrite an ambiguous query.

    Returns:
        The ``language_detector`` result dumped to a plain dict, i.e. the
        keys ``"language"`` and ``"translated"``.
    """
    # Sample of the expected output shape, shown to the model in the prompt.
    output_sample = '''{
        "language": "Hindi",
        "translated": "Hello, how are you?"
    }'''

    instructions = """Translate the following query to clear English while preserving its context and intent.
    If the query is ambiguous, you can rephrase it, but do not change its original meaning utilize this Chat history to rewrite
    this ambigous query : {memory}

    Query: {query}

    Also, detect the language of the query and store it in "language".

    Output should strictly follow this format:
    {example}
    """

    # The model is bound to the pydantic schema, so the chain yields a
    # language_detector instance and needs no string output parser.
    structured_model = get_llm().with_structured_output(language_detector)

    # The example and the chat history are fixed per call, so they are filled
    # in as partial variables; only the query is supplied at invoke time.
    fixed_values = {
        'example': output_sample,
        'memory': memory,
    }
    translation_prompt = ChatPromptTemplate.from_template(
        template=instructions,
        partial_variables=fixed_values,
    )

    pipeline = translation_prompt | structured_model
    detection = pipeline.invoke({'query': query})
    return detection.model_dump()

In [11]:
# Chat history passed to the demo calls in this notebook: a list holding a
# single empty tuple.
# NOTE(review): presumably a stand-in for "no prior conversation"; the value
# is interpolated into the prompts as-is — confirm the intended history format.
memory = [()]

In [12]:
# Demo: a complaint written in romanised Hindi. The recorded output below
# shows it detected as Hindi and translated to English.
query_to_english("Meri police ne complain nahi darj karwai",memory)

{'language': 'Hindi', 'translated': 'My police complaint was not registered'}

In [25]:
# Structured-output schema that should_talkback binds to the LLM through
# with_structured_output(); it holds the single yes/no decision.
# NOTE(review): documented with `#` comments rather than a class docstring,
# because pydantic presumably copies a docstring into the schema description
# that is sent to the model — confirm before adding one.
class TalkBack(BaseModel):
    # True when the query needs a clarifying follow-up, per the prompt's
    # criteria in should_talkback (required field).
    talkback: bool = Field(..., description="Talkback")

def should_talkback(query: str,memory: List) -> bool:
    """Decide whether the assistant should ask a clarifying question first.

    The LLM is given the query and the chat history and asked to judge the
    query against the criteria in the prompt below. Its answer is parsed into
    a ``TalkBack`` object.

    Args:
        query: The user's query (the prompt examples are in English).
        memory: Chat history; interpolated into the prompt as context.

    Returns:
        ``True`` when the model judges the query too vague or ambiguous to
        answer directly, ``False`` when it is clear enough.
    """

    prompt = '''
    As a legal assistant for NyayaSetu, analyze the user's query and history till now to determine if it requires clarification before providing an accurate legal response. Use these criteria to decide:
    In case of wild/unexpected/weird query , the answer should True only

    **Return `True` if the query:**
    1. Lacks sufficient details about the problem (e.g., vague or overly general).
    2. Uses ambiguous terms like "what happens", "can I", or "what are my rights" without specifying the context.
    3. Combines multiple legal issues into one query (e.g., "What can I do if my train is delayed and I have a dispute with the police?").
    4. Doesn't provide enough context about the situation (e.g., missing details like location, type of incident, or parties involved).

    **Return `False` if the query:**
    1. Clearly describes a single legal problem (e.g., "What are my rights if police refuse to file an FIR?").
    2. Includes sufficient context about the situation (e.g., "I was detained by RPF for ticketless travel; what can I do?").
    3. Can be directly mapped to a legal provision or process based on available information.

    **Query Examples:**

    Ambiguous: "What happens if I have a problem with railway staff?"
    → `True`

    Clear: "What are my rights if RPF detains me for ticketless travel?"
    → `False`

    Ambiguous: "What can I do if police refuse to help me?"
    → `True`

    Clear: "How do I file a complaint against police misconduct during detention?"
    → `False`

    **User Query:** {query}

    **Chat History:** {memory}
    '''

    # The chat history is fixed for this call, so it is bound as a partial
    # variable; only the query is supplied at invoke time.
    template = ChatPromptTemplate.from_template(
        template=prompt,
        partial_variables={
            'memory': memory
        }
    )

    # Named distinctly so it does not shadow the module-level `llm`.
    decision_model = get_llm().with_structured_output(TalkBack)

    chain = template | decision_model
    verdict = chain.invoke({'query': query})
    # The chain yields a TalkBack instance; return its boolean field directly.
    return verdict.talkback

In [26]:
# Demo: an incident report with no surrounding detail. The recorded result
# below is True, i.e. a clarifying question is requested.
should_talkback('I got slapped by railway worker',memory)

True

In [27]:
# Demo: a broad "my rights" request with no specific situation. The recorded
# result below is True.
should_talkback('get me my rights as per railway',memory)

True

## Talkback message

In [28]:
def talkback(query: str,memory: List,language: str) -> str:
    """Generate one follow-up question asking the user for missing details.

    Args:
        query: The user's query that needs clarification.
        memory: Chat history; interpolated into the prompt as context.
        language: Language named in the prompt as the secondary choice for
            the reply, after any language specified in the chat history.

    Returns:
        The model's reply as a plain string; the prompt asks for only the
        follow-up question.
    """

    instructions = '''
    You are an AI assistant for a legal platform called NyayaSetu. Your goal is to refine vague user queries by asking for more details to provide accurate legal guidance.

    ## Context:
    - The user query may lack details, making it difficult to provide precise legal advice.
    - Use the chat history to understand the context and determine what information has already been provided.
    - Your task is to ask a single, logical follow-up question to clarify the user's intent or gather missing details.
    - Keep the follow-up question concise, polite, and relevant to the query.

    ## Chat History:
    {chat_history}

    ## User Query:
    {query}

    ## Response Format:
    - Reply in language as specified by the user in chat history (if available),secondary to {language} otherwise default to English.
    - Provide only one follow-up question that helps clarify the query or gather additional details.
    - Ensure the response feels conversational and engaging.

    ## Example Responses:
    1. **User Query:** "What happens if I have a problem with railway staff?"
    **AI:** "Could you clarify whether this is about ticket disputes, harassment by staff, or refusal to address complaints?"

    2. **User Query:** "Police won’t help with my complaint."
    **AI:** "Could you describe the issue in more detail? For example, is this about filing an FIR or addressing police misconduct?"

    3. **User Query:** "What are my rights if RPF detains me?"
    **AI:** "Could you provide more context? For instance, were you detained for ticketless travel or another issue?"

    4. **User Query:** "Can I get compensation for a train delay?"
    **AI:** "Could you specify how long the train was delayed and whether you had a reserved ticket?"

    Reply with only the follow-up question, nothing else.
    '''

    # History and language are fixed for this call, so they are bound as
    # partial variables; only the query is supplied at invoke time.
    fixed_values = {
        'chat_history': memory,
        'language': language,
    }
    follow_up_prompt = ChatPromptTemplate.from_template(
        template=instructions,
        partial_variables=fixed_values,
    )

    chat_model = get_llm()

    # StrOutputParser reduces the chat message to its text content.
    pipeline = follow_up_prompt | chat_model | StrOutputParser()
    follow_up_question = pipeline.invoke({'query': query})
    return follow_up_question

In [29]:
# Demo: request a follow-up question, passing 'Hindi' as the language.
# NOTE(review): the recorded reply below is in English even though 'Hindi'
# was passed — verify the language instruction in the talkback prompt.
talkback('I got hit by police officer',memory,'Hindi')

'Could you please provide more details about the incident, such as when and where it happened?'