<a href="https://colab.research.google.com/github/Atulkhiyani0909/NyayaSetu/blob/main/ChatBot/Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Dependencies

In [6]:
!pip install --quiet langchain-core langchain langchain-google-genai pinecone-client langchain-pinecone langchain-huggingface pydantic gradio transformers

## Environment

In [7]:
from google.colab import userdata
import os

In [8]:
# LangChain tracing settings (fixed values).
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# Copy each API key from google.colab's userdata into an environment
# variable of the same name, in the order the services are listed.
for _secret_name in ('LANGCHAIN_API_KEY', 'GOOGLE_API_KEY', 'PINECONE_API_KEY'):
    os.environ[_secret_name] = userdata.get(_secret_name)

## Importing Libraries

In [9]:
import os
from typing import List, Dict

# Core ML and RAG Libraries
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEmbeddings
from pydantic import BaseModel,Field
from functools import lru_cache
from langchain.load import dumps,loads

In [10]:
@lru_cache(maxsize=1)
def get_llm(temp=0):
    """Return a Gemini chat model configured with the given temperature.

    The result is memoised with ``lru_cache(maxsize=1)``: only the model from
    the most recent distinct call is kept, so repeated identical calls reuse
    one instance while a call with a different argument replaces it.
    """
    settings = {'model': 'gemini-2.0-flash', 'temperature': temp}
    return ChatGoogleGenerativeAI(**settings)

# Module-level model instance built with get_llm()'s default temperature (0).
llm = get_llm()

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from pydantic import BaseModel, Field

# Structured-output schema handed to with_structured_output() in
# query_to_english. Documented with comments rather than a class docstring:
# presumably a docstring would become part of the schema sent to the model —
# verify before adding one.
class language_detector(BaseModel):
    # Language the incoming query was written in.
    language: str = Field(..., description="Detected Language")
    # The query rendered in English.
    translated: str = Field(..., description="Translated to English")

def query_to_english(query: str, memory) -> dict:
    """Rewrite a user query in clear English and report its source language.

    The LLM receives the query together with the chat history (``memory``) so
    it can rephrase ambiguous wording. The structured reply is returned as a
    plain dict with the keys ``language`` and ``translated``.
    """
    structured_llm = get_llm().with_structured_output(language_detector)

    instructions = """Translate the following query to clear English while preserving its context and intent you may imporve its wordings for better understanding.
    If the query is ambiguous, you can rephrase it, but do not change its original meaning utilize this Chat history to rewrite
    this ambigous query : {memory}

    Query: {query}

    Also, detect the language of the query and store it in "language".

    Output should strictly follow this format:
    {example}
    """

    sample_reply = '''{
        "language": "Hindi",
        "translated": "Hello, how are you?"
    }'''

    # The example and the chat history are bound up front; only the query is
    # supplied when the chain is invoked.
    prefilled = {
        'example': sample_reply,
        'memory': memory,
    }
    translation_prompt = ChatPromptTemplate.from_template(
        template=instructions,
        partial_variables=prefilled,
    )

    # The structured-output model already yields a pydantic object, so no
    # string output parser is chained on.
    pipeline = translation_prompt | structured_llm
    detection = pipeline.invoke({'query': query})
    return detection.model_dump()

In [11]:
# Structured-output schema handed to with_structured_output() in
# should_talkback. Documented with comments rather than a class docstring:
# presumably a docstring would become part of the schema sent to the model —
# verify before adding one.
class TalkBack(BaseModel):
    # Boolean decision read back by should_talkback.
    talkback: bool = Field(..., description="Talkback")

# Decides whether the bot should talk back to the user (e.g. ask for more
# details) or go straight to retrieval, which is a little more time-consuming.
def should_talkback(query: str, memory) -> bool:
    """Decide whether the assistant should talk back instead of retrieving.

    Args:
        query: The user's current message.
        memory: Chat history; it is interpolated into the prompt as text.

    Returns:
        The ``talkback`` flag from the LLM's structured output.
    """

    # NOTE(review): the prompt lists greetings under both the `True` rules
    # (item 4) and the `False` rules (item 3). The text is left unchanged
    # here — confirm which behaviour is intended.
    prompt = '''
As a legal assistant for NyayaSetu,utilize memory for deciding -- only return `True` if:
    1. The query is impossible to answer without more details (e.g., "What happens?" without context)
    2. Combines 3+ unrelated legal issues
    3. Contains contradictory information
    4. Contains greeting or useless information or talk (cross check using chat_history)

    Return `False` if:
    1. Query describes a single clear legal issue
    2. User has provided sufficient context
    3. Query is a greeting or simple request
    4. User has already been asked for clarification but he choose not to based on Chat History.
    5. He asks for services we offer or other things related to website or bot.
    6. He explicitly just asks for an answer or 2-3 clarifying questions has been asked from him already based on Chat History.

    **Query Examples:**
    Ambiguous: "What happens if I have a problem with railway staff?"
    → `True`

    Clear: "What are my rights if RPF detains me for ticketless travel?"
    → `False`

    Ambiguous: "What can I do if police refuse to help me?"
    → `True`

    Clear: "How do I file a complaint against police misconduct during detention?"
    → `False`

    Direct ask: "Just give me the details/information/answer"
    -> `False`

    Ambigous: Based on Chat History , 2-3 clarifying questions has already been asked from the user
    -> `False`

    **User Query:** {query}

    **Chat History:** {memory}
    '''

    # The chat history is bound as a partial variable; only the query is
    # supplied when the chain is invoked.
    template = ChatPromptTemplate.from_template(
        template=prompt,
        partial_variables={
            'memory': memory
        }
    )

    # Named distinctly so it does not shadow the module-level `llm`.
    structured_llm = get_llm().with_structured_output(TalkBack)

    chain = template | structured_llm
    decision = chain.invoke({'query': query})
    return decision.talkback