In [1]:
%pip install -qU langchain langchain-community langchain-google-genai gradio_client python-dotenv langsmith

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from langchain.agents import initialize_agent, Tool
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents.agent_types import AgentType
from gradio_client import Client, handle_file
from langsmith import traceable
from dotenv import load_dotenv
from typing import Union, List, Dict, Any
import json
import mimetypes

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
load_dotenv()

True

In [4]:
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    max_retries=1,
    api_key=os.getenv("GOOGLE_API_KEY")
)

In [5]:
@traceable
def run_hinglish_hate_model(text: str) -> str:
    try:
        client = Client("dj-dawgs-ipd/IPD-Hinglish-Text-Model")
        label, score = client.predict(text, api_name="/predict")
        return f"Prediction: {label} with a score of {score}"
    except Exception as e:
        return f"Error calling Hinglish model: {str(e)}"

In [6]:
@traceable
def run_english_hate_model(text: str) -> str:
    try:
        client = Client("dj-dawgs-ipd/IPD-Text-English-Finetune")
        label, score = client.predict(text, api_name="/predict")
        return f"Prediction: {label} with a score of {score}"
    except Exception as e:
        return f"Error calling English model: {str(e)}"

In [7]:
@traceable
def run_image_hate_model(image_path: str) -> str:
    try:
        client = Client("dj-dawgs-ipd/IPD_IMAGE_PIPELINE")
        result = client.predict(handle_file(image_path), api_name="/predict")
        return f"Image Analysis Result: {result}"
    except Exception as e:
        return f"Error calling Image model: {str(e)}"

In [8]:
@traceable
def run_audio_hate_model(audio_path: str) -> str:
    try:
        client = Client("dj-dawgs-ipd/IPD-Audio-Pipeline")
        result = client.predict(handle_file(audio_path), api_name="/predict")
        return f"Audio Analysis Result: {result}"
    except Exception as e:
        return f"Error calling Audio model: {str(e)}"

In [9]:
@traceable
def run_sentiment_analysis(text: str) -> str:
    try:
        client = Client("cardiffnlp/twitter-roberta-base-sentiment-analysis")
        # The API is assumed to return a tuple: (sentiment_label, confidence_score)
        sentiment, score = client.predict(text, api_name="/predict")
        return f"Sentiment Analysis: {sentiment} with confidence {score}"
    except Exception as e:
        return f"Error in Sentiment Analysis: {str(e)}"

In [10]:
hinglish_tool = Tool.from_function(
    func=run_hinglish_hate_model,
    name="HinglishHateChecker",
    description="Detects hate speech in Hinglish text."
)

In [11]:
english_tool = Tool.from_function(
    func=run_english_hate_model,
    name="EnglishHateChecker",
    description="Detects hate speech in English text."
)

In [12]:
image_tool = Tool.from_function(
    func=run_image_hate_model,
    name="ImageHateChecker",
    description="Detects hate symbols or hateful content in images."
)

In [13]:
audio_tool = Tool.from_function(
    func=run_audio_hate_model,
    name="AudioHateChecker",
    description="Detects hate speech in audio files."
)

In [14]:
sentiment_tool = Tool.from_function(
    func=run_sentiment_analysis,
    name="SentimentAnalyzer",
    description="Analyzes the sentiment of text to assess emotional tone."
)

In [15]:
tools = [hinglish_tool, english_tool, image_tool, audio_tool, sentiment_tool]

In [16]:
def detect_modality(input_data: Union[str, os.PathLike]) -> str:
    if isinstance(input_data, str):
        if os.path.exists(input_data):
            mime_type, _ = mimetypes.guess_type(input_data)
            if mime_type:
                if mime_type.startswith("image"):
                    return "image"
                elif mime_type.startswith("audio"):
                    return "audio"
            return "unknown"
        else:
            # Assume text input
            if any(word in input_data.lower() for word in ["hai", "kya", "nahi", "chutiya", "bhen"]):
                return "hinglish"
            else:
                return "english"
    return "unknown"

In [24]:
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True
)

In [25]:
@traceable
def pipeline(input_data: Union[str, os.PathLike]):
    modality = detect_modality(input_data)
    system_instruction = f"""
You are an AI agent that detects hate speech.
Modality detected: {modality}
Use the appropriate tool:
- HinglishHateChecker for Hinglish
- EnglishHateChecker for English
- ImageHateChecker for image files
- AudioHateChecker for audio files

Full multimodal and multi-step pipeline:
    1. Detects modality and (for text) language.
    2. Calls external tools (hate detection, sentiment analysis, contextual lookup) as appropriate.
    3. Constructs a detailed chain-of-thought prompt including all tool outputs.
    4. Uses the Gemini LLM to consolidate and produce a final decision.

You can use multiple tools if necessary and perform a chain of operations. 
Use your reasoning to decide which tools to use.

Based on these tool outputs and your own reasoning, decide:
- Is the input hate speech? (Answer Yes or No)
- Provide a clear explanation integrating the tool outputs and any additional reasoning.

Provide your answer in the format:
"Hate Speech: Yes/No — Reason: <tool output + your reasoning>"
"""

    prompt = f"{system_instruction}\n\nInput: {input_data}"
    return agent.invoke(prompt)

In [19]:
# print(pipeline("chutiyo logo ko mumbai me lena hi nhi chahiye"))
# print(pipeline("swastika.png"))
print(pipeline("hate_audio.wav"))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe modality is audio, so I should use the AudioHateChecker to detect hate speech.
Action: AudioHateChecker
Action Input: hate_audio.wav[0mLoaded as API: https://dj-dawgs-ipd-ipd-audio-pipeline.hf.space ✔

Observation: [36;1m[1;3mAudio Analysis Result: {'prediction': 'hate', 'language': 'Hinglish', 'label': None, 'confidence': 0.4111775557200114, 'hate_text': "that they have flash type of shows and turf war or gosh or ah yes ah ah the can share say that where i want one that i were you wish to how the flu or shop on oh oh oh oh oh oh oh yeah oh yeah yeah that half an hour or that the blue team has ah ha ha ha ha ha ha ha for years and has shot shot yeah and then the heart and i mean know i haven't shown long oh hey hey a patch or worse cause more how long shot"}[0m
Thought:[32;1m[1;3mThe AudioHateChecker detected hate speech in the audio file. The extracted text contains potentially hateful phrases. Therefore, the input

In [26]:
@traceable
def evaluate_pipeline(test_dataset: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Evaluates the pipeline over a dataset of examples.
    
    Each dataset entry should be a dict with:
      - 'input': The input data (text, file path, etc.)
      - 'expected': The expected label (optional)
    
    Returns a list of results with input, expected, output, and (optionally) a score.
    """
    results = []
    for example in test_dataset:
        inp = example.get("input")
        expected = example.get("expected", "N/A")
        output = pipeline(inp)
        result = {
            "input": inp,
            "expected": expected,
            "output": output,
            "modality": detect_modality(inp)
        }
        results.append(result)
    return results

In [27]:
test_dataset = [
    {"input": "chutiyo logo ko mumbai me lena hi nhi chahiye", "expected": "Yes"},
    {"input": "I love the diverse culture of this city", "expected": "No"},
    # For file paths, replace with actual image/audio file paths:
    {"input": "swastika.png", "expected": "Yes"},
    {"input": "hate_audio.wav", "expected": "Yes"},
]

In [28]:
eval_results = evaluate_pipeline(test_dataset)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe input is in Hinglish. I should use the HinglishHateChecker to detect hate speech.
Action: HinglishHateChecker
Action Input: chutiyo logo ko mumbai me lena hi nhi chahiye[0mLoaded as API: https://dj-dawgs-ipd-ipd-hinglish-text-model.hf.space ✔

Observation: [36;1m[1;3mPrediction: OAG with a score of 0.9616120457649231[0m
Thought:[32;1m[1;3mThe HinglishHateChecker tool detected the input text as offensive, aggressive, or abusive (OAG) with a high score of 0.96. This indicates a high probability that the text contains hate speech or offensive content.

Hate Speech: Yes — Reason: The HinglishHateChecker tool identified the text "chutiyo logo ko mumbai me lena hi nhi chahiye" as offensive, aggressive, or abusive with a score of 0.96, indicating a high likelihood of hate speech.[0m
Observation: Invalid Format: Missing 'Action:' after 'Thought:
Thought:[32;1m[1;3mThe HinglishHateChecker tool identified the text "chutiyo

In [30]:
print("\nEvaluation Results:")
for res in eval_results:
    print(json.dumps(res, indent=2))


Evaluation Results:
{
  "input": "chutiyo logo ko mumbai me lena hi nhi chahiye",
  "expected": "Yes",
  "output": {
    "input": "\nYou are an AI agent that detects hate speech.\nModality detected: english\nUse the appropriate tool:\n- HinglishHateChecker for Hinglish\n- EnglishHateChecker for English\n- ImageHateChecker for image files\n- AudioHateChecker for audio files\n\nFull multimodal and multi-step pipeline:\n    1. Detects modality and (for text) language.\n    2. Calls external tools (hate detection, sentiment analysis, contextual lookup) as appropriate.\n    3. Constructs a detailed chain-of-thought prompt including all tool outputs.\n    4. Uses the Gemini LLM to consolidate and produce a final decision.\n\nYou can use multiple tools if necessary and perform a chain of operations. \nUse your reasoning to decide which tools to use.\n\nBased on these tool outputs and your own reasoning, decide:\n- Is the input hate speech? (Answer Yes or No)\n- Provide a clear explanation in