In [5]:
!pip install transformers matplotlib seaborn wordcloud ipywidgets rich




In [6]:
from transformers import pipeline
import matplotlib.pyplot as plt
import seaborn as sns
import random
from wordcloud import WordCloud
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, Markdown
from rich import print
from rich.panel import Panel


In [7]:
# English toxicity classifier.
english_classifier = pipeline("text-classification", model="unitary/toxic-bert")

# Classifier used for comments that are not detected as English (i.e. Hindi).
# NOTE(review): judging by its name, facebook/bart-large-mnli is an NLI checkpoint
# rather than a multilingual toxicity model, so the label/score it returns is
# presumably an NLI verdict and not a toxicity estimate. Replace it with a
# toxicity classifier that covers Hindi and confirm that model's label names
# before trusting the Hindi results.
multilingual_classifier = pipeline("text-classification", model="facebook/bart-large-mnli")

# Sentiment classifier (optional). The model and revision are pinned to the
# defaults the pipeline reported when no model was supplied, so a later change of
# the library default cannot silently alter results.
sentiment_classifier = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)


Device set to use cpu


config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


In [8]:
# English polite alternatives: suggest_alternative() picks one of these at random
# for any language other than "hindi".
polite_comments_english = [
    "Let's stay respectful towards each other.",
    "Please express your views kindly.",
    "Let's use positive and encouraging words.",
    "Politeness costs nothing but means everything."
]

# Hindi polite alternatives: suggest_alternative() picks one of these at random
# when the language is "hindi".
polite_comments_hindi = [
    "कृपया अपनी बात को विनम्रता से कहें।",
    "सकारात्मक शब्दों का उपयोग करें।",
    "सभी के प्रति सम्मान बनाए रखें।",
    "आपके शब्द दूसरों पर प्रभाव डाल सकते हैं, कृपया ध्यान दें।"
]

# Inspirational quotes, grouped by the figure named at the end of each string.
# suggest_inspirational_quote() first picks one of the three lists, then one quote
# from it.
# NOTE(review): the attributions have not been checked against a source — verify
# before presenting them to users as genuine quotations.
krishna_quotes = [
    "Speak only that which is true and pleasant. - Krishna",
    "Words have the power to both destroy and heal. When words are both true and kind, they can change the world. - Krishna"
]

buddha_quotes = [
    "Better than a thousand hollow words, is one word that brings peace. - Buddha",
    "Words have the power to both wound and heal. - Buddha"
]

mahavir_quotes = [
    "Speak the truth that is pleasant. Do not speak the unpleasant truth. - Mahavir",
    "Silence and kind speech are the true foundation of peace. - Mahavir"
]


In [9]:
def suggest_inspirational_quote():
    """Print one randomly chosen quote inside a rich panel.

    One of the three quote lists is chosen first, then a quote from that list.
    `print` here is the rich print imported at the top of the notebook, which
    renders the Panel instead of showing its repr.
    """
    quote_pool = random.choice([krishna_quotes, buddha_quotes, mahavir_quotes])
    quote_text = random.choice(quote_pool)
    styled_quote = f"[bold yellow]{quote_text}[/bold yellow]"
    print(
        Panel(
            styled_quote,
            title="[green]🌟 Inspirational Thought 🌟",
            subtitle="Reflect and Grow 🌱",
            expand=False,
        )
    )

def suggest_alternative(language="english"):
    """Return a random polite phrase in the requested language.

    Args:
        language: "hindi" selects the Hindi phrases; any other value selects the
            English ones.

    Returns:
        One string drawn at random from the matching phrase list.
    """
    phrase_pool = polite_comments_hindi if language == "hindi" else polite_comments_english
    return random.choice(phrase_pool)

def detect_language(text):
    """Classify text as "hindi" or "english" by script.

    The text counts as Hindi as soon as a single character falls in the
    Devanagari Unicode block (U+0900..U+097F); everything else, including the
    empty string, is reported as English.
    """
    has_devanagari = any('\u0900' <= char <= '\u097F' for char in text)
    return "hindi" if has_devanagari else "english"

def detect_toxicity(comment, language="english"):
    """Run the toxicity classifier that matches the comment's language.

    Args:
        comment: Text to classify.
        language: "english" uses `english_classifier`; any other value uses
            `multilingual_classifier`.

    Returns:
        (label, score): the label and score of the first (top) prediction the
        pipeline returns. The label need not be the literal string "toxic";
        which labels are possible depends on the model.
    """
    classifier = english_classifier if language == "english" else multilingual_classifier
    # truncation=True clips comments that exceed the model's maximum input length;
    # without it the pipeline raises on long comments instead of classifying them.
    top_prediction = classifier(comment, truncation=True)[0]
    return top_prediction['label'], top_prediction['score']

def analyze_sentiment(comment):
    """Classify the sentiment of a comment.

    Args:
        comment: Text to classify.

    Returns:
        (label, score) of the first (top) prediction from `sentiment_classifier`.
    """
    # truncation=True clips comments that exceed the model's maximum input length;
    # without it the pipeline raises on long comments instead of classifying them.
    sentiment = sentiment_classifier(comment, truncation=True)[0]
    return sentiment['label'], sentiment['score']

def plot_toxicity_score(score):
    """Draw a two-bar chart of the non-toxic (1 - score) and toxic (score) shares.

    Args:
        score: Toxicity score; it is plotted as given and the y-axis is fixed to
            the range 0..1.
    """
    sns.set(style="whitegrid")
    _, ax = plt.subplots(figsize=(6, 4))

    bar_heights = [1 - score, score]
    ax.bar(['Non-Toxic', 'Toxic'], bar_heights, color=['green', 'red'])
    ax.set_title('Toxicity Analysis of Your Comment', fontsize=16)
    ax.set_ylabel('Score')
    ax.set_ylim(0, 1)

    # Write each bar's value just above its top edge.
    for position, height in enumerate(bar_heights):
        ax.text(position, height + 0.02, f"{height:.2f}", ha='center', fontsize=12)

    plt.show()

def create_wordcloud(comment, font_path=None):
    """Render a word cloud of the comment, or explain why none can be drawn.

    Args:
        comment: Text to visualise.
        font_path: Optional path to a font file. The font WordCloud uses by
            default may lack Devanagari glyphs, so pass a Devanagari-capable
            font for Hindi comments. None keeps WordCloud's default font.
    """
    # Python's \w does not match Devanagari vowel signs and other combining
    # marks, so a \w-based tokeniser cuts Hindi words apart. Only override the
    # tokeniser when the comment actually contains Devanagari, so English
    # comments keep WordCloud's default behaviour.
    has_devanagari = any('\u0900' <= ch <= '\u097F' for ch in comment)
    token_pattern = r"[\w\u0900-\u097F][\w'\u0900-\u097F]*" if has_devanagari else None

    try:
        wordcloud = WordCloud(
            width=800,
            height=400,
            background_color="white",
            font_path=font_path,
            regexp=token_pattern,
        ).generate(comment)
    except ValueError:
        # WordCloud raises ValueError when no words are left to plot, e.g. for a
        # comment made only of emoji, punctuation or stop words.
        print("ℹ Not enough words in the comment to draw a word cloud.")
        return

    plt.figure(figsize=(10,6))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.title("Word Cloud of Your Comment", fontsize=16)
    plt.show()

def get_feedback():
    """Show a yes/no "Helpful?" selector and reply to the user's choice."""
    feedback = widgets.RadioButtons(
        options=['yes', 'no'],
        # Start with nothing selected. By default the first option ('yes') is
        # preselected, so choosing 'yes' would not change the value and the
        # handler below would never run for it.
        value=None,
        description='Helpful?',
        disabled=False
    )
    # The handler runs after this function has returned, so it needs its own
    # Output widget to capture what it prints and show it next to the selector.
    feedback_output = widgets.Output()
    display(feedback, feedback_output)

    def on_feedback_change(change):
        """Replace the previous reply with one matching the new selection."""
        with feedback_output:
            feedback_output.clear_output()
            if change['new'] == 'yes':
                print("Thank you for your feedback! 😊")
            elif change['new'] == 'no':
                print("Sorry for the inconvenience! We'll work harder. 😞")

    feedback.observe(on_feedback_change, names='value')


In [10]:
def main_ui():
    """Build and display the interactive comment analyser.

    Shows a heading, a text area, an "Analyze Comment" button and an output
    area. Each click clears the output area and then, for a non-empty comment:
    reports the detected language, runs the toxicity check (with a polite
    alternative and a quote when the comment is flagged), draws the score chart
    and the word cloud, reports sentiment, and finally asks for feedback.
    """
    # Scores above this value are reported as toxic.
    toxicity_threshold = 0.5

    display(Markdown("# 📝 Welcome to the Toxic Comment Filter (Hindi + English)"))
    display(Markdown("Analyze your Hindi or English comment for toxicity and get positive suggestions if needed. 🙌"))

    comment_box = widgets.Textarea(
        value='',
        placeholder='Type your comment here (Hindi या English दोनों में)...',
        description='Comment:',
        layout=widgets.Layout(width='90%')
    )
    display(comment_box)

    analyze_button = widgets.Button(description="Analyze Comment", button_style='success')
    display(analyze_button)

    output = widgets.Output()
    display(output)

    def on_button_click(b):
        """Analyse the current comment and render every result into `output`."""
        with output:
            output.clear_output()
            comment = comment_box.value

            if not comment.strip():
                print("⚠ Please enter a comment to analyze.")
                return

            # Detect Language
            lang = detect_language(comment)
            display(Markdown(f"🌐 *Detected Language: {lang.capitalize()}*"))

            # Detect Toxicity
            label, score = detect_toxicity(comment, language=lang)

            # Decide on the score alone. The old test
            # `label == "toxic" or score > 0.5` flagged a comment whenever its
            # top label was "toxic", even with a near-zero score.
            # NOTE(review): this assumes every label the classifier can return
            # names a kind of toxicity, so a low score means "not toxic".
            # Confirm against the model card of each classifier in use.
            is_toxic = score > toxicity_threshold

            if is_toxic:
                display(Markdown(f"## ⚠ The comment appears TOXIC (Score: {score:.2f})"))
                display(Markdown(f"### ➡ Suggested Alternative: {suggest_alternative(language=lang)}"))
                suggest_inspirational_quote()
            else:
                display(Markdown(f"## ✅ The comment appears NON-TOXIC (Score: {score:.2f})"))

            # Plot and Visuals
            plot_toxicity_score(score)
            try:
                create_wordcloud(comment)
            except ValueError:
                # A word cloud cannot be built when the comment has no usable
                # words (e.g. only emoji or stop words); keep going so sentiment
                # and feedback are still shown.
                print("ℹ Word cloud skipped: the comment has no words to plot.")

            # Sentiment Analysis (optional)
            # NOTE(review): the sentiment model loaded by this notebook is named
            # "...sst-2-english"; its output for Hindi comments is presumably
            # unreliable — confirm or skip this step for Hindi.
            sentiment, sentiment_score = analyze_sentiment(comment)
            display(Markdown(f"**Sentiment**: {sentiment} (Confidence: {sentiment_score:.2f})"))

            # Ask for Feedback
            get_feedback()

    analyze_button.on_click(on_button_click)


In [16]:
# Render the comment box, the Analyze button and the output area in this cell.
main_ui()


# 📝 Welcome to the Toxic Comment Filter (Hindi + English)

Analyze your Hindi or English comment for toxicity and get positive suggestions if needed. 🙌

Textarea(value='', description='Comment:', layout=Layout(width='90%'), placeholder='Type your comment here (Hi…

Button(button_style='success', description='Analyze Comment', style=ButtonStyle())

Output()