In [None]:
import pandas as pd

# Read the labelled dataset from disk into a DataFrame
df = pd.read_csv("labeled_data.csv")

# List the available columns, then preview the top of the frame
print(df.columns.to_list())
df.head()




In [None]:
# Install ngrok and other Python libraries
!pip install fastapi uvicorn "uvicorn[standard]" streamlit requests langdetect nest_asyncio pyngrok

In [None]:
%%writefile main.py
# main.py - This is the backend API using FastAPI

import os
import re
from typing import Dict, Any

import nest_asyncio
import requests
import uvicorn
from fastapi import FastAPI
from langdetect import detect, DetectorFactory
from pydantic import BaseModel

# This line helps FastAPI run correctly in Colab
nest_asyncio.apply()

DetectorFactory.seed = 0 # For consistent language detection

app = FastAPI()

# --- Configuration ---
# IMPORTANT: do not paste your token into this file. Set the HF_API_TOKEN
# environment variable instead; the placeholder below is used only when it is
# unset (the mock functions in this file never send the token anywhere).
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "YOUR_HUGGINGFACE_API_TOKEN")

# Request body accepted by the /analyze_text endpoint.
class TextInput(BaseModel):
    # Raw text to analyse; analyze_text() runs language detection on it first,
    # so it does not have to be English.
    text: str

# --- Mock API Call Functions (Simulate external services) ---
# In a real app, these would make actual calls to LibreTranslate, the Hugging Face Inference API, etc.

async def librettranslate_mock_api(text: str, source_lang: str, target_lang: str) -> Dict[str, Any]:
    """Mocks LibreTranslate for text translation.

    Args:
        text: Text to "translate".
        source_lang: Language code of ``text``. Region-tagged codes such as
            "zh-cn" or "zh_TW" are reduced to their primary subtag ("zh")
            before the lookup.
        target_lang: Requested target language; only echoed in the log line.

    Returns:
        ``{"translatedText": ...}`` holding a canned English sentence for the
        known source languages, or ``"MOCK TRANSLATED: <text>"`` otherwise.
    """
    print(f"MOCK: Translating '{text}' from {source_lang} to {target_lang}")

    # Every supported language maps to the same canned sentence.
    mock_translations = dict.fromkeys(
        ("hi", "ur", "ar", "zh", "ru", "es", "fr", "de"),
        "You are so smart",
    )

    # langdetect reports Chinese as "zh-cn"/"zh-tw", which would never match
    # the "zh" key, so look up the primary language subtag only.
    primary_lang = source_lang.replace("_", "-").split("-")[0].lower()

    translated_text = mock_translations.get(primary_lang, f"MOCK TRANSLATED: {text}")
    return {"translatedText": translated_text}


async def huggingface_mock_inference_api(model_name: str, text: str, api_token: str) -> list:
    """Mocks HuggingFace models for sarcasm/hate speech detection.

    Args:
        model_name: Model identifier. A name containing "sarcasm" selects the
            sarcasm heuristics; one containing "hate" or "offensive" selects
            the hate-speech heuristics.
        text: Text to classify. Keywords are matched case-insensitively as
            plain substrings, so e.g. "skills" matches "kill".
        api_token: Not used by the mock.

    Returns:
        A one-element list ``[{"label": str, "score": float}]``. Labels are
        "sarcasm"/"not_sarcasm" for sarcasm models, "hate"/"offensive"/"neutral"
        for hate-speech models, and "unknown" (score 0.0) for any other model.
    """
    print(f"MOCK: Calling HuggingFace model '{model_name}' with text: '{text}'")
    text_lower = text.lower()

    if "sarcasm" in model_name:
        # Rule 1: negated praise ("not ... great").
        negated_praise = "not" in text_lower and any(
            word in text_lower for word in ("great", "amazing", "fantastic")
        )
        # Rule 2: text that opens with a typical sarcastic interjection.
        sarcastic_opener = re.search(r"^(wow|oh boy|sure|obviously).*(!|\.)?$", text_lower) is not None
        # Rule 3: more than three words written entirely in capitals. At least
        # one letter is required, otherwise digit- or punctuation-only input
        # such as "1 2 3 4" would count as "all caps".
        letters = [c for c in text if c.isalpha()]
        shouting = len(text.split()) > 3 and bool(letters) and all(c.isupper() for c in letters)

        if negated_praise or sarcastic_opener or shouting:
            return [{"label": "sarcasm", "score": 0.95}]
        return [{"label": "not_sarcasm", "score": 0.98}]

    elif "hate" in model_name or "offensive" in model_name:
        label = "neutral"
        score = 0.99
        if any(keyword in text_lower for keyword in ["idiot", "stupid", "dumb", "loser"]):
            label = "offensive"
            score = 0.75
        if "f***" in text_lower or "shit" in text_lower or "bitch" in text_lower:
            label = "offensive"
            score = 0.80
        # Checked last so that profanity can never downgrade a "hate" verdict
        # to the milder "offensive" label.
        if any(keyword in text_lower for keyword in ["hate", "kill", "racist", "sexist", "terrorist"]):
            label = "hate"
            score = 0.90
        return [{"label": label, "score": score}]
    else:
        return [{"label": "unknown", "score": 0.0}]


# --- Main Classification Endpoint ---
@app.post("/analyze_text")
async def analyze_text(text_input: TextInput):
    """Processes text for language, sarcasm, and hate speech.

    Pipeline: detect the language, translate non-English text to English with
    the mock translator, then run the mock sarcasm and hate-speech classifiers
    on the English text.

    Args:
        text_input: Request body carrying the raw text to analyse.

    Returns:
        A dict with the keys ``language``, ``translated_text``, ``sarcasm``
        ("Yes"/"No"), ``hate_speech`` ("Hate"/"Offensive"/"neutral") and
        ``toxicity_score`` (rounded to two decimals).
    """
    original_text = text_input.text
    translated_text = original_text

    # Best effort: if detection fails for any reason, treat the text as English.
    try:
        detected_language = detect(original_text)
    except Exception:
        detected_language = "en"

    if detected_language != "en":
        translation_result = await librettranslate_mock_api(original_text, detected_language, "en")
        translated_text = translation_result.get("translatedText", original_text)

    # Fall back to the original text when the translation came back blank.
    if not translated_text.strip():
        translated_text = original_text

    sarcasm_model_name = "cardiffnlp/twitter-roberta-base-sarcasm"
    sarcasm_result = await huggingface_mock_inference_api(sarcasm_model_name, translated_text, HF_API_TOKEN)
    sarcasm_label = "No"
    if sarcasm_result and sarcasm_result[0].get("label") == "sarcasm" and sarcasm_result[0].get("score", 0) > 0.5:
        sarcasm_label = "Yes"

    hate_speech_model_name = "Hate-speech-CNERG/dehatebert-mono-english"
    hate_speech_result = await huggingface_mock_inference_api(hate_speech_model_name, translated_text, HF_API_TOKEN)
    hate_speech_label = "neutral"
    toxicity_score = 0.0

    if hate_speech_result and hate_speech_result[0].get("label"):
        prediction = hate_speech_result[0]
        predicted_label = prediction.get("label").lower()
        confidence = prediction.get("score", 0.0)
        if predicted_label == "hate":
            hate_speech_label = "Hate"
            toxicity_score = confidence
        elif predicted_label == "offensive":
            hate_speech_label = "Offensive"
            toxicity_score = confidence
        elif predicted_label == "neutral":
            # The score is the confidence in the *neutral* label, so the
            # toxicity is its complement; reporting it directly would give
            # harmless text a toxicity of 0.99.
            toxicity_score = max(0.0, 1.0 - confidence)
        # Any other label (e.g. "unknown") keeps the default toxicity of 0.0.

    return {
        "language": detected_language,
        "translated_text": translated_text,
        "sarcasm": sarcasm_label,
        "hate_speech": hate_speech_label,
        "toxicity_score": round(toxicity_score, 2)
    }

# Start the API server only when this file is executed as a script
# (e.g. `python main.py`). uvicorn.run() blocks until the server is stopped.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="error")

In [None]:
%%writefile streamlit_app.py
# streamlit_app.py - This is the frontend application using Streamlit

import streamlit as st
import requests
import json

# Endpoint of the FastAPI backend that performs the analysis
API_URL = "http://localhost:8000/analyze_text"

# Page heading followed by a short description of the demo
st.title("Multi-Language Real-Time Sarcasm & Hate Speech Detector")
intro_markdown = """
    Enter text below to detect language, translate if needed, and analyze for sarcasm,
    hate speech, and overall toxicity.
    *Note: This is a demo using mock API calls for translation and NLP models.*
"""
st.markdown(intro_markdown)

# Free-text input box for the message to analyse
txt = st.text_area(
    "Enter text",
    height=120,
    placeholder="Paste a message in any language...",
)

# Send the text to the backend when the button is clicked and render the result
if st.button("Analyze"):
    if not txt.strip():
        # Give feedback instead of silently ignoring a click on empty input
        st.warning("Please enter some text to analyze.")
    else:
        with st.spinner("Analyzing text..."): # Show a loading message
            try:
                r = requests.post(API_URL, json={"text": txt}, timeout=60)

                if r.ok:
                    res = r.json()

                    st.write("---")
                    st.subheader("Analysis Results:")

                    st.info(f"**Detected Language:** `{res.get('language', 'Unknown')}`")
                    if res.get('language') != 'en':
                        st.info(f"**Translated to English:** `{res.get('translated_text', 'N/A')}`")

                    # Later checks override earlier ones, so the most severe
                    # finding decides the badge: Hate > Offensive > Sarcastic.
                    badge = "🟢 Safe"
                    if res.get("sarcasm") == "Yes":
                        badge = "🟡 Sarcastic"
                    if res.get("hate_speech") == "Offensive":
                        badge = "🔴 Offensive"
                    if res.get("hate_speech") == "Hate":
                        badge = "🚨 Hate Speech Detected!"

                    st.markdown(f"### Overall Sentiment: {badge}")
                    st.markdown(f"**Sarcasm:** `{res.get('sarcasm', 'N/A')}`")
                    st.markdown(f"**Hate Speech:** `{res.get('hate_speech', 'N/A')}`")

                    toxicity_score = res.get('toxicity_score', 0.0)
                    st.markdown(f"**Toxicity Score:** `{toxicity_score}`")

                    # The progress bar needs a number in [0, 1]; a missing,
                    # non-numeric or out-of-range score is shown as an empty bar
                    # rather than aborting the rest of the rendering.
                    try:
                        bar_value = float(toxicity_score)
                    except (TypeError, ValueError):
                        bar_value = 0.0
                    if not 0.0 <= bar_value <= 1.0:
                        bar_value = 0.0
                    st.progress(bar_value, text="Toxicity Level")

                    st.write("---")
                    st.caption("Raw JSON Response:")
                    st.json(res)
                else:
                    st.error(f"API Error: {r.status_code} - {r.text}")
            except requests.exceptions.ConnectionError:
                st.error(f"Could not connect to the API server at {API_URL}. "
                         "Please ensure the backend is running (check the cell above).")
            except requests.exceptions.Timeout:
                st.error("The request timed out. The backend API might be slow or unresponsive.")
            except Exception as e:
                st.error(f"An unexpected error occurred: {e}")

In [None]:
# Authenticate ngrok with your token
import os
from getpass import getpass

from pyngrok import ngrok

# Never store the token in the notebook: it is read from the NGROK_AUTH_TOKEN
# environment variable, or prompted for (without echo) when that is not set.
# NOTE(review): a literal token used to be committed on this line; it should be
# revoked in the ngrok dashboard.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN") or getpass("Enter your ngrok auth token: ")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

In [None]:
# Run the FastAPI backend and Streamlit, then tunnel Streamlit with ngrok
from pyngrok import ngrok
import threading
import time
import subprocess
import sys

# Handles of the child processes started by this cell, kept so that they can be
# terminated when the cell is stopped.
child_processes = []

def _serve(command):
    """Launch `command` as a child process, record its handle, and block until it exits."""
    process = subprocess.Popen(command)
    child_processes.append(process)
    process.wait()

def start_backend():
    """Run main.py, which serves the FastAPI backend on port 8000 (blocks until it exits)."""
    # streamlit_app.py posts to http://localhost:8000, so the backend has to be
    # running as well; writing main.py with %%writefile does not start it.
    _serve([sys.executable, "main.py"])

def start_streamlit():
    """Run the Streamlit frontend on port 8501 (blocks until it exits)."""
    _serve(["streamlit", "run", "streamlit_app.py", "--server.port", "8501", "--server.enableCORS", "true", "--server.enableXsrfProtection", "false"])

# Both servers block, so each one runs in its own daemon thread
backend_thread = threading.Thread(target=start_backend)
backend_thread.daemon = True
backend_thread.start()

streamlit_thread = threading.Thread(target=start_streamlit)
streamlit_thread.daemon = True
streamlit_thread.start()

# Give the servers a moment to start up
time.sleep(5)

try:
    # Create ngrok tunnel
    public_url = ngrok.connect(8501)
    print(f"Your Streamlit app is available at: {public_url}")

    # Keep the cell alive until you manually stop it
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    print("Stopping ngrok tunnel and Streamlit app.")
finally:
    # Always clean up, including when creating the tunnel fails, so that no
    # server keeps running in the background.
    ngrok.kill()
    for process in child_processes:
        if process.poll() is None:
            process.terminate()