In [1]:
!pip install scikit-learn numpy pandas nltk matplotlib imblearn gradio

Collecting imblearn
  Downloading imblearn-0.0-py2.py3-none-any.whl.metadata (355 bytes)
Collecting gradio
  Downloading gradio-5.20.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.2 (from gradio)
  Downloading gradio_client-1.7.2-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Down

In [3]:
!pip install emoji

Collecting emoji
  Downloading emoji-2.14.1-py3-none-any.whl.metadata (5.7 kB)
Downloading emoji-2.14.1-py3-none-any.whl (590 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m590.6/590.6 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji
Successfully installed emoji-2.14.1


In [4]:
import pandas as pd
import re
import string
import emoji
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# ✅ Define emoji-based classification
# Lookup sets used by classify_emoji() for emoji-only input:
#   CYBERBULLYING_EMOJIS     -> a match yields "Cyberbullying"
#   NON_CYBERBULLYING_EMOJIS -> a match yields "Non-Cyberbullying"
# Emojis in neither set fall back to "Non-Cyberbullying" in classify_emoji().
# NOTE(review): classify_emoji() in this cell tests one character at a time, so an
# entry made of several code points (e.g. an emoji followed by a variation selector)
# can never match — confirm whether "❤️" is such an entry.
CYBERBULLYING_EMOJIS = {"😡", "👊", "💀", "🤬", "😠", "👿", "🖕", "💢", "🔪"}
NON_CYBERBULLYING_EMOJIS = {"😊", "❤️", "👍", "😁", "😇", "🎉", "😂", "💖", "🥰"}

# ✅ Function to check if input contains **only emojis**
def contains_only_emojis(text):
    """Tell whether ``text`` consists solely of emojis.

    Surrounding whitespace is ignored. The decision is delegated to
    ``emoji.purely_emoji`` so that emojis made of several code points are
    matched as whole units. Looking up each character in ``emoji.EMOJI_DATA``
    rejects such emojis, because helper characters like the variation selector
    or the zero-width joiner are not emojis on their own.

    Args:
        text (str): Raw user input.

    Returns:
        bool: True if the stripped text is non-empty and contains nothing but
        emojis; False otherwise, including empty or whitespace-only input.
    """
    text = text.strip()
    if not text:
        # An empty string has no non-emoji characters, so it must be rejected explicitly.
        return False
    return emoji.purely_emoji(text)

# ✅ Emoji classification for emoji-only input
def classify_emoji(input_text):
    """Classify a message that consists only of emojis.

    A message is flagged as soon as it contains any emoji from
    ``CYBERBULLYING_EMOJIS``, wherever that emoji sits. Previously the first
    recognised character decided the result, so a friendly emoji placed in
    front of a hostile one masked it.

    Args:
        input_text (str): Raw user input.

    Returns:
        str | None: "Cyberbullying" if a hostile emoji is present,
        "Non-Cyberbullying" for any other emoji-only input (unknown emojis
        default to this), or None when the input is not emoji-only so the
        caller can fall back to the text model.
    """
    input_text = input_text.strip()
    if not contains_only_emojis(input_text):
        return None  # If it's mixed with text, process normally

    # Substring test so multi-code-point entries are matched as whole sequences.
    if any(hostile in input_text for hostile in CYBERBULLYING_EMOJIS):
        return "Cyberbullying"
    return "Non-Cyberbullying"  # Benign or unknown emojis only

# ✅ Text normalisation applied before vectorising
def preprocess_text(text):
    """Lower-case ``text``, trim its ends and drop every character that is
    neither a word character nor whitespace.

    Args:
        text (str): Raw message.

    Returns:
        str: The normalised message.
    """
    normalised = text.lower().strip()
    # Anything outside \w and \s (punctuation, symbols) is removed.
    return re.sub(r'[^\w\s]', '', normalised)

# ✅ Read the labelled corpus (adjust the path if the CSV lives elsewhere)
dataset_path = "expanded_cyberbullying_dataset.csv"
df = pd.read_csv(dataset_path)

# ✅ Normalise every message into a new column
df["processed_text"] = df["text"].apply(preprocess_text)

# ✅ Hold out 20% of the rows for evaluation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    df["processed_text"],
    df["label"],
    random_state=42,
    test_size=0.2,
)

# ✅ TF-IDF features feeding a multinomial Naïve Bayes classifier, fitted on the training split
model_pipeline = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer()),
        ("classifier", MultinomialNB()),
    ]
).fit(X_train, y_train)  # fit() returns the pipeline itself

# ✅ Score the held-out split
y_pred = model_pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(
    y_test,
    y_pred,
    target_names=["Non-Cyberbullying", "Cyberbullying"],
)

# ✅ Report
print(f"Model Accuracy: {accuracy:.4f}")
print("\nClassification Report:\n", classification_rep)

# ✅ Combined detector: emoji rules first, text model second
def predict_cyberbullying(user_input):
    """Return "Cyberbullying" or "Non-Cyberbullying" for ``user_input``.

    The emoji rule set is consulted first; when it yields no verdict, the
    input is normalised and passed to the trained pipeline, whose label 1 is
    reported as "Cyberbullying".
    """
    verdict = classify_emoji(user_input)
    if not verdict:
        cleaned = preprocess_text(user_input)
        label = model_pipeline.predict([cleaned])[0]
        verdict = "Cyberbullying" if label == 1 else "Non-Cyberbullying"
    return verdict

# ✅ Smoke-test the detector on plain text, emoji-only and mixed samples
test_inputs = [
    "I hate you!",
    "You are amazing! 😊",
    "🫶🏽",
    "😡",
    "You are a failure!",
    "Keep up the great work! 👍",
]
for text in test_inputs:
    # One line per sample: the raw input followed by the predicted class.
    print(f"Input: {text} --> Prediction: {predict_cyberbullying(text)}")


Model Accuracy: 0.9950

Classification Report:
                    precision    recall  f1-score   support

Non-Cyberbullying       0.99      1.00      1.00       107
    Cyberbullying       1.00      0.99      0.99        93

         accuracy                           0.99       200
        macro avg       1.00      0.99      0.99       200
     weighted avg       1.00      0.99      0.99       200

Input: I hate you! --> Prediction: Cyberbullying
Input: You are amazing! 😊 --> Prediction: Non-Cyberbullying
Input: 🫶🏽 --> Prediction: Non-Cyberbullying
Input: 😡 --> Prediction: Cyberbullying
Input: You are a failure! --> Prediction: Cyberbullying
Input: Keep up the great work! 👍 --> Prediction: Non-Cyberbullying


In [5]:
import pandas as pd
import re
import string
import emoji
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# ✅ Define emoji-based classification (Expanded for multi-character emojis)
# Lookup sets used by classify_emoji() for emoji-only input. In this cell the whole
# stripped input is compared against the entries, so skin-tone and joined variants
# are listed as separate strings.
#   CYBERBULLYING_EMOJIS     -> a match yields "Cyberbullying"
#   NON_CYBERBULLYING_EMOJIS -> a match yields "Non-Cyberbullying"
# Input matching neither set falls back to "Non-Cyberbullying" in classify_emoji().
CYBERBULLYING_EMOJIS = {"😡", "👊", "💀", "🤬", "😠", "👿", "🖕", "💢", "🔪", "🖕🏽", "🖕🏻", "🖕🏾", "🖕🏿"}
NON_CYBERBULLYING_EMOJIS = {"😊", "❤️", "💝", "👍", "😁", "😇", "🎉", "😂", "💖", "🥰", "🫶🏽", "🧑🏽‍🎨"}

# ✅ Function to check if input contains **only emojis**
def contains_only_emojis(text):
    """Tell whether ``text`` consists solely of emojis.

    Surrounding whitespace is ignored. ``emoji.purely_emoji`` evaluates whole
    emoji sequences, so emojis built from several code points are accepted.
    Counting characters found in ``emoji.EMOJI_DATA`` rejected them, because
    helper characters such as the variation selector or the zero-width joiner
    are not emojis by themselves. That count-based check also reported True
    for an empty string (0 == 0).

    Args:
        text (str): Raw user input.

    Returns:
        bool: True if the stripped text is non-empty and contains nothing but
        emojis; False otherwise, including empty or whitespace-only input.
    """
    text = text.strip()
    if not text:
        return False  # nothing typed is not an emoji message
    return emoji.purely_emoji(text)

# ✅ Emoji classification for emoji-only input (single emojis and runs of emojis)
def classify_emoji(input_text):
    """Classify a message that consists only of emojis.

    The message is flagged when any entry of ``CYBERBULLYING_EMOJIS`` occurs
    in it. Comparing the whole input against the sets only recognised
    messages that were exactly one listed emoji; two hostile emojis in a row
    matched neither set and fell through to "Non-Cyberbullying".

    Args:
        input_text (str): Raw user input.

    Returns:
        str | None: "Cyberbullying" if a hostile emoji is present,
        "Non-Cyberbullying" for any other emoji-only input (unknown emojis
        default to this), or None when the input is not emoji-only so the
        caller can fall back to the text model.
    """
    input_text = input_text.strip()
    if not contains_only_emojis(input_text):
        return None  # If it's mixed with text, process normally

    # Substring test: works for one emoji, repeated emojis and listed multi-code-point variants.
    if any(hostile in input_text for hostile in CYBERBULLYING_EMOJIS):
        return "Cyberbullying"
    return "Non-Cyberbullying"  # Benign or unknown emojis only

# ✅ Normalise a message: lower-case, trim, strip punctuation
def preprocess_text(text):
    """Return ``text`` lower-cased and trimmed, with every character that is
    not a word character or whitespace removed."""
    return re.sub(r'[^\w\s]', '', text.lower().strip())

# ✅ Read the labelled corpus (adjust the path if the CSV lives elsewhere)
dataset_path = "expanded_cyberbullying_dataset.csv"
df = pd.read_csv(dataset_path)

# ✅ Add a normalised copy of each message
df["processed_text"] = df["text"].apply(preprocess_text)

# ✅ 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    df["processed_text"],
    df["label"],
    test_size=0.2,
    random_state=42,
)

# ✅ Pipeline: TF-IDF vectoriser followed by multinomial Naïve Bayes
model_pipeline = Pipeline(steps=[
    ("tfidf", TfidfVectorizer()),
    ("classifier", MultinomialNB()),
])
model_pipeline.fit(X_train, y_train)

# ✅ Evaluate on the held-out rows
y_pred = model_pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(
    y_test, y_pred,
    target_names=["Non-Cyberbullying", "Cyberbullying"],
)

# ✅ Report
print(f"Model Accuracy: {accuracy:.4f}")
print("\nClassification Report:\n", classification_rep)

# ✅ Combined detector: emoji rules first, text model second
def predict_cyberbullying(user_input):
    """Classify ``user_input`` as "Cyberbullying" or "Non-Cyberbullying".

    An emoji-rule verdict, when there is one, is returned as is. Otherwise
    the normalised text goes through the trained pipeline and a predicted
    label of 1 maps to "Cyberbullying".
    """
    rule_verdict = classify_emoji(user_input)
    if rule_verdict:
        return rule_verdict

    model_label = model_pipeline.predict([preprocess_text(user_input)])[0]
    if model_label == 1:
        return "Cyberbullying"
    return "Non-Cyberbullying"

# ✅ USER INPUT SECTION - Interactive Testing
# This loop blocks on input(). It ends when the user types 'exit', when input is
# closed (EOFError) or when the cell is interrupted (KeyboardInterrupt).
print("\n🔹 Cyberbullying Detection System 🔹")
print("Enter text or emojis to check for cyberbullying.")
print("Type 'exit' to stop the program.\n")

while True:
    try:
        user_input = input("Enter text or emojis: ").strip()
    except (EOFError, KeyboardInterrupt):
        # No more input can be read: leave cleanly instead of ending in a traceback.
        print("\nExiting the system. Have a great day! 😊")
        break

    if user_input.lower() == "exit":
        print("Exiting the system. Have a great day! 😊")
        break

    if not user_input:
        # Nothing was typed; ask again rather than classifying an empty message.
        continue

    prediction = predict_cyberbullying(user_input)
    print(f"🔹 Prediction: {prediction}\n")


Model Accuracy: 0.9950

Classification Report:
                    precision    recall  f1-score   support

Non-Cyberbullying       0.99      1.00      1.00       107
    Cyberbullying       1.00      0.99      0.99        93

         accuracy                           0.99       200
        macro avg       1.00      0.99      0.99       200
     weighted avg       1.00      0.99      0.99       200


🔹 Cyberbullying Detection System 🔹
Enter text or emojis to check for cyberbullying.
Type 'exit' to stop the program.

Enter text or emojis: exit
Exiting the system. Have a great day! 😊


In [6]:
import pandas as pd
import re
import string
import emoji
import gradio as gr
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# ✅ Define emoji-based classification (Expanded for multi-character emojis)
# Lookup sets used by classify_emoji() for emoji-only input. The whole stripped
# input is compared against the entries, so skin-tone and joined variants are
# listed as separate strings.
#   CYBERBULLYING_EMOJIS     -> a match yields "Cyberbullying"
#   NON_CYBERBULLYING_EMOJIS -> a match yields "Non-Cyberbullying"
# Input matching neither set falls back to "Non-Cyberbullying" in classify_emoji().
CYBERBULLYING_EMOJIS = {"😡", "👊", "💀", "🤬", "😠", "👿", "🖕", "💢", "🔪", "🖕🏽", "🖕🏻", "🖕🏾", "🖕🏿"}
NON_CYBERBULLYING_EMOJIS = {"😊", "❤️", "💝", "👍", "😁", "😇", "🎉", "😂", "💖", "🥰", "🫶🏽", "🧑🏽‍🎨"}

# ✅ Function to check if input contains **only emojis**
def contains_only_emojis(text):
    """Tell whether ``text`` consists solely of emojis.

    Surrounding whitespace is ignored. ``emoji.purely_emoji`` evaluates whole
    emoji sequences, so emojis built from several code points are accepted.
    Counting characters found in ``emoji.EMOJI_DATA`` rejected them, because
    helper characters such as the variation selector or the zero-width joiner
    are not emojis by themselves. That count-based check also reported True
    for an empty string (0 == 0), e.g. an empty text box submission.

    Args:
        text (str): Raw user input.

    Returns:
        bool: True if the stripped text is non-empty and contains nothing but
        emojis; False otherwise, including empty or whitespace-only input.
    """
    stripped = text.strip()
    return bool(stripped) and emoji.purely_emoji(stripped)

# ✅ Emoji classification for emoji-only input (single emojis and runs of emojis)
def classify_emoji(input_text):
    """Classify a message that consists only of emojis.

    The message is flagged when any entry of ``CYBERBULLYING_EMOJIS`` occurs
    in it. Comparing the whole input against the sets only recognised
    messages that were exactly one listed emoji; two hostile emojis in a row
    matched neither set and fell through to "Non-Cyberbullying".

    Args:
        input_text (str): Raw user input.

    Returns:
        str | None: "Cyberbullying" if a hostile emoji is present,
        "Non-Cyberbullying" for any other emoji-only input (unknown emojis
        default to this), or None when the input is not emoji-only so the
        caller can fall back to the text model.
    """
    candidate = input_text.strip()
    if not contains_only_emojis(candidate):
        return None  # If it's mixed with text, process normally

    # Substring test: works for one emoji, repeated emojis and listed multi-code-point variants.
    flagged = any(hostile in candidate for hostile in CYBERBULLYING_EMOJIS)
    return "Cyberbullying" if flagged else "Non-Cyberbullying"

# ✅ Normalise a message before it reaches the vectoriser
def preprocess_text(text):
    """Lower-case and trim ``text``, then delete every character that is
    neither a word character nor whitespace.

    Returns:
        str: The cleaned message.
    """
    punctuation = re.compile(r'[^\w\s]')
    lowered = text.lower().strip()
    return punctuation.sub('', lowered)

# ✅ Read the labelled corpus (adjust the path if the CSV lives elsewhere)
dataset_path = "expanded_cyberbullying_dataset.csv"
df = pd.read_csv(dataset_path)

# ✅ Store the normalised message next to the raw one
df["processed_text"] = df["text"].apply(preprocess_text)

# ✅ Keep 20% of the rows aside for testing; the seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    df["processed_text"],
    df["label"],
    random_state=42,
    test_size=0.2,
)

# ✅ TF-IDF features -> multinomial Naïve Bayes, trained on the training split
model_pipeline = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer()),
        ("classifier", MultinomialNB()),
    ]
)
model_pipeline.fit(X_train, y_train)

# ✅ Measure performance on the held-out split
y_pred = model_pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(
    y_test,
    y_pred,
    target_names=["Non-Cyberbullying", "Cyberbullying"],
)

# ✅ Report
print(f"Model Accuracy: {accuracy:.4f}")
print("\nClassification Report:\n", classification_rep)

# ✅ Combined detector: emoji rules first, text model second
def predict_cyberbullying(user_input):
    """Classify ``user_input`` as "Cyberbullying" or "Non-Cyberbullying".

    The emoji rules get the first say. Only when they return nothing is the
    text normalised and scored by the trained pipeline, where a predicted
    label of 1 means "Cyberbullying".
    """
    outcome = classify_emoji(user_input)
    if not outcome:
        # No emoji verdict: fall back to the text classifier.
        predicted = model_pipeline.predict([preprocess_text(user_input)])[0]
        outcome = "Cyberbullying" if predicted == 1 else "Non-Cyberbullying"
    return outcome

# ✅ Gradio callback
def gradio_interface(input_text):
    """Run the detector on ``input_text`` and return the verdict prefixed
    with "Prediction: " for display in the output box."""
    return f"Prediction: {predict_cyberbullying(input_text)}"

# One text box in, one text box out, wired to gradio_interface().
interface = gr.Interface(
    title="Cyberbullying Detection System",
    description="Enter a phrase or emoji to check if it is cyberbullying.",
    fn=gradio_interface,
    inputs="text",
    outputs="text",
)

# ✅ Start the app. In the recorded Colab run, sharing was switched on
# automatically and a temporary public URL was printed.
interface.launch()


Model Accuracy: 0.9950

Classification Report:
                    precision    recall  f1-score   support

Non-Cyberbullying       0.99      1.00      1.00       107
    Cyberbullying       1.00      0.99      0.99        93

         accuracy                           0.99       200
        macro avg       1.00      0.99      0.99       200
     weighted avg       1.00      0.99      0.99       200

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://aee1e047ffe11d5cb9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


