# SlateMate AI/ML Assignment: Educational Replacement System

This notebook addresses the assignment theme **“Harm to Hope: AI Replacement of Unsafe Content”**.

### Objective:
1. Analyze harmful post text
2. Detect the theme
3. Suggest a safe educational replacement


In [2]:
import pandas as pd
import re
import random


In [3]:
# Load the harmful-post dataset from a CSV resolved relative to the current working
# directory, then preview the first rows to confirm the expected columns are present.
df = pd.read_csv("harmful_metadata.csv")
df.head()



Unnamed: 0,post_id,post_text,category,image_url
0,945394b5-ae1b-4f88-bff4-360a4d6dc632,I can't stand how stupid people are these days.,emotional distress,
1,f634445e-3ab7-4dee-9b8e-9edc69b47b48,I can't stand how stupid people are these days.,negativity,f634445e-3ab7-4dee-9b8e-9edc69b47b48.jpg
2,2b5e7f5c-1cd8-4160-90ee-4b120ff2c701,"Lol, this guy got wrecked. What a clown.",emotional distress,
3,f9c1c9fa-60aa-4f36-8c35-585435d6eb2e,Breakups suck. No one cares anyway.,anger,
4,30146c3f-33eb-4abc-99f7-2270e1156f5e,"You better watch your back, loser.",negativity,30146c3f-33eb-4abc-99f7-2270e1156f5e.jpg


In [4]:
# Regex keyword alternations mapped to the theme they indicate.
# Insertion order is significant: detection returns the theme of the FIRST pattern that
# matches, so earlier entries take priority over later ones.
keyword_theme_map = {
    "stupid|idiot|dumb": "toxic language",
    "loser|hate|kill": "aggression",
    "breakup|alone|sad|depressed|cry": "emotional distress",
    "fight|gun|shoot|punch|war": "violent content",
    "gossip|rumor|drama": "toxic gossip",
    "cheat|lie|betray": "betrayal",
    "lol|clown|meme|wrecked": "toxic humor",
    "ugly|fat|skinny|appearance": "body shaming",
    "no one cares|worthless": "negative self-talk",
    "game|killstreak|ragequit": "violent gaming"
}

# Compile every alternation once, up front.
# The leading \b anchors each keyword to the start of a word so that unrelated words which
# merely contain a keyword are not flagged ("skill" -> "kill", "believe" -> "lie",
# "award" -> "war"). The end is deliberately left open so inflected forms such as
# "breakups", "losers" or "hated" keep matching.
# Trade-off: compounds that END in a keyword (e.g. "shotgun") no longer match.
pattern_theme = [(re.compile(rf"\b(?:{k})"), v) for k, v in keyword_theme_map.items()]

def detect_theme(text, category=None, patterns=None):
    """Return the theme of a post from keyword patterns, falling back to its category.

    Args:
        text: Post text. It is coerced with ``str`` and lower-cased before matching.
        category: Optional label used only when no pattern matches. Only non-blank
            strings are used; ``None``, NaN and other non-string values are ignored.
        patterns: Optional iterable of ``(compiled_regex, theme)`` pairs, searched in
            order. Defaults to the module-level ``pattern_theme``.

    Returns:
        str: The theme of the first matching pattern; otherwise the stripped,
        lower-cased category; otherwise ``"general negativity"``.
    """
    if patterns is None:
        patterns = pattern_theme

    text = str(text).lower()
    for pattern, theme in patterns:
        if pattern.search(text):
            return theme

    # pandas represents an empty CSV cell as float NaN, which is truthy; a plain
    # truthiness check would therefore call .lower() on a float and raise AttributeError.
    if isinstance(category, str) and category.strip():
        # Strip so padded labels still line up with the replacement-library keys.
        return category.strip().lower()
    return "general negativity"

# Tag every post with a theme, row by row; each row's own category is handed to
# detect_theme as its second argument.
df['detected_theme'] = df.apply(lambda row: detect_theme(row['post_text'], row['category']), axis=1)
# Preview each post next to the theme assigned to it.
df[['post_text', 'detected_theme']].head()


Unnamed: 0,post_text,detected_theme
0,I can't stand how stupid people are these days.,toxic language
1,I can't stand how stupid people are these days.,toxic language
2,"Lol, this guy got wrecked. What a clown.",toxic humor
3,Breakups suck. No one cares anyway.,emotional distress
4,"You better watch your back, loser.",aggression


In [5]:
# Theme -> list of candidate replacements. Every candidate is a
# (replacement type, replacement content, reasoning) tuple. Each theme currently has
# exactly one candidate, kept in a list so that more can be added later.
replacement_library = {
    theme: [(kind, content, rationale)]
    for theme, kind, content, rationale in (
        ("toxic language", "AI-generated", "Learn about empathy through kindness stories ", "Promotes empathy."),
        ("toxic humor", "AI-generated", "Watch fun science facts ", "Educational humor."),
        ("emotional distress", "AI-generated", "Guide to emotional well-being ", "Supports coping."),
        ("aggression", "YouTube link", "https://www.youtube.com/watch?v=7X3f2fMZL1c", "Promotes peacebuilding."),
        ("violent content", "AI-generated", "Learn peaceful puzzle game design ", "Redirects aggression."),
        ("toxic gossip", "AI-generated", "Start your entrepreneurial journey ", "Inspires ambition."),
        ("betrayal", "AI-generated", "Trust and growth skills ", "Builds emotional maturity."),
        ("body shaming", "AI-generated", "Body positivity science ", "Fosters self-care."),
        ("negative self-talk", "AI-generated", "Positive affirmation practice ", "Builds self-worth."),
        ("violent gaming", "YouTube link", "https://www.youtube.com/watch?v=2-D5vC3zjVE", "Creative game coding."),
        ("general negativity", "AI-generated", "Daily gratitude habits ", "Counteracts negativity."),
        ("negativity", "AI-generated", "Daily gratitude habits ", "Counteracts negativity."),
        ("anger", "AI-generated", "5-min mindfulness routine ", "Emotional regulation."),
        ("toxicity", "AI-generated", "Better conversations via psychology ", "Improves communication."),
    )
}

def get_replacement(theme):
    """Pick one replacement tuple for ``theme`` at random.

    Themes missing from ``replacement_library`` use the "general negativity"
    candidates instead. Returns a (replacement type, replacement content, reasoning)
    tuple.
    """
    fallback_candidates = replacement_library["general negativity"]
    candidates = replacement_library.get(theme, fallback_candidates)
    return random.choice(candidates)

# Attach one replacement (type, content, reasoning) to every row.
# All tuples are collected first and a single DataFrame is built from them; wrapping each
# tuple in its own pd.Series inside .apply() allocates one Series per row and is
# needlessly slow on larger datasets. Reusing df.index keeps the rows aligned.
replacement_columns = ['replacement_type', 'replacement_content', 'reasoning']
df[replacement_columns] = pd.DataFrame(
    [get_replacement(theme) for theme in df['detected_theme']],
    index=df.index,
    columns=replacement_columns,
)
df.head()


Unnamed: 0,post_id,post_text,category,image_url,detected_theme,replacement_type,replacement_content,reasoning
0,945394b5-ae1b-4f88-bff4-360a4d6dc632,I can't stand how stupid people are these days.,emotional distress,,toxic language,AI-generated,Learn about empathy through kindness stories,Promotes empathy.
1,f634445e-3ab7-4dee-9b8e-9edc69b47b48,I can't stand how stupid people are these days.,negativity,f634445e-3ab7-4dee-9b8e-9edc69b47b48.jpg,toxic language,AI-generated,Learn about empathy through kindness stories,Promotes empathy.
2,2b5e7f5c-1cd8-4160-90ee-4b120ff2c701,"Lol, this guy got wrecked. What a clown.",emotional distress,,toxic humor,AI-generated,Watch fun science facts,Educational humor.
3,f9c1c9fa-60aa-4f36-8c35-585435d6eb2e,Breakups suck. No one cares anyway.,anger,,emotional distress,AI-generated,Guide to emotional well-being,Supports coping.
4,30146c3f-33eb-4abc-99f7-2270e1156f5e,"You better watch your back, loser.",negativity,30146c3f-33eb-4abc-99f7-2270e1156f5e.jpg,aggression,YouTube link,https://www.youtube.com/watch?v=7X3f2fMZL1c,Promotes peacebuilding.


In [6]:
# Build the export frame: keep only the feed columns and expose the post text under the
# name 'original_text'. Selecting with a list and chaining rename() both return new
# frames, so df itself is left untouched.
feed_columns = ['post_id', 'post_text', 'detected_theme',
                'replacement_type', 'replacement_content', 'reasoning']
replacement_feed = df[feed_columns].rename(columns={'post_text': 'original_text'})

# Write the feed without the positional index and confirm on stdout.
replacement_feed.to_csv("replacement_feed.csv", index=False)
print(" replacement_feed.csv saved.")


 replacement_feed.csv saved.


In [7]:
def suggest_educational_alternative(post_text: str, category: str = None) -> dict:
    """Suggest an educational replacement for a single post.

    Args:
        post_text: Text of the post to analyse.
        category: Optional category label forwarded to ``detect_theme``.

    Returns:
        dict with the keys ``detected_theme``, ``replacement_type``,
        ``replacement_content`` and ``reasoning``.
    """
    detected = detect_theme(post_text, category)
    # get_replacement yields a 3-tuple; unpacking enforces that shape.
    kind, content, rationale = get_replacement(detected)
    return dict(
        detected_theme=detected,
        replacement_type=kind,
        replacement_content=content,
        reasoning=rationale,
    )

# Smoke test: "dumb" is a toxic-language keyword, so the keyword match decides the theme
# and the supplied "toxicity" category is not used.
suggest_educational_alternative("You're so dumb", "toxicity")


{'detected_theme': 'toxic language',
 'replacement_type': 'AI-generated',
 'replacement_content': 'Learn about empathy through kindness stories ',
 'reasoning': 'Promotes empathy.'}