In [1]:
from google.colab import files

uploaded = files.upload()  # upload: usernames-train.txt, usernames-dev.txt, usernames-test.txt


Saving usernames-train.txt to usernames-train.txt
Saving usernames-devs.txt to usernames-devs.txt
Saving usernames-test.txt to usernames-test.txt


In [3]:
import os
os.listdir()


['.config',
 'usernames-train.txt',
 'usernames-test.txt',
 'usernames-devs.txt',
 'sample_data']

In [4]:
import pandas as pd

# Load with error handling for malformed lines
def load_username_dataset(file_path):
    return pd.read_csv(
        file_path,
        sep=';',
        header=None,
        names=["name", "username", "label"],
        on_bad_lines='skip',
        engine='python'
    )

# Load the original versions of each file
train_df = load_username_dataset("usernames-train.txt")
dev_df = load_username_dataset("usernames-devs.txt")
test_df = load_username_dataset("usernames-test.txt")

# Combine all into a single dataframe
full_df = pd.concat([train_df, dev_df, test_df], ignore_index=True)

# Preview the result
full_df.sample(5, random_state=42)


Unnamed: 0,name,username,label
1122,kondo satomi,@satoumii0330,correct
184394,Pragyan Mohanty,@pragmoha,correct
187224,Jax Teller,@__johnphillips,correct
130024,TreezyPHI,@Letoto1213,incorrect
151571,KathNiel Zambales,@ZaRdOz420WPN,incorrect


In [5]:
import pandas as pd

# Define keyword lists for each category
blocked_keywords = ['porn', 'sex', 'xxx', 'nude', 'kill', 'terror', 'bomb']
risky_keywords = ['maga', 'liberty', 'freedom', 'patriot', 'gun', 'fire', '69']
safe_keywords = ['love', 'home', 'music', 'nature', 'book', 'travel', 'food']

# Function to assign labels based on keywords
def assign_label(username):
    uname = username.lower()
    if any(word in uname for word in blocked_keywords):
        return 'blocked'
    elif any(word in uname for word in risky_keywords):
        return 'risky'
    else:
        return 'safe'

# Apply the labeling function to the dataset
full_df['ts_label'] = full_df['username'].apply(assign_label)

# Display the distribution of labels
label_distribution =_


In [6]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Prepare data
X = full_df['username']
y = full_df['ts_label']

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Vectorize usernames
vectorizer = CountVectorizer(analyzer='char_wb', ngram_range=(3, 5))
X_vec = vectorizer.fit_transform(X)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X_vec, y_encoded, test_size=0.2, random_state=42)

# Train model
model = MultinomialNB()
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)


Accuracy: 0.9829
Classification Report:
               precision    recall  f1-score   support

     blocked       0.41      0.53      0.47       147
       risky       0.26      0.61      0.37       274
        safe       1.00      0.99      0.99     44191

    accuracy                           0.98     44612
   macro avg       0.56      0.71      0.61     44612
weighted avg       0.99      0.98      0.99     44612

Confusion Matrix:
 [[   78     0    69]
 [    0   168   106]
 [  110   478 43603]]


In [7]:
def classify_username(fuck):
    vec = vectorizer.transform([fuck])
    pred = model.predict(vec)
    label = label_encoder.inverse_transform(pred)[0]
    return label

# Example
print(classify_username("carlover69"))



safe


In [8]:
# Split data by label
safe_df = full_df[full_df['ts_label'] == 'safe']
risky_df = full_df[full_df['ts_label'] == 'risky']
blocked_df = full_df[full_df['ts_label'] == 'blocked']

# Match safe count to risky + blocked
balanced_safe_df = safe_df.sample(n=len(risky_df) + len(blocked_df), random_state=42)

# Combine and shuffle
balanced_df = pd.concat([balanced_safe_df, risky_df, blocked_df]).sample(frac=1, random_state=42).reset_index(drop=True)

# Check the new distribution
balanced_df['ts_label'].value_counts()


Unnamed: 0_level_0,count
ts_label,Unnamed: 1_level_1
safe,2175
risky,1436
blocked,739


In [9]:
# Recreate your balanced dataset if you already have full_df
safe_df = full_df[full_df['ts_label'] == 'safe']
risky_df = full_df[full_df['ts_label'] == 'risky']
blocked_df = full_df[full_df['ts_label'] == 'blocked']

# Match safe count to total of risky + blocked
balanced_safe_df = safe_df.sample(n=len(risky_df) + len(blocked_df), random_state=42)

# Combine and shuffle
balanced_df = pd.concat([balanced_safe_df, risky_df, blocked_df]).sample(frac=1, random_state=42).reset_index(drop=True)


In [10]:
X = balanced_df['username']
y = balanced_df['ts_label']


In [11]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Use the balanced dataset
X = balanced_df['username']
y = balanced_df['ts_label']

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Vectorize usernames
vectorizer = CountVectorizer(analyzer='char_wb', ngram_range=(3, 5))
X_vec = vectorizer.fit_transform(X)

# Split and train
X_train, X_test, y_train, y_test = train_test_split(X_vec, y_encoded, test_size=0.2, random_state=42)

model = MultinomialNB()
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)


Accuracy: 0.8172
Classification Report:
               precision    recall  f1-score   support

     blocked       0.62      1.00      0.77       161
       risky       0.83      0.99      0.90       297
        safe       1.00      0.62      0.76       412

    accuracy                           0.82       870
   macro avg       0.82      0.87      0.81       870
weighted avg       0.87      0.82      0.81       870

Confusion Matrix:
 [[161   0   0]
 [  2 295   0]
 [ 96  61 255]]


In [12]:
# Predict on all usernames (from original full_df)
X_all = vectorizer.transform(full_df['username'])
y_pred_all = model.predict(X_all)
predicted_labels = label_encoder.inverse_transform(y_pred_all)

# Add predictions to the original DataFrame
full_df['model_prediction'] = predicted_labels

# Show examples of usernames predicted as risky
print("🟠 Model flagged these as RISKY:")
display(full_df[full_df['model_prediction'] == 'risky'].sample(10, random_state=1))

# Show examples of usernames predicted as blocked
print("\n🔴 Model flagged these as BLOCKED:")
display(full_df[full_df['model_prediction'] == 'blocked'].sample(10, random_state=2))


🟠 Model flagged these as RISKY:


Unnamed: 0,name,username,label,ts_label,model_prediction
129875,J.P. Bucyensenge,@FinRReg,incorrect,safe,risky
169632,SAfm news,@SAfmnews,correct,safe,risky
93309,Mag Nota: 360V,@TOMIHO_Magic,incorrect,safe,risky
77573,corrie james,@Mr_Relevant18,correct,safe,risky
221896,Greg Chin,@rv_pandey,incorrect,safe,risky
181409,Kylo Ren,@ScottTa72938956,incorrect,safe,risky
82246,Neuro Doctii,@doctii,correct,safe,risky
115467,eAvonLake.com,@DM_19XX,incorrect,safe,risky
199520,@dilnaza_t1994,@Dilnaza_t1994,correct,safe,risky
68248,jobs freedom 500K,@jobsfreedom,correct,risky,risky



🔴 Model flagged these as BLOCKED:


Unnamed: 0,name,username,label,ts_label,model_prediction
84271,Santiago Montes,@fotuzlab,incorrect,safe,blocked
10860,X-Factor Fastpitch,@Xfactorsball,correct,safe,blocked
189739,Jeslalyn Jeon,@Kookiejess,correct,safe,blocked
18237,Helen Ross,@Tighnacoille,correct,safe,blocked
30155,Grant W. McDonald,@GWillMac6,correct,safe,blocked
73459,Amelie D'Hers,@thtshortgirl,correct,safe,blocked
30093,Jessica Carrasco,@JessSexy21,correct,blocked,blocked
214926,ashley pryce,@BarcelonaBuzz,incorrect,safe,blocked
62334,Red,@JesseUtd,correct,safe,blocked
42399,cass,@casslorn,correct,safe,blocked


In [13]:
def classify_username(mutaz):
    """Returns the predicted T&S label for a single username."""
    vec = vectorizer.transform([mutaz])
    pred = model.predict(vec)
    label = label_encoder.inverse_transform(pred)[0]
    return label

# Example usage:
print(classify_username("cool_guy69"))
print(classify_username("booklover123"))
print(classify_username("pornking999"))


risky
risky
blocked


In [14]:
full_df['ts_label'] = full_df['username'].apply(assign_label)


In [15]:
blocked_keywords = [
    # Adult / NSFW
    "porn", "sex", "nude", "xxx", "onlyfans", "slut", "whore", "nsfw", "strip", "camgirl",

    # Violence / Harm
    "kill", "murder", "gunman", "terror", "bomb", "shooter", "slaughter", "rape", "stab", "hitman",

    # Hate / Extremism
    "nazi", "hitler", "kkk", "lynch", "holocaust", "jihad", "isis", "martyr",

    # Drugs
    "cocaine", "meth", "weed", "420", "lsd", "crack", "drugdealer", "dope"
]


In [16]:
risky_keywords = [
    # Political
    "maga", "patriot", "liberty", "freedom", "conspiracy", "truthseeker", "deepstate", "trump", "biden",

    # Scams / Financial
    "cashapp", "freebitcoin", "giveaway", "crypto", "forex", "investnow", "fastmoney", "lottery", "venmo",

    # Suggestive
    "69", "hotstuff", "sugarbaby", "daddy", "babe", "hottie", "freaky",

    # Suspicious tech
    "bot", "hacker", "spoof", "breach", "phish", "leak", "darkweb", "anon"
]


In [17]:
def rule_based_classify(username):
    uname = username.lower()

    if any(word in uname for word in blocked_keywords):
        return "blocked"
    elif any(word in uname for word in risky_keywords):
        return "risky"
    else:
        return "safe"


In [18]:
def test_usernames(usernames):
    results = []
    for name in usernames:
        label = rule_based_classify(name)
        results.append({"username": name, "prediction": label})
    return pd.DataFrame(results)

# Sample usernames to test
test_list = [
    "johnsmith",            # safe
    "maga_patriot69",       # risky
    "killthevibe",          # blocked
    "sugarbabe420",         # risky
    "camgirl_live",         # blocked
    "family_trips",         # safe
    "crypto_investor",      # risky
    "porncollector",        # blocked
    "anonymous_breach",     # risky
    "bookworm_life",        # safe
]

# Run the test
test_results = test_usernames(test_list)
test_results


Unnamed: 0,username,prediction
0,johnsmith,safe
1,maga_patriot69,risky
2,killthevibe,blocked
3,sugarbabe420,blocked
4,camgirl_live,blocked
5,family_trips,safe
6,crypto_investor,risky
7,porncollector,blocked
8,anonymous_breach,risky
9,bookworm_life,safe


In [19]:
blocked_keywords += [
    "nigger", "nigga", "chink", "kike", "spic", "fag", "faggot",
    "retard", "tranny", "coon", "wetback", "gook", "towelhead", "raghead"
]


In [20]:
blocked_keywords += [
    "fuck", "fucker", "fucking", "shit", "bitch", "cunt", "dick", "pussy",
    "asshole", "motherfucker", "slut", "whore", "cum", "blowjob"
]


In [21]:
blocked_keywords += [
    "incest", "childporn", "cp", "pedo", "paedo", "bestiality", "zoophilia",
    "molest", "groomer", "map", "loli", "underage"
]


In [22]:
adult_keywords = [
    "porn", "sex", "xxx", "nude", "onlyfans", "strip", "camgirl", "blowjob", "cum", "anal", "dp",
    "fucking", "suck", "tits", "boobs", "dildo", "fetish", "nsfw", "69", "orgy", "bangbus"
]


In [23]:
violence_keywords = [
    "kill", "murder", "bomb", "terror", "jihad", "isis", "martyr", "shoot", "lynch", "execute", "stab", "bloodbath", "genocide"
]


In [24]:
child_abuse_keywords = [
    "incest", "pedo", "paedo", "childporn", "cp", "map", "loli", "underage", "groomer", "molest", "bestiality", "zoophilia"
]


In [25]:
profanity_keywords = [
    "fuck", "fucker", "shit", "bitch", "slut", "whore", "dick", "pussy", "asshole", "cunt", "motherfucker"
]


In [26]:
risky_keywords = [
    "maga", "patriot", "liberty", "freedom", "trump", "biden", "deepstate", "conspiracy",
    "cashapp", "crypto", "freebitcoin", "lottery", "giveaway", "venmo", "investnow"
]


In [27]:
# 7. Drugs / Substances
drug_keywords = [
    "cocaine", "crack", "heroin", "meth", "lsd", "ecstasy", "weed", "420", "dope",
    "drugdealer", "ketamine", "molly", "pills", "opium", "fentanyl"
]


In [28]:
# 1. Adult / NSFW
adult_keywords = [
    "porn", "sex", "xxx", "nude", "onlyfans", "strip", "camgirl", "blowjob", "cum", "anal", "dp",
    "fucking", "suck", "tits", "boobs", "dildo", "fetish", "nsfw", "69", "orgy", "bangbus"
]

# 2. Hate Speech
hate_keywords = [
    "nigger", "nigga", "faggot", "fag", "chink", "kike", "spic", "retard", "tranny",
    "coon", "wetback", "gook", "towelhead", "raghead"
]

# 3. Violence / Extremism
violence_keywords = [
    "kill", "murder", "bomb", "terror", "jihad", "isis", "martyr", "shoot", "lynch", "execute", "stab", "bloodbath", "genocide"
]

# 4. Child Exploitation / Illegality
child_abuse_keywords = [
    "incest", "pedo", "paedo", "childporn", "cp", "map", "loli", "underage", "groomer", "molest", "bestiality", "zoophilia"
]

# 5. Profanity / Vulgarity
profanity_keywords = [
    "fuck", "fucker", "shit", "bitch", "slut", "whore", "dick", "pussy", "asshole", "cunt", "motherfucker"
]

# 6. Risky / Borderline
risky_keywords = [
    "maga", "patriot", "liberty", "freedom", "trump", "biden", "deepstate", "conspiracy",
    "cashapp", "crypto", "freebitcoin", "lottery", "giveaway", "venmo", "investnow"
]

# ✅ Combine categories into your main lists
blocked_keywords = adult_keywords + hate_keywords + violence_keywords + child_abuse_keywords + profanity_keywords


In [29]:
blocked_keywords = adult_keywords + hate_keywords + violence_keywords + child_abuse_keywords + profanity_keywords
risky_keywords = risky_keywords  # already defined above


In [30]:
def classify_with_category(username):
    uname = username.lower()

    for word in child_abuse_keywords:
        if word in uname:
            return "blocked", "child_abuse"
    for word in hate_keywords:
        if word in uname:
            return "blocked", "hate"
    for word in adult_keywords:
        if word in uname:
            return "blocked", "adult"
    for word in violence_keywords:
        if word in uname:
            return "blocked", "violence"
    for word in profanity_keywords:
        if word in uname:
            return "blocked", "profanity"
    for word in risky_keywords:
        if word in uname:
            return "risky", "risky"
    return "safe", "clean"


In [31]:
def test_username(username):
    result = rule_based_classify(username)
    print(f"🧾 Username: {username} → Prediction: {result}")


In [32]:
def test_username(username):
    result = rule_based_classify(username)
    print(f"🧾 Username: {username} → Prediction: {result}")


In [33]:
blocked_keywords = (
    adult_keywords + hate_keywords + violence_keywords +
    child_abuse_keywords + profanity_keywords + drug_keywords
)


In [34]:
# 🔞 Expanded Adult / NSFW Keywords (including variants & leetspeak)
adult_keywords = [
    "porn", "sex", "xxx", "nude", "onlyfans", "strip", "camgirl", "blowjob", "cum", "anal", "dp",
    "fucking", "suck", "tits", "boobs", "dildo", "fetish", "nsfw", "69", "orgy", "bangbus",
    "cock", "bigcock", "hardcock", "lovecock", "suckcock", "c0ck", "c*ck", "d1ck", "d1ildo",
    "t1ts", "t!ts", "t!tties", "deepthroat", "cumshot", "booty", "milf", "bj", "daddy69"
]

# 💬 Hate Speech / Slurs
hate_keywords = [
    "nigger", "nigga", "faggot", "fag", "chink", "kike", "spic", "retard", "tranny",
    "coon", "wetback", "gook", "towelhead", "raghead"
]

# 🔪 Violence / Extremism
violence_keywords = [
    "kill", "murder", "bomb", "terror", "jihad", "isis", "martyr", "shoot", "lynch", "execute", "stab", "bloodbath", "genocide"
]

# 🚨 Child Abuse / Illegality
child_abuse_keywords = [
    "incest", "pedo", "paedo", "childporn", "cp", "map", "loli", "underage", "groomer", "molest", "bestiality", "zoophilia"
]

# 💢 Profanity
profanity_keywords = [
    "fuck", "fucker", "shit", "bitch", "slut", "whore", "dick", "pussy", "asshole", "cunt", "motherfucker"
]

# 💊 Drug-related
drug_keywords = [
    "cocaine", "crack", "heroin", "meth", "lsd", "ecstasy", "weed", "420", "dope",
    "drugdealer", "ketamine", "molly", "pills", "opium", "fentanyl"
]

# 🟠 Risky / Borderline
risky_keywords = [
    "maga", "patriot", "liberty", "freedom", "trump", "biden", "deepstate", "conspiracy",
    "cashapp", "crypto", "freebitcoin", "lottery", "giveaway", "venmo", "investnow"
]

# ✅ Combine all categories
blocked_keywords = (
    adult_keywords + hate_keywords + violence_keywords +
    child_abuse_keywords + profanity_keywords + drug_keywords
)

# 🔍 Rule-based classifier with category explanation
def classify_with_category(username):
    uname = username.lower()

    for word in child_abuse_keywords:
        if word in uname:
            return "blocked", "child_abuse"
    for word in hate_keywords:
        if word in uname:
            return "blocked", "hate"
    for word in adult_keywords:
        if word in uname:
            return "blocked", "adult"
    for word in violence_keywords:
        if word in uname:
            return "blocked", "violence"
    for word in profanity_keywords:
        if word in uname:
            return "blocked", "profanity"
    for word in drug_keywords:
        if word in uname:
            return "blocked", "drugs"
    for word in risky_keywords:
        if word in uname:
            return "risky", "risky"

    return "safe", "clean"

# ✨ Testing function
def test_username(username):
    label, category = classify_with_category(username)
    print(f"🧾 Username: {username} → Prediction: {label.upper()} ({category})")


In [35]:
profanity_keywords += [
    "ass", "arse", "butt", "booty", "nips", "boob", "knob"
]


In [36]:
drug_keywords += [
    "kush", "blunt", "reefer", "ganja", "marijuana", "stoner", "highaf", "smokeweed", "weedlover"
]


In [37]:
hate_keywords += [
    "nigg"  # covers partials like nigg, niggz, niggy, etc.
]


In [38]:
"lezbo", "lezbian", "feminazi", "manhater"


('lezbo', 'lezbian', 'feminazi', 'manhater')

In [39]:
hate_keywords += [
    "dyke"
]


In [40]:
adult_keywords += [
    "wetgirl", "wetgirls", "wetpussy", "wetass", "wetdream",
    "hotgirl", "sexygirl", "naughtygirl", "bustybabe", "sluttygirl"
]


In [41]:
def classify_with_category(username):
    uname = username.lower()

    # Smart sexualized patterns
    sexualized_combos = ["wet", "hot", "sexy", "naughty", "busty", "slutty"]
    if any(word in uname for word in sexualized_combos) and "girl" in uname:
        return "blocked", "adult"

    # Keyword-based filters
    for word in child_abuse_keywords:
        if word in uname:
            return "blocked", "child_abuse"
    for word in hate_keywords:
        if word in uname:
            return "blocked", "hate"
    for word in adult_keywords:
        if word in uname:
            return "blocked", "adult"
    for word in violence_keywords:
        if word in uname:
            return "blocked", "violence"
    for word in profanity_keywords:
        if word in uname:
            return "blocked", "profanity"
    for word in drug_keywords:
        if word in uname:
            return "blocked", "drugs"
    for word in risky_keywords:
        if word in uname:
            return "risky", "risky"

    return "safe", "clean"


In [42]:
def classify_with_category(username):
    uname = username.lower()

    # 🔞 Sexualized combo patterns (must come before keyword matching)
    sexualized_prefixes = ["wet", "hot", "sexy", "naughty", "busty", "slutty", "freaky", "tight", "horny"]
    sexualized_targets = ["girl", "boy", "mom", "milf", "babe", "hottie", "daddy", "baby", "gf", "teen"]

    if any(p in uname for p in sexualized_prefixes) and any(t in uname for t in sexualized_targets):
        return "blocked", "adult"

    # Keyword-based detection
    for word in child_abuse_keywords:
        if word in uname:
            return "blocked", "child_abuse"
    for word in hate_keywords:
        if word in uname:
            return "blocked", "hate"
    for word in adult_keywords:
        if word in uname:
            return "blocked", "adult"
    for word in violence_keywords:
        if word in uname:
            return "blocked", "violence"
    for word in profanity_keywords:
        if word in uname:
            return "blocked", "profanity"
    for word in drug_keywords:
        if word in uname:
            return "blocked", "drugs"
    for word in risky_keywords:
        if word in uname:
            return "risky", "risky"

    return "safe", "clean"


In [43]:
def classify_with_category(username):
    uname = username.lower()

    # 🔞 1. Sexualized combo patterns
    sexualized_prefixes = ["wet", "hot", "sexy", "naughty", "busty", "slutty", "freaky", "tight", "horny"]
    sexualized_targets = ["girl", "boy", "mom", "milf", "babe", "hottie", "daddy", "baby", "gf", "teen"]

    if any(p in uname for p in sexualized_prefixes) and any(t in uname for t in sexualized_targets):
        return "blocked", "adult"

    # 🔽 2. Bottom-specific explicit phrases
    explicit_bottom_phrases = ["powerbottom", "bottom69", "bottomboy", "bottomslut"]
    if any(phrase in uname for phrase in explicit_bottom_phrases):
        return "blocked", "adult"

    # 🔒 3. Keyword category matching
    for word in child_abuse_keywords:
        if word in uname:
            return "blocked", "child_abuse"
    for word in hate_keywords:
        if word in uname:
            return "blocked", "hate"
    for word in adult_keywords:
        if word in uname:
            return "blocked", "adult"
    for word in violence_keywords:
        if word in uname:
            return "blocked", "violence"
    for word in profanity_keywords:
        if word in uname:
            return "blocked", "profanity"
    for word in drug_keywords:
        if word in uname:
            return "blocked", "drugs"
    for word in risky_keywords:
        if word in uname:
            return "risky", "risky"

    return "safe", "clean"


In [44]:
def classify_with_category(username):
    uname = username.lower()

    # 🔞 1. Sexualized combo patterns
    sexualized_prefixes = ["wet", "hot", "sexy", "naughty", "busty", "slutty", "freaky", "tight", "horny"]
    sexualized_targets = ["girl", "boy", "mom", "milf", "babe", "hottie", "daddy", "baby", "gf", "teen"]
    if any(p in uname for p in sexualized_prefixes) and any(t in uname for t in sexualized_targets):
        return "blocked", "adult"

    # 🔽 2. Bottom-specific explicit phrases
    explicit_bottom_phrases = ["powerbottom", "bottom69", "bottomboy", "bottomslut"]
    if any(phrase in uname for phrase in explicit_bottom_phrases):
        return "blocked", "adult"

    # 🔞 3. Sexual euphemisms & slang
    euphemism_phrases = [
        "hungryholes", "openholes", "tightpussy", "wetholes",
        "deepinside", "bangme", "lickme", "takeit", "spreadlegs"
    ]
    if any(phrase in uname for phrase in euphemism_phrases):
        return "blocked", "adult"

    # 🛡️ 4. Keyword category matching
    for word in child_abuse_keywords:
        if word in uname:
            return "blocked", "child_abuse"
    for word in hate_keywords:
        if word in uname:
            return "blocked", "hate"
    for word in adult_keywords:
        if word in uname:
            return "blocked", "adult"
    for word in violence_keywords:
        if word in uname:
            return "blocked", "violence"
    for word in profanity_keywords:
        if word in uname:
            return "blocked", "profanity"
    for word in drug_keywords:
        if word in uname:
            return "blocked", "drugs"
    for word in risky_keywords:
        if word in uname:
            return "risky", "risky"

    return "safe", "clean"


In [45]:
import re

def normalize_username(uname):
    uname = uname.lower()

    # Replace common obfuscations
    uname = re.sub(r"[^a-z0-9]", "", uname)  # remove *, _, ., etc.
    uname = uname.replace("ph", "f").replace("q", "ck")

    # Add more patterns if needed
    return uname


In [46]:
import re

# Normalization function for obfuscated usernames
def normalize_username(uname):
    uname = uname.lower()
    uname = re.sub(r"[^a-z0-9]", "", uname)  # remove *, _, ., spaces, etc.
    uname = uname.replace("ph", "f")         # phuck → fuck
    uname = uname.replace("q", "ck")         # fuq → fuck
    return uname

# Final classifier function
def classify_with_category(username):
    uname = normalize_username(username)

    # 1. Sexualized combo patterns
    sexualized_prefixes = ["wet", "hot", "sexy", "naughty", "busty", "slutty", "freaky", "tight", "horny"]
    sexualized_targets = ["girl", "boy", "mom", "milf", "babe", "hottie", "daddy", "baby", "gf", "teen"]
    if any(p in uname for p in sexualized_prefixes) and any(t in uname for t in sexualized_targets):
        return "blocked", "adult"

    # 2. Bottom-specific explicit phrases
    explicit_bottom_phrases = ["powerbottom", "bottom69", "bottomboy", "bottomslut"]
    if any(phrase in uname for phrase in explicit_bottom_phrases):
        return "blocked", "adult"

    # 3. Euphemisms and sexual slang
    euphemism_phrases = [
        "hungryholes", "openholes", "tightpussy", "wetholes",
        "deepinside", "bangme", "lickme", "takeit", "spreadlegs"
    ]
    if any(phrase in uname for phrase in euphemism_phrases):
        return "blocked", "adult"

    # 4. Keyword category matching
    for word in child_abuse_keywords:
        if word in uname:
            return "blocked", "child_abuse"
    for word in hate_keywords:
        if word in uname:
            return "blocked", "hate"
    for word in adult_keywords:
        if word in uname:
            return "blocked", "adult"
    for word in violence_keywords:
        if word in uname:
            return "blocked", "violence"
    for word in profanity_keywords:
        if word in uname:
            return "blocked", "profanity"
    for word in drug_keywords:
        if word in uname:
            return "blocked", "drugs"
    for word in risky_keywords:
        if word in uname:
            return "risky", "risky"

    return "safe", "clean"


In [47]:
def normalize_username(uname):
    uname = uname.lower()

    # Common replacements (specific pattern-to-word rules)
    replacements = {
        r"f[\W_]*u[\W_]*c[\W_]*k": "fuck",
        r"f[\W_]*\*{1,2}[\W_]*k": "fuck",
        r"phuck": "fuck",
        r"fuq": "fuck",
        r"fux": "fuck",
        r"s[\W_]*h[\W_]*i[\W_]*t": "shit",
        r"b[\W_]*i[\W_]*t[\W_]*c[\W_]*h": "bitch",
        r"c[\W_]*u[\W_]*n[\W_]*t": "cunt"
    }

    for pattern, replacement in replacements.items():
        uname = re.sub(pattern, replacement, uname)

    # Remove leftover symbols
    uname = re.sub(r"[^a-z0-9]", "", uname)

    return uname


In [48]:
hate_keywords += [
    "nazi", "nazis", "nazism", "neo-nazi", "neonazi"
]


In [49]:
hate_keywords = [
    "nazi", "neo-nazi", "white power", "white pride", "white supremacy", "kkk", "klan",
    "heil hitler", "1488", "88", "14words", "sixmillionlies", "holocaust denial",
    "zionazi", "antisemitic", "jewboy", "oven dodger", "gas the jews", "dirty jew",
    "filthy jew", "jew rat", "jewlover", "globalist agenda", "zionist puppet",
    "jewish elite", "international bankers", "new world order", "rothschild",
    "jewish cabal", "jewish power", "zionist control", "jewish media", "blood libel",
    "synagogue of satan", "the tribe", "goyim know"
]


In [50]:
violence_keywords = [
    "kill", "murder", "assassinate", "slaughter", "massacre", "genocide", "lynch",
    "behead", "decapitate", "shoot", "stab", "bomb", "explode", "terrorist", "terrorism",
    "suicide bomber", "jihad", "martyrdom", "execute", "execution", "hang", "burn alive"
]


In [51]:
drug_keywords = [
    "cocaine", "crack", "heroin", "meth", "methamphetamine", "lsd", "acid", "ecstasy",
    "mdma", "molly", "weed", "marijuana", "cannabis", "pot", "hash", "hashish", "opium",
    "opiates", "fentanyl", "xanax", "valium", "adderall", "oxy", "oxycodone", "percocet",
    "vicodin", "ketamine", "shrooms", "psilocybin", "dmt", "ayahuasca", "salvia"
]


In [52]:
obfuscated_keywords = [
    "f*ck", "f**k", "f***", "s3x", "s*x", "p0rn", "pr0n", "n@zi", "n@z1", "k1ke", "k!ke",
    "j3w", "j3w5", "g4y", "g@y", "d1ke", "d!ke", "c*m", "c**", "b!tch", "b1tch", "sh!t",
    "sh1t", "a$$", "@ss", "n1gg3r", "n!gger", "n1gg@"
]


In [53]:
child_abuse_keywords = [
    "child porn", "child pornography", "cp", "kiddie porn", "pedo", "paedo", "pedophile",
    "paedophile", "child molester", "child abuse", "underage sex", "incest", "loli",
    "shota", "baby rape", "toddlercon", "preteen sex", "young girl nude", "young boy nude"
]


In [54]:
hate_keywords = [
    "nazi", "neo-nazi", "white power", "white pride", "white supremacy", "kkk", "klan",
    "heil hitler", "1488", "88", "14words", "sixmillionlies", "holocaust denial",
    "zionazi", "antisemitic", "jewboy", "oven dodger", "gas the jews", "dirty jew",
    "filthy jew", "jew rat", "jewlover", "globalist agenda", "zionist puppet",
    "jewish elite", "international bankers", "new world order", "rothschild",
    "jewish cabal", "jewish power", "zionist control", "jewish media", "blood libel",
    "synagogue of satan", "the tribe", "goyim know"
]


In [55]:
adult_keywords = [
    "porn", "porno", "pornography", "xxx", "sex", "sexy", "nude", "naked", "hardcore",
    "bdsm", "fetish", "cum", "cumming", "orgy", "anal", "vaginal", "dildo", "vibrator",
    "masturbate", "masturbation", "blowjob", "handjob", "rimjob", "threesome", "foursome",
    "gangbang", "squirting", "pegging", "strapon", "milf", "gilf", "teen", "barely legal"
]


In [56]:
profanity_keywords = [
    "fuck", "shit", "bitch", "cunt", "asshole", "dick", "pussy", "bastard", "slut", "whore",
    "motherfucker", "nigger", "faggot", "dyke", "tranny", "kike", "chink", "spic", "gook",
    "wop", "kraut", "raghead", "camel jockey", "sand nigger", "zipperhead", "wetback"
]


In [57]:
import re

def classify_with_category(username):
    uname = normalize_username(username)

    # ⛔ Detect antisemitic triple parentheses (((echo)))
    if re.search(r"\({3}.*\){3}", username):
        return "blocked", "hate"

    # 1. Sexualized combo patterns
    sexualized_prefixes = ["wet", "hot", "sexy", "naughty", "busty", "slutty", "freaky", "tight", "horny"]
    sexualized_targets = ["girl", "boy", "mom", "milf", "babe", "hottie", "daddy", "baby", "gf", "teen"]
    if any(p in uname for p in sexualized_prefixes) and any(t in uname for t in sexualized_targets):
        return "blocked", "adult"

    # 2. Bottom-related phrases
    explicit_bottom_phrases = ["powerbottom", "bottom69", "bottomboy", "bottomslut"]
    if any(phrase in uname for phrase in explicit_bottom_phrases):
        return "blocked", "adult"

    # 3. Sexual euphemisms
    euphemism_phrases = [
        "hungryholes", "openholes", "tightpussy", "wetholes",
        "deepinside", "bangme", "lickme", "takeit", "spreadlegs"
    ]
    if any(phrase in uname for phrase in euphemism_phrases):
        return "blocked", "adult"

    # 4. Keyword matching
    for word in child_abuse_keywords:
        if word in uname:
            return "blocked", "child_abuse"
    for word in hate_keywords:
        if word in uname:
            return "blocked", "hate"
    for word in adult_keywords:
        if word in uname:
            return "blocked", "adult"
    for word in violence_keywords:
        if word in uname:
            return "blocked", "violence"
    for word in profanity_keywords:
        if word in uname:
            return "blocked", "profanity"
    for word in drug_keywords:
        if word in uname:
            return "blocked", "drugs"
    for word in risky_keywords:
        if word in uname:
            return "risky", "risky"

    return "safe", "clean"


In [58]:
antisemitic_keywords = [
    "kike", "heeb", "yid", "zionazi", "jewboy", "hooknose", "oven dodger",
    "gas the jews", "jewish scum", "dirty jew", "filthy jew", "jew rat", "jewlover"
]


In [59]:
import re

def classify_with_category(username):
    uname = normalize_username(username)

    # 0. Detect (((echo))) antisemitic format
    if re.search(r"\({3}.*\){3}", username):
        return "blocked", "hate"

    # 1. Explicit antisemitic phrases
    antisemitic_phrases = [
        "ihatejews", "killjews", "gasjews", "death2jews", "nukejews", "banjews",
        "attackjews", "jewhater", "fuckjews", "diejews", "jewslayer"
    ]
    if any(phrase in uname for phrase in antisemitic_phrases):
        return "blocked", "hate"

    # 2. Combo hate verbs + targeted group
    hate_verbs = ["hate", "kill", "gas", "ban", "destroy", "fuck", "bomb", "die"]
    targeted_groups = ["jews", "jew", "muslims", "blacks", "gays", "trans", "women"]
    if any(h in uname for h in hate_verbs) and any(t in uname for t in targeted_groups):
        return "blocked", "hate"

    # 3. Sexualized combos
    sexualized_prefixes = ["wet", "hot", "sexy", "naughty", "busty", "slutty", "freaky", "tight", "horny"]
    sexualized_targets = ["girl", "boy", "mom", "milf", "babe", "hottie", "daddy", "baby", "gf", "teen"]
    if any(p in uname for p in sexualized_prefixes) and any(t in uname for t in sexualized_targets):
        return "blocked", "adult"

    # 4. Bottom-related explicit phrases
    explicit_bottom_phrases = ["powerbottom", "bottom69", "bottomboy", "bottomslut"]
    if any(phrase in uname for phrase in explicit_bottom_phrases):
        return "blocked", "adult"

    # 5. Euphemisms and sexual slang
    euphemism_phrases = [
        "hungryholes", "openholes", "tightpussy", "wetholes",
        "deepinside", "bangme", "lickme", "takeit", "spreadlegs"
    ]
    if any(phrase in uname for phrase in euphemism_phrases):
        return "blocked", "adult"

    # 6. Category-based matching
    for word in child_abuse_keywords:
        if word in uname:
            return "blocked", "child_abuse"
    for word in hate_keywords:
        if word in uname:
            return "blocked", "hate"
    for word in adult_keywords:
        if word in uname:
            return "blocked", "adult"
    for word in violence_keywords:
        if word in uname:
            return "blocked", "violence"
    for word in profanity_keywords:
        if word in uname:
            return "blocked", "profanity"
    for word in drug_keywords:
        if word in uname:
            return "blocked", "drugs"
    for word in risky_keywords:
        if word in uname:
            return "risky", "risky"

    return "safe", "clean"


In [60]:
hate_verbs = ["hate", "kill", "gas", "ban", "destroy", "fuck", "bomb", "die"]

targeted_groups = [
    # Religions
    "jews", "jew", "muslims", "christians", "catholics", "hindus", "buddhists", "islam", "judaism", "christianity",

    # Races / ethnicities / identities
    "blacks", "black", "asians", "latinos", "whites", "gays", "lesbians", "trans", "queers", "lgbtq", "immigrants", "women"
]


In [61]:
import re

def classify_with_category(username):
    uname = normalize_username(username)

    # 0. Detect (((echo))) antisemitic format
    if re.search(r"\({3}.*\){3}", username):
        return "blocked", "hate"

    # 1. Antisemitic slur phrases
    antisemitic_phrases = [
        "ihatejews", "killjews", "gasjews", "death2jews", "nukejews", "banjews",
        "attackjews", "jewhater", "fuckjews", "diejews", "jewslayer"
    ]
    if any(phrase in uname for phrase in antisemitic_phrases):
        return "blocked", "hate"

    # 2. Hate verb + group combo logic
    hate_verbs = ["hate", "kill", "gas", "ban", "destroy", "fuck", "bomb", "die"]
    targeted_groups = [
        "jews", "jew", "muslims", "christians", "catholics", "hindus", "buddhists", "islam", "judaism", "christianity",
        "blacks", "black", "asians", "latinos", "whites", "gays", "lesbians", "trans", "queers", "lgbtq", "immigrants", "women"
    ]
    if any(h in uname for h in hate_verbs) and any(t in uname for t in targeted_groups):
        return "blocked", "hate"

    # 3. Sexualized combos
    sexualized_prefixes = ["wet", "hot", "sexy", "naughty", "busty", "slutty", "freaky", "tight", "horny"]
    sexualized_targets = ["girl", "boy", "mom", "milf", "babe", "hottie", "daddy", "baby", "gf", "teen"]
    if any(p in uname for p in sexualized_prefixes) and any(t in uname for t in sexualized_targets):
        return "blocked", "adult"

    # 4. Bottom-related phrases
    explicit_bottom_phrases = ["powerbottom", "bottom69", "bottomboy", "bottomslut"]
    if any(phrase in uname for phrase in explicit_bottom_phrases):
        return "blocked", "adult"

    # 5. Euphemisms and sexual slang
    euphemism_phrases = [
        "hungryholes", "openholes", "tightpussy", "wetholes",
        "deepinside", "bangme", "lickme", "takeit", "spreadlegs"
    ]
    if any(phrase in uname for phrase in euphemism_phrases):
        return "blocked", "adult"

    # 6. Category-based matching
    for word in child_abuse_keywords:
        if word in uname:
            return "blocked", "child_abuse"
    for word in hate_keywords:
        if word in uname:
            return "blocked", "hate"
    for word in adult_keywords:
        if word in uname:
            return "blocked", "adult"
    for word in violence_keywords:
        if word in uname:
            return "blocked", "violence"
    for word in profanity_keywords:
        if word in uname:
            return "blocked", "profanity"
    for word in drug_keywords:
        if word in uname:
            return "blocked", "drugs"
    for word in risky_keywords:
        if word in uname:
            return "risky", "risky"

    return "safe", "clean"


In [62]:
violence_keywords += [
    "slasher", "bloodlust", "cutthroat", "knifeboy", "goreking", "chainsawman"
]


In [63]:
import re

def normalize_username(uname):
    uname = uname.lower()
    replacements = {
        r"f[\W_]*u[\W_]*c[\W_]*k": "fuck",
        r"f[\W_]*\*{1,2}[\W_]*k": "fuck",
        r"phuck": "fuck",
        r"fuq": "fuck",
        r"fux": "fuck",
        r"s[\W_]*h[\W_]*i[\W_]*t": "shit",
        r"b[\W_]*i[\W_]*t[\W_]*c[\W_]*h": "bitch",
        r"c[\W_]*u[\W_]*n[\W_]*t": "cunt"
    }
    for pattern, replacement in replacements.items():
        uname = re.sub(pattern, replacement, uname)
    uname = re.sub(r"[^a-z0-9]", "", uname)
    return uname

def classify_with_category(username):
    uname = normalize_username(username)

    # Echo antisemitic format (((name)))
    if re.search(r"\({3}.*\){3}", username):
        return "Blocked (hate)"

    # Antisemitic slurs and phrases
    antisemitic_phrases = [
        "ihatejews", "killjews", "gasjews", "banjews", "jewhater", "fuckjews"
    ]
    if any(phrase in uname for phrase in antisemitic_phrases):
        return "Blocked (hate)"

    # Hate verbs + targeted groups (combo check)
    hate_verbs = ["hate", "kill", "gas", "ban", "destroy", "fuck", "bomb", "die"]
    targeted_groups = [
        "jews", "muslims", "muslms", "moslems", "christians", "catholics", "gays",
        "trans", "lesbians", "queers", "women", "immigrants"
    ]
    if any(h in uname for h in hate_verbs) and any(t in uname for t in targeted_groups):
        return "Blocked (hate)"

    return "Safe"


In [64]:
def test_username(username):
    result = classify_with_category(username)
    print(f"🧾 Username: {username} → Prediction: {result}")


In [65]:
profanity_keywords = ["fuck", "shit", "bitch", "cunt", "slut", "whore", "dick", "pussy"]

def classify_with_category(username):
    uname = normalize_username(username)

    for word in profanity_keywords:
        if word in uname:
            return "blocked", "profanity"

    return "safe", "clean"


In [66]:
import re

# 1. Normalize usernames (handle symbols, lowercase, etc.)
def normalize_username(uname):
    uname = uname.lower()
    replacements = {
        r"f[\W_]*u[\W_]*c[\W_]*k": "fuck",
        r"f[\W_]*\*{1,2}[\W_]*k": "fuck",
        r"phuck": "fuck",
        r"fuq": "fuck",
        r"s[\W_]*h[\W_]*i[\W_]*t": "shit",
        r"b[\W_]*i[\W_]*t[\W_]*c[\W_]*h": "bitch",
        r"c[\W_]*u[\W_]*n[\W_]*t": "cunt"
    }
    for pattern, replacement in replacements.items():
        uname = re.sub(pattern, replacement, uname)
    uname = re.sub(r"[^a-z0-9]", "", uname)
    return uname

# 2. Keyword Lists
profanity_keywords = ["fuck", "shit", "bitch", "cunt", "slut", "whore", "dick", "pussy", "asshole"]
hate_keywords = ["jew", "kike", "nazi", "zionazi", "dyke", "nigger", "tranny", "gook"]
adult_keywords = ["porn", "sex", "cock", "cum", "milf", "orgy", "fetish", "nude"]
violence_keywords = ["kill", "bomb", "terror", "stab", "shoot", "lynch"]
child_abuse_keywords = ["loli", "shota", "cp", "pedo", "paedo", "underage"]
drug_keywords = ["weed", "cocaine", "meth", "lsd", "mdma", "xanax", "fentanyl"]
risky_keywords = ["maga", "crypto", "freebitcoin", "deepstate"]

# 3. Targeted hate logic (combo matching)
hate_verbs = ["hate", "kill", "ban", "destroy", "fuck", "bomb", "die"]
targeted_groups = [
    "jews", "muslims", "muslms", "christians", "gays", "trans", "lesbians",
    "queers", "women", "immigrants", "black", "blacks", "asians", "whites"
]

# 4. Classifier function
def classify_with_category(username):
    uname = normalize_username(username)

    # (((echo)))
    if re.search(r"\({3}.*\){3}", username):
        return "blocked", "hate"

    # Phrase-based hate
    antisemitic_phrases = ["ihatejews", "killjews", "gasjews", "fuckjews"]
    if any(phrase in uname for phrase in antisemitic_phrases):
        return "blocked", "hate"

    # Hate verb + target group combo
    if any(h in uname for h in hate_verbs) and any(t in uname for t in targeted_groups):
        return "blocked", "hate"

    # Category-based blocking
    for word in child_abuse_keywords:
        if word in uname:
            return "blocked", "child_abuse"
    for word in hate_keywords:
        if word in uname:
            return "blocked", "hate"
    for word in adult_keywords:
        if word in uname:
            return "blocked", "adult"
    for word in violence_keywords:
        if word in uname:
            return "blocked", "violence"
    for word in profanity_keywords:
        if word in uname:
            return "blocked", "profanity"
    for word in drug_keywords:
        if word in uname:
            return "blocked", "drugs"
    for word in risky_keywords:
        if word in uname:
            return "risky", "risky"

    return "safe", "clean"

# 5. Test function
def test_username(username):
    label, category = classify_with_category(username)
    print(f"🧾 Username: {username} → Prediction: {label.upper()} ({category})")


In [67]:
keyword_lists = {
    "profanity": [
        "fuck", "shit", "bitch", "cunt", "slut", "whore", "dick", "pussy", "asshole", "motherfucker",
        "f*ck", "f**k", "f***", "s3x", "s*x", "p0rn", "pr0n", "b!tch", "b1tch", "sh!t", "sh1t", "a$$", "@ss"
    ],
    "adult": [
        "porn", "sex", "xxx", "nude", "onlyfans", "strip", "camgirl", "blowjob", "cum", "anal", "dp",
        "fucking", "suck", "tits", "boobs", "dildo", "fetish", "nsfw", "69", "orgy", "bangbus",
        "cock", "bigcock", "hardcock", "lovecock", "suckcock", "c0ck", "c*ck", "d1ck", "d1ildo",
        "t1ts", "t!ts", "t!tties", "deepthroat", "cumshot", "booty", "milf", "bj", "daddy69",
        "wetgirl", "wetgirls", "wetpussy", "wetass", "wetdream", "hotgirl", "sexygirl",
        "naughtygirl", "bustybabe", "sluttygirl", "powerbottom", "bottom69", "bottomboy",
        "bottomslut", "hungryholes", "openholes", "tightpussy", "wetholes",
        "deepinside", "bangme", "lickme", "takeit", "spreadlegs"
    ],
    "violence": [
        "kill", "murder", "bomb", "terror", "jihad", "isis", "martyr", "shoot", "lynch", "execute",
        "stab", "bloodbath", "genocide", "behead", "decapitate", "suicidebomber", "hang",
        "burnalive", "slasher"
    ],
    "child_abuse": [
        "incest", "pedo", "paedo", "childporn", "cp", "map", "loli", "shota", "underage", "groomer",
        "molest", "bestiality", "zoophilia", "kiddieporn", "babyrape", "toddlercon", "preteen"
    ],
    "drugs": [
        "cocaine", "crack", "heroin", "meth", "lsd", "ecstasy", "mdma", "molly", "weed", "420", "dope",
        "drugdealer", "ketamine", "xanax", "valium", "adderall", "oxy", "opium", "pills",
        "fentanyl", "kush", "blunt", "reefer", "ganja", "marijuana", "stoner", "highaf", "smokeweed"
    ],
    "hate": [
        "jew", "jews", "kike", "yid", "heeb", "jewboy", "hooknose", "oven dodger",
        "gas the jews", "dirty jew", "filthy jew", "jew rat", "jewlover", "zionazi",
        "synagogue of satan", "globalist agenda", "zionist puppet", "jewish elite", "jewish cabal",
        "jewish power", "zionist control", "blood libel", "goyim know", "(((name)))", "(((jews)))",
        "jewslayer", "ihatejews", "killjews", "gasjews", "banjews", "fuckjews", "diejews", "jewhater",
        "hatechristians", "banmuslims", "killmuslims", "fuckgays", "killtrans", "tranny", "nigger", "dyke", "faggot"
    ],
    "risky": [
        "maga", "patriot", "liberty", "freedom", "trump", "biden", "deepstate", "conspiracy",
        "cashapp", "crypto", "freebitcoin", "lottery", "giveaway", "venmo", "investnow"
    ],
    "obfuscated": [
        "n1gger", "n!gger", "n1gg@", "d1ke", "d!ke", "tr4nny", "c*m", "c**m", "fux", "fuq", "phuck",
        "j3w", "j3ws"
    ]
}


In [68]:
def classify_with_category(username):
    uname = normalize_username(username)

    # Check for (((echo))) pattern
    if re.search(r"\({3}.*\){3}", username):
        return "blocked", "hate"

    # Hate verb + target combo
    hate_verbs = ["hate", "kill", "ban", "destroy", "fuck", "bomb", "die"]
    targeted_groups = ["jews", "muslims", "christians", "gays", "trans", "women"]
    if any(h in uname for h in hate_verbs) and any(t in uname for t in targeted_groups):
        return "blocked", "hate"

    # Check keywords
    for category, word_list in keyword_lists.items():
        for word in word_list:
            if word in uname:
                if category == "risky":
                    return "risky", category
                else:
                    return "blocked", category

    return "safe", "clean"


In [73]:
test_username("pussy")

🧾 Username: pussy → Prediction: BLOCKED (profanity)
