In [None]:
# ===============================
# Install Requirements (Run this first)
# ===============================
# pip install datasets scikit-learn pandas xgboost joblib

# ===============================
# Imports
# ===============================
from datasets import load_dataset
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib
import gc

# ===============================
# Load Dataset (stratification is applied later, at the train/test split)
# ===============================
# Load the first 100k training rows of Civil Comments and convert them to a
# DataFrame with the dataset's own exporter instead of having pandas build
# the frame by iterating the dataset object.
print("Loading dataset...")
dataset = load_dataset("civil_comments", split="train[:100000]")  # Use more data
df = dataset.to_pandas()

# ===============================
# Investigate Toxicity Distribution
# ===============================
print("Toxicity distribution:", df['toxicity'].describe())

# Create binary target - toxic (1) when the toxicity score is at least 0.5
df['target'] = (df['toxicity'] >= 0.5).astype(int)
# minlength=2 keeps both class counts visible even if one class is absent
print("Class distribution:", np.bincount(df['target'], minlength=2))
print(f"Positive class percentage: {df['target'].mean()*100:.2f}%")

# Example toxic comments
print("\nExample toxic comments:")
toxic_texts = df.loc[df['target'] == 1, 'text']
# Cap the sample size so a slice with fewer than 3 toxic rows does not raise,
# and seed it (42, like the split and the model) so reruns show the same rows.
n_examples = min(3, len(toxic_texts))
for text in toxic_texts.sample(n_examples, random_state=42).values:
    print(f"- {text[:100]}...")

# ===============================
# Feature Engineering - Use Character-Level Features
# ===============================
print("\nExtracting features...")
y = df['target'].values

# ===============================
# Stratified Sampling for Train/Test Split
# ===============================
# Split the raw text BEFORE vectorizing: the TF-IDF vocabulary, document
# frequencies (min_df / max_df) and IDF weights must be learned from the
# training rows only, otherwise test-set statistics leak into the features
# and the evaluation below is optimistic.
# Use stratified sampling to ensure both splits have toxic examples
text_train, text_test, y_train, y_test = train_test_split(
    df['text'], y, test_size=0.2, random_state=42, stratify=y
)

# Use character n-grams instead of words for better signal
vectorizer = TfidfVectorizer(
    analyzer='char_wb',  # Character n-grams including word boundaries
    ngram_range=(3, 5),  # Character sequences of length 3-5
    max_features=20000,  # Use more features
    min_df=5,
    max_df=0.7  # Ignore very common character sequences
)

# Fit on the training text only; the test text is merely transformed
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)
print(f"Feature matrix shape: {X_train.shape} (train), {X_test.shape} (test)")

print(f"Training distribution: {np.bincount(y_train)}")
print(f"Testing distribution: {np.bincount(y_test)}")

# Free memory: the raw text splits are no longer needed once vectorized
del text_train, text_test
gc.collect()

# ===============================
# Train a Simpler Model First - Logistic Regression
# ===============================
print("Training logistic regression model...")
# Hyperparameters collected in one mapping and unpacked into the estimator.
logreg_params = {
    "C": 1.0,
    "class_weight": "balanced",  # Handle class imbalance
    "max_iter": 200,
    "random_state": 42,
    "n_jobs": -1,  # Use all cores
}
model = LogisticRegression(**logreg_params)
model.fit(X_train, y_train)

# ===============================
# Evaluate Model
# ===============================
# Positive-class probabilities (column 1) and hard labels for the held-out split.
y_proba = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)

print("\nClassification Report:")
report_text = classification_report(y_test, y_pred)
print(report_text)

# ===============================
# Sanity Check Test Cases
# ===============================
sample_comments = [
    "You're so wonderful and helpful!",
    "You're a complete waste of oxygen.",
    "What a dumb idea. You're clueless.",
    "I appreciate your perspective."
]

# The vocabulary and the fitted coefficients are the same for every comment,
# so look them up once instead of rebuilding them on each loop iteration.
feature_names = vectorizer.get_feature_names_out()
coefs = model.coef_[0]

print("\nSample Toxicity Predictions:")
for comment in sample_comments:
    vec = vectorizer.transform([comment])
    pred = model.predict(vec)[0]
    prob = model.predict_proba(vec)[0][1]
    print(comment)
    print(f"→ Prediction: {'Toxic' if pred == 1 else 'Non-toxic'} ({prob:.3f})")

    # Find top features for this prediction
    if vec.nnz > 0:  # If we have any features
        # Column indices of the n-grams present in this comment
        indices = vec.nonzero()[1]
        # Contribution of each n-gram = its TF-IDF value times its coefficient
        feature_weights = [(feature_names[i], vec[0, i] * coefs[i]) for i in indices]
        sorted_weights = sorted(feature_weights, key=lambda x: abs(x[1]), reverse=True)

        # Show top contributing features
        print("→ Top contributing features:", sorted_weights[:5])
    print()

# ===============================
# Save Model + Vectorizer
# ===============================
# Persist the fitted classifier and the fitted vectorizer, in that order.
artifacts = (
    (model, "toxic_comment_model_logistic.pkl"),
    (vectorizer, "toxic_vectorizer_char.pkl"),
)
for fitted_obj, out_path in artifacts:
    joblib.dump(fitted_obj, out_path)
print("Saved model and vectorizer.")

# ===============================
# Download from Colab (Optional)
# ===============================
# google.colab can only be imported inside a Colab runtime. This step is
# optional, so skip it elsewhere instead of crashing after the artifacts
# have already been saved to disk.
try:
    from google.colab import files
except ImportError:
    print("google.colab not available; skipping download.")
else:
    files.download("toxic_comment_model_logistic.pkl")
    files.download("toxic_vectorizer_char.pkl")