Train, evaluate, and save the best model

In [None]:
# imports
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
import joblib


In [None]:
# Read the training CSV from disk.
train_df = pd.read_csv('data/kaggle/train.csv')

# The comment text is the only input feature; the six label columns listed
# here form the multi-label target.
target_columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
X_full = train_df['comment_text']
y_full = train_df[target_columns]

In [None]:

# Prepare features and multi-label targets.
# X / y are aliases of the X_full / y_full selections made in the loading cell
# instead of a second, hand-copied column list, so the two can never drift
# apart. The train/eval split below is driven by X_full / y_full.
X = X_full
y = y_full

In [None]:
# %%
# Hold out 20% of the rows for evaluation and keep the other 80% for fitting.
# Rows are shuffled before splitting; the fixed seed makes the split repeatable.
split_options = dict(test_size=0.2, random_state=42, shuffle=True)
X_train, X_eval, y_train, y_eval = train_test_split(X_full, y_full, **split_options)


In [None]:
# Count features over unigrams and bigrams. max_df=0.9 drops terms that occur
# in more than 90% of the documents; min_df=1 keeps every remaining term.
ngram_counter = CountVectorizer(ngram_range=(1, 2), max_df=0.9, min_df=1)

# One-vs-rest wrapper: a separate logistic regression is fitted per target column.
per_label_logreg = OneVsRestClassifier(LogisticRegression(C=1.0, max_iter=1000))

final_pipeline = Pipeline(steps=[
    ('vect', ngram_counter),
    ('clf', per_label_logreg),
])

In [None]:
# Fit the vectorizer and the per-label classifiers on the 80% training split.
final_pipeline.fit(X_train, y_train)

In [None]:
# Score the fitted pipeline on the held-out 20% split.
from sklearn.metrics import classification_report, accuracy_score

y_pred = final_pipeline.predict(X_eval)
print("=== Final Model Evaluation on 20% Hold-Out Set ===")

# y_eval has one column per label, so accuracy_score is subset accuracy here:
# a row counts as correct only when every one of its labels is predicted correctly.
holdout_accuracy = accuracy_score(y_eval, y_pred)
print(f"Accuracy: {holdout_accuracy:.3f}")

# Per-label precision / recall / F1, with rows named after the target columns.
label_names = y_full.columns
print(classification_report(y_eval, y_pred, target_names=label_names))

In [None]:
# %%
# Serialize the fitted pipeline (vectorizer + classifiers) to disk.
model_path = 'lr_cv_tuned_.joblib'
joblib.dump(value=final_pipeline, filename=model_path)
print(f"\nModel trained on 80% of data and saved to '{model_path}'")

TEST MODEL ON DATA SCRAPED FROM REDDIT

In [None]:
import pandas as pd
import joblib
from pathlib import Path

In [None]:
# Location of the serialized pipeline; this is the same file name the training
# section passes to joblib.dump.
MODEL_PATH = Path("lr_cv_tuned_.joblib")

In [None]:
# Directory with the Reddit CSV exports (files are named after subreddits).
DATA_DIR = Path("data/reddit")  # or wherever you put them

# File names to load, in the order they will be concatenated.
subreddit_csvs = (
    "r_conservative.csv",
    "r_democrats.csv",
    "r_europe.csv",
    "r_gunners.csv",
    "r_liverpoolfc.csv",
    "r_politics.csv",
    "r_worldnews.csv",
)
files = [DATA_DIR / csv_name for csv_name in subreddit_csvs]

In [None]:
# Read every CSV listed in `files`, then stack the frames into one with a
# fresh 0..n-1 index (the per-file indices are discarded).
df_list = list(map(pd.read_csv, files))
df_all = pd.concat(df_list, ignore_index=True)

In [None]:
# Load the serialized pipeline from MODEL_PATH.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model files from a trusted source.
model = joblib.load(MODEL_PATH)

In [None]:
# Draw a seeded (repeatable) random sample of 100 rows from all Reddit
# comments, then renumber the sampled rows from 0.
sampled_rows = df_all.sample(n=100, random_state=42)
df_sample = sampled_rows.reset_index(drop=True)

In [None]:
# Predict labels for the 100-comment sample.
# NOTE: `preds` is reassigned by the next cell, which predicts on all comments.
label_cols = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]
sample_bodies = df_sample["body"]
preds = model.predict(sample_bodies)    # shape (n_samples, 6)

In [None]:
# Predict labels for every Reddit comment; this replaces the `preds` value
# computed for the 100-comment sample.
label_cols = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]
all_bodies = df_all["body"]
preds = model.predict(all_bodies)    # shape (n_samples, 6)

FEATURE IMPORTANCE

In [None]:

import numpy as np

# Pull the fitted vectorizer and one-vs-rest classifier out of the pipeline.
vect = model.named_steps['vect']
ovr  = model.named_steps['clf']
feature_names = vect.get_feature_names_out()

# Transform all Reddit comments into the fitted vocabulary's count matrix.
X_reddit = vect.transform(df_all['body'])

# Total count of every feature across all Reddit comments. This does not
# depend on the label, so it is computed once here instead of scaling the
# whole sparse matrix by the weights inside the loop.
# np.asarray(...).ravel() flattens the (1, n_features) column-sum result.
feature_totals = np.asarray(X_reddit.sum(axis=0)).ravel()

# Compute and print feature importances per label.
# ovr.estimators_[idx] is paired with label_cols[idx]; this relies on
# label_cols listing the labels in the same order as the training targets.
for idx, label in enumerate(label_cols):
    lr      = ovr.estimators_[idx]
    w       = lr.coef_[0]
    # Contribution of feature j summed over comments:
    #   sum_i X[i, j] * w[j]  ==  w[j] * sum_i X[i, j]
    contrib = feature_totals * w

    # Sort once; the largest contributions are at the end, the most negative at the start.
    order   = np.argsort(contrib)
    top_idx = order[-20:][::-1]
    bot_idx = order[:20]

    print(f"\n=== Reddit-based importance for '{label}' ===")
    print("Top 20 →")
    for j in top_idx:
        print(f"  {feature_names[j]:>15} : {contrib[j]:+.2f}")
    print("\nBottom 20 →")
    for j in bot_idx:
        print(f"  {feature_names[j]:>15} : {contrib[j]:+.2f}")


=== Reddit-based importance for 'toxic' ===
Top 20 →
              you : +1701.68
             shit : +891.04
             fuck : +862.03
          fucking : +807.31
               is : +684.19
               he : +634.37
              are : +430.08
           stupid : +389.55
             hate : +270.75
              get : +264.57
               go : +252.46
           in the : +242.86
             your : +230.57
             like : +217.96
              off : +207.50
             want : +181.37
            idiot : +150.55
              all : +149.38
              ass : +141.96
               up : +135.75

Bottom 20 →
              the : -2087.79
               to : -1488.91
               in : -1037.62
               it : -545.29
              for : -541.98
              but : -483.31
              and : -455.44
               at : -400.41
               of : -388.54
              not : -339.13
               we : -272.45
              can : -246.36
            there : -239.78
     

In [None]:
# Combine the 100-comment sample with its own predictions.
# The predictions are computed from df_sample right here so the rows always
# line up with the sample, regardless of what `preds` currently holds (an
# earlier cell reassigns `preds` to the all-comments predictions, which the
# df_out cell below uses).
sample_preds = model.predict(df_sample["body"])
results = pd.concat([
    df_sample,
    pd.DataFrame(sample_preds, columns=label_cols, index=df_sample.index)
], axis=1)


In [None]:
# Wrap the prediction array in a frame with one column per label, then place
# those columns next to the full Reddit frame (aligned on the row index).
results_df = pd.DataFrame(data=preds, columns=label_cols)
df_out = pd.concat(objs=[df_all, results_df], axis=1)

In [None]:
# Preview the first 10 rows of `results`, then print the column sums of its
# label predictions.
print(results.head(10))
label_totals = results[label_cols].sum()
print("\nLabel counts in this sample:\n", label_totals)


In [None]:
# Write `results` to CSV (row index omitted) and report where it was saved.
output_path = Path("reddit_sample_with_predictions.csv")
results.to_csv(output_path, index=False)
print(f"Saved sample with predictions to: {output_path}")


In [None]:
# Show the first rows of the comments-plus-predictions frame.
preview = df_out.head()
print(preview)

# Summary stats: column sum of each label's predictions over df_out.
flag_counts = df_out[label_cols].sum()
print(flag_counts)

# Persist the full frame to CSV without the row index.
df_out.to_csv("reddit_comments_with_predictions.csv", index=False)