Imports & Setup

In [2]:
import os, sys, pickle
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

# Resolve the repository root (two directory levels above the current
# working directory) and put it at the front of sys.path if it is not
# already listed there.
repo_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

# Locations of the processed artifacts read by the following cells.
processed_dir = os.path.join(repo_root, 'data', 'processed')
tfidf_path, train_csv, val_csv = (
    os.path.join(processed_dir, filename)
    for filename in ('tfidf.pkl', 'train.csv', 'val.csv')
)

# Column names selected as the multi-label targets.
LABELS = [
    'toxic',
    'severe_toxic',
    'obscene',
    'threat',
    'insult',
    'identity_hate',
]


Load Data & Vectorizer

In [3]:
# Read the train / validation splits from the processed-data directory.
train_df = pd.read_csv(train_csv)
val_df = pd.read_csv(val_csv)

# Inputs come from the `comment_text` column; targets are the LABELS columns.
X_train, y_train = train_df['comment_text'], train_df[LABELS]
X_val, y_val = val_df['comment_text'], val_df[LABELS]

# Restore the pickled TF-IDF object. Unpickling can execute arbitrary code,
# so this file must come from a trusted source.
with open(tfidf_path, 'rb') as tfidf_file:
    tfidf = pickle.load(tfidf_file)

# Quick sanity check of what was loaded.
print("Data and TF-IDF loaded:")
print("  • Train shape:", X_train.shape, y_train.shape)
print("  • Val   shape:", X_val.shape,   y_val.shape)


Data and TF-IDF loaded:
  • Train shape: (127656,) (127656, 6)
  • Val   shape: (31915,) (31915, 6)


Define & Train One-vs-Rest Pipeline

In [4]:
# Baseline model: TF-IDF features feeding one balanced logistic regression
# per label (one-vs-rest over the LABELS columns).
base_pipe = Pipeline([
    # NOTE: Pipeline.fit() calls fit_transform on this step, so the loaded
    # vectorizer object is re-fitted on X_train here (and on each CV fold's
    # training part when the pipeline is cloned by a search).
    ('tfidf', tfidf),
    ('clf', OneVsRestClassifier(
        LogisticRegression(
            C=1.0,
            max_iter=1000,
            # Re-weight classes inversely to their frequency; the positive
            # class of every label is rare in this data.
            class_weight='balanced',
            random_state=42
        ),
        # Parallelise across the per-label binary problems. Setting n_jobs
        # on the inner LogisticRegression has no effect for a binary target,
        # so the parallelism belongs on the one-vs-rest wrapper.
        n_jobs=-1
    ))
])

# Train
base_pipe.fit(X_train, y_train)
print("Multi-label logistic regression trained.")

# Evaluate on the held-out validation split.
y_pred_base = base_pipe.predict(X_val)
print("\nClassification report on validation set:\n")
# zero_division=0 keeps the previously reported values (ill-defined scores
# already counted as 0) while silencing the UndefinedMetricWarning raised by
# the "samples avg" row for comments with no true or predicted labels.
print(classification_report(y_val, y_pred_base, target_names=LABELS, zero_division=0))


Multi-label logistic regression trained.

Classification report on validation set:

               precision    recall  f1-score   support

        toxic       0.60      0.85      0.70      3059
 severe_toxic       0.24      0.88      0.38       311
      obscene       0.63      0.88      0.73      1710
       threat       0.16      0.70      0.26        97
       insult       0.50      0.87      0.63      1590
identity_hate       0.18      0.81      0.30       289

    micro avg       0.49      0.86      0.63      7056
    macro avg       0.39      0.83      0.50      7056
 weighted avg       0.54      0.86      0.66      7056
  samples avg       0.06      0.08      0.06      7056



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Hyperparameter Tuning

Quick Grid Search on C

In [5]:
# Candidate values for C of the LogisticRegression wrapped inside the
# pipeline's one-vs-rest step (`clf` -> `estimator` -> `C`).
param_grid = {
    'clf__estimator__C': [0.1, 1.0, 10.0]
}

# 3-fold search over the grid, ranking candidates by macro-averaged F1.
# The pipeline is cloned for every fit, so `base_pipe` itself is not modified.
grid = GridSearchCV(
    estimator=base_pipe,
    param_grid=param_grid,
    scoring='f1_macro',
    cv=3,
    n_jobs=-1,
    verbose=1
)

grid.fit(X_train, y_train)
print("Best parameters found:", grid.best_params_)
print("Best CV f1_macro:", grid.best_score_)

# Re-evaluate on validation set.
# grid.predict delegates to the best estimator, which GridSearchCV re-fits on
# the full training data by default (refit=True).
y_pred_tuned = grid.predict(X_val)
print("\nClassification report (tuned) on validation set:\n")
# zero_division=0 keeps the reported numbers unchanged (ill-defined scores
# were already set to 0) and removes the UndefinedMetricWarning triggered by
# the "samples avg" row.
print(classification_report(y_val, y_pred_tuned, target_names=LABELS, zero_division=0))


Fitting 3 folds for each of 3 candidates, totalling 9 fits
Best parameters found: {'clf__estimator__C': 10.0}
Best CV f1_macro: 0.5135963508061614

Classification report (tuned) on validation set:

               precision    recall  f1-score   support

        toxic       0.58      0.84      0.69      3059
 severe_toxic       0.25      0.79      0.38       311
      obscene       0.60      0.90      0.72      1710
       threat       0.23      0.60      0.33        97
       insult       0.49      0.83      0.62      1590
identity_hate       0.21      0.74      0.33       289

    micro avg       0.50      0.84      0.63      7056
    macro avg       0.39      0.78      0.51      7056
 weighted avg       0.53      0.84      0.65      7056
  samples avg       0.06      0.08      0.07      7056



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Save the Tuned Model

In [9]:
import pickle

# Pipeline that the grid search exposes as its best estimator.
best_model = grid.best_estimator_

# Output location under the repository's experiments folder; the directory
# is created on demand and reused if it already exists.
model_dir = os.path.join(repo_root, 'experiments', 'logreg_multilabel')
model_path = os.path.join(model_dir, 'logreg_multilabel_tuned.pkl')
os.makedirs(model_dir, exist_ok=True)

# Serialize the whole fitted pipeline to a single pickle file.
with open(model_path, 'wb') as model_file:
    pickle.dump(best_model, model_file)

print("Tuned multi-label model saved to:", model_path)


Tuned multi-label model saved to: C:\Users\ual-laptop\Toxic_Bias_Audit\experiments\logreg_multilabel\logreg_multilabel_tuned.pkl
