Setup & Imports

In [13]:
# Cell 1: Setup & Imports
import os, sys
import pandas as pd
import pickle

from fairlearn.metrics import (
    MetricFrame,
    selection_rate,
    true_positive_rate,
    false_positive_rate,
    demographic_parity_difference
)
from sklearn.metrics import accuracy_score

# We’re in …/Toxic_Bias_Audit/ethical_audit
# Go up one level to project root
repo_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

print("Repo root:", repo_root)
print("Contents at repo root:", os.listdir(repo_root))

# Define paths
raw_csv       = os.path.join(repo_root, 'data', 'raw', 'train.csv')
val_csv       = os.path.join(repo_root, 'data', 'processed', 'val.csv')
model_path    = os.path.join(repo_root, 'experiments', 'logreg_multilabel', 'logreg_multilabel_tuned.pkl')
vectorizer_path = os.path.join(repo_root, 'data', 'processed', 'tfidf.pkl')

print("raw_csv     →", raw_csv)
print("val_csv     →", val_csv)
print("model_path  →", model_path)


Repo root: C:\Users\ual-laptop\Toxic_Bias_Audit
Contents at repo root: ['.git', '.virtual_documents', 'anaconda_projects', 'data', 'Dockerfile', 'environment.yml', 'ethical_audit', 'experiments', 'notebooks', 'README.md', 'report', 'src', 'tests']
raw_csv     → C:\Users\ual-laptop\Toxic_Bias_Audit\data\raw\train.csv
val_csv     → C:\Users\ual-laptop\Toxic_Bias_Audit\data\processed\val.csv
model_path  → C:\Users\ual-laptop\Toxic_Bias_Audit\experiments\logreg_multilabel\logreg_multilabel_tuned.pkl


Load Data, Model & Sensitive Features

In [15]:
# Cell 2: Load data and determine sensitive-feature columns

# 1. Load the raw dataset to inspect available columns
raw_df = pd.read_csv(raw_csv)
print("Raw columns:\n", raw_df.columns.tolist())

# 2. Load the processed validation split (with 'id')
val_df = pd.read_csv(val_csv)
print("\nProcessed val columns:\n", val_df.columns.tolist())

# 3. Identify identity-related columns in raw_df
#    We’ll look for columns that represent demographics or identities:
possible_identities = [
    'male','female','transgender','other_gender',
    'heterosexual','homosexual_gay_or_lesbian',
    'christian','jewish','muslim','hindu',
    'black','white','psychiatric_or_mental_illness',
    'intellectual_or_learning_disability',
    'other_race_or_ethnicity'
]
# Keep only those actually present
sens_feats = [c for c in possible_identities if c in raw_df.columns]
print("\nUsing sensitive features:", sens_feats)

# 4. Fill missing and merge onto val_df by 'id'
raw_df[sens_feats] = raw_df[sens_feats].fillna(0)
audit_df = val_df.merge(
    raw_df[['id'] + sens_feats],
    on='id',
    how='left'
).fillna(0)

# 5. Prepare arrays for the 'toxic' label audit
X_val = audit_df['comment_text']
y_true = audit_df['toxic']
# For demonstration, pick the first sensitive feature if any
group_feat = sens_feats[0] if sens_feats else None
if group_feat:
    group = audit_df[group_feat].astype(int)
    print(f"\nGrouping by: {group_feat}")
    print(group.value_counts())
else:
    raise ValueError("No sensitive-feature columns found in raw data.")


Raw columns:
 ['id', 'comment_text', 'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

Processed val columns:
 ['comment_text', 'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

Using sensitive features: []


KeyError: 'id'

Compute Fairness Metrics

In [None]:
# Choose one sensitive feature to demonstrate, e.g. 'male'
group = val_with_raw['male'].astype(int)

# Build a MetricFrame
metrics = {
    'accuracy': accuracy_score,
    'selection_rate': selection_rate,
    'true_positive_rate': true_positive_rate,
    'false_positive_rate': false_positive_rate
}

mf = MetricFrame(
    metrics=metrics,
    y_true=val_with_raw['toxic'],
    y_pred=y_pred,
    sensitive_features=group
)

print("=== Overall Metrics ===")
print(mf.overall)
print("\n=== By Group ===")
print(mf.by_group)
print("\n=== Demographic Parity Difference ===")
print(demographic_parity_difference(
    y_true=val_with_raw['toxic'],
    y_pred=y_pred,
    sensitive_features=group
))


Visualize Disparities

In [None]:
import matplotlib.pyplot as plt

# Bar plot of TPR by group
mf.by_group['true_positive_rate'].plot(kind='bar', title='TPR by Male=0 vs Male=1')
plt.ylabel("True Positive Rate")
plt.show()

# Bar plot of selection rate
mf.by_group['selection_rate'].plot(kind='bar', title='Selection Rate by Male=0 vs Male=1')
plt.ylabel("Selection Rate")
plt.show()


Save Audit Results

In [None]:
out_dir = os.path.join(repo_root, 'notebooks', 'ethical_audit', 'results')
os.makedirs(out_dir, exist_ok=True)
mf.by_group.to_csv(os.path.join(out_dir, 'fairness_by_group.csv'))