# Jigsaw Unintended Bias Audit – Draft Notebook

*Responsible Data Science, Spring 2025*
Michael and Julius
Following is written in collabration with AI, almost all documentation is auto generated while some are handwritten, the co-pilot auto-gens sufficed. And some of the code is also AI assisted via co-pilot. 

This Colab‑friendly notebook supports the **draft report**. It:
1. Installs all needed libraries (Kaggle API, transformers …).
2. Downloads the competition data via the Kaggle API.
3. Performs a lightweight exploratory analysis.
4. Trains a quick TF‑IDF + LogReg baseline and evaluates the competition AUC metric.

In [None]:

# # ---- Environment bootstrap -----------------------------
# !pip -q install --upgrade kaggle==1.6.8 datasets transformers evaluate scikit-learn textstat pyarrow --no-warn-script-location
# import os, json, pathlib, subprocess, textwrap, zipfile, shutil, random, numpy as np, pandas as pd
# print('✓ Libraries installed')

# ---- Environment bootstrap (Colab‑safe) -------------------
!pip -q install --upgrade kaggle datasets transformers evaluate textstat pyarrow --no-warn-script-location
import os, json, pathlib, subprocess, random, numpy as np, pandas as pd
print("✓ Libraries installed (Colab's built‑in PyTorch is intact)")



✓ Libraries installed (Colab's built‑in PyTorch is intact)


In [None]:
%%bash
# clean start – uninstall incompatible HF fragments
pip -q uninstall -y transformers accelerate peft datasets evaluate -qq || true

# single pinned stack tested on Colab (A100 & T4, CUDA 12.1)
pip -q install --upgrade --no-warn-script-location \
  "torch==2.2.1+cu121" "torchvision==0.17.1+cu121" \
     -f https://download.pytorch.org/whl/cu121/torch_stable.html \
  transformers==4.41.0 \
  accelerate==0.25.0 \
  peft==0.11.1 \
  datasets==2.19.1 \
  evaluate==0.4.2 \
  kaggle==1.6.8 optuna==0.13.0 nltk==3.8.1 attrdict==2.0.1 \
  tqdm==4.66.2 fsspec==2023.6.0 scikit-learn==1.4.2

python - <<'PY'
import torch, transformers, accelerate, peft, datasets, evaluate
print(f"✓ stack ready → torch {torch.__version__}, transformers {transformers.__version__}")
PY


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 757.3/757.3 MB 1.5 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.0/7.0 MB 31.6 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 43.8/43.8 kB 1.8 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 84.6/84.6 kB 4.0 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 85.0/85.0 kB 7.8 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 57.6/57.6 kB 5.2 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 78.6/78.6 kB 7.7 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.1/9.1 MB 87.3 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 265.7/265.7 kB 24.2 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 251.6/251.6 kB 22.6 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 542.0/542.0 kB 41.5 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 84.1/84.1 kB 8.4 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2025.3.2 requires fsspec==2025.3.2, but you have fsspec 2023.6.0 which is incompatible.
textblob 0.19.0 requires nltk>=3.9, but you have nltk 3.8.1 which is incompatible.
torchaudio 2.6.0+cu124 requires torch==2.6.0, but you have torch 2.2.1+cu121 which is incompatible.

A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<stdin>", line 1, in <module>
  File "/usr/local/lib/python3.11/dist

In [None]:

# ----  Kaggle authentication ------------------------------
from google.colab import files
import shutil # import the shutil module
home = pathlib.Path.home()
kaggle_path = home/'.kaggle'
kaggle_path.mkdir(exist_ok=True)
if not (kaggle_path/'kaggle.json').exists():
    print("Upload kaggle.json...")
    uploaded = files.upload()
    for fn in uploaded.keys():
        shutil.move(fn, kaggle_path/'kaggle.json')
        os.chmod(kaggle_path/'kaggle.json', 0o600)
!kaggle config set -n path -v /content/data
print('✓ Kaggle API configured')


Upload kaggle.json...


Saving kaggle.json to kaggle (1).json
- path is now set to: /content/data
✓ Kaggle API configured


In [None]:

# ----  Download data --------------------------------------
COMP = "jigsaw-unintended-bias-in-toxicity-classification"
!kaggle competitions download -c $COMP -p /content/data --force
!unzip -q /content/data/$COMP*.zip -d /content/data
print('Files:', os.listdir('/content/data')[:10])


Downloading jigsaw-unintended-bias-in-toxicity-classification.zip to /content/data
 93% 676M/723M [00:00<00:00, 1.16GB/s]
100% 723M/723M [00:00<00:00, 1.17GB/s]
Files: ['sample_submission.csv', 'identity_individual_annotations.csv', 'train.csv', 'all_data.csv', 'test.csv', 'test_public_expanded.csv', 'jigsaw-unintended-bias-in-toxicity-classification.zip', 'test_private_expanded.csv', 'toxicity_individual_annotations.csv']


In [None]:

# ---- Load dataset & quick EDA ---------------------------
import pandas as pd, pyarrow.parquet as pq, numpy as np
train_csv = '/content/data/train.csv'
df = pd.read_csv(train_csv, nrows=200_000)  # sample for Colab RAM
print(df.shape)
display(df.head())
print(df[['target']].describe())


(200000, 45)


Unnamed: 0,id,target,comment_text,severe_toxicity,obscene,identity_attack,insult,threat,asian,atheist,...,article_id,rating,funny,wow,sad,likes,disagree,sexual_explicit,identity_annotator_count,toxicity_annotator_count
0,59848,0.0,"This is so cool. It's like, 'would you want yo...",0.0,0.0,0.0,0.0,0.0,,,...,2006,rejected,0,0,0,0,0,0.0,0,4
1,59849,0.0,Thank you!! This would make my life a lot less...,0.0,0.0,0.0,0.0,0.0,,,...,2006,rejected,0,0,0,0,0,0.0,0,4
2,59852,0.0,This is such an urgent design problem; kudos t...,0.0,0.0,0.0,0.0,0.0,,,...,2006,rejected,0,0,0,0,0,0.0,0,4
3,59855,0.0,Is this something I'll be able to install on m...,0.0,0.0,0.0,0.0,0.0,,,...,2006,rejected,0,0,0,0,0,0.0,0,4
4,59856,0.893617,haha you guys are a bunch of losers.,0.021277,0.0,0.021277,0.87234,0.0,0.0,0.0,...,2006,rejected,0,0,0,1,0,0.0,4,47


              target
count  200000.000000
mean        0.098020
std         0.194695
min         0.000000
25%         0.000000
50%         0.000000
75%         0.166667
max         1.000000


In [None]:

# ---- Baseline TF‑IDF + Logistic Regression --------------
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
X_train, X_valid, y_train, y_valid = train_test_split(df['comment_text'].fillna(' '),
                                                     df['target']>=0.5, test_size=0.2, random_state=42)
vectorizer = TfidfVectorizer(max_features=100_000, ngram_range=(1,2), stop_words='english')
Xtr = vectorizer.fit_transform(X_train)
Xva = vectorizer.transform(X_valid)
clf = LogisticRegression(max_iter=1000, n_jobs=-1)
clf.fit(Xtr, y_train)
pred = clf.predict_proba(Xva)[:,1]
auc = roc_auc_score(y_valid, pred)
print(f"Validation ROC‑AUC: {auc:.4f}")


Validation ROC‑AUC: 0.9322


In [41]:
# --- tokeniser & HF validation Dataset (needed once) ----------
from transformers import AutoTokenizer
from datasets      import Dataset

tok_fn = lambda s: tok(
    s["comment_text"],
    truncation=True,
    padding="max_length",
    max_length=220,
)

# Build a HF Dataset that lines up with X_valid / y_valid
bert_valid_ds = Dataset.from_pandas(
    pd.DataFrame({"comment_text": X_valid.reset_index(drop=True)})
).map(tok_fn, batched=True, remove_columns=["comment_text"]).with_format("torch")

print("✓ bert_valid_ds", bert_valid_ds)


Map:   0%|          | 0/40000 [00:00<?, ? examples/s]

✓ bert_valid_ds Dataset({
    features: ['input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 40000
})


In [24]:
from transformers import (AutoTokenizer, AutoModelForSequenceClassification,
                          pipeline, BitsAndBytesConfig)
import torch, math, numpy as np

device = 0 if torch.cuda.is_available() else -1
dtype  = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16

tok   = AutoTokenizer.from_pretrained("bert-base-uncased")
bert  = AutoModelForSequenceClassification.from_pretrained(
            "bert-base-uncased",
            num_labels=1, problem_type="single_label_regression",
            torch_dtype=dtype, low_cpu_mem_usage=True).to(device)

pipe  = pipeline("text-classification",
                 model=bert, tokenizer=tok,
                 function_to_apply="sigmoid",
                 batch_size=64,            # GPU‑friendly
                 device=device)

print(f"✓ BERT on {'GPU' if device==0 else 'CPU'}  |  dtype={dtype}")

pred_bert = np.array([p["score"] for p in pipe(list(X_valid))])

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cuda:0


✓ BERT on GPU  |  dtype=torch.bfloat16


In [34]:
# ── Collect predictions & build df_valid ────────────────────────────────
import numpy as np, pandas as pd, pathlib, json, torch, math
from tqdm.auto import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# ---------- 1) TF‑IDF predictions (already trained) --------------------
pred_tfidf = clf.predict_proba(Xva)[:, 1]          # shape (|X_valid|,)

# ---------- 2) Fast GPU BERT inference ---------------------------------
DEVICE = 0 if torch.cuda.is_available() else -1
DTYPE  = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
torch.backends.cuda.matmul.allow_tf32 = True       # +speed on A100/T4

tok  = AutoTokenizer.from_pretrained("bert-base-uncased")
bert = AutoModelForSequenceClassification.from_pretrained(
            "bert-base-uncased",
            num_labels=1,
            problem_type="single_label_regression",
            torch_dtype=DTYPE,
            low_cpu_mem_usage=True
        ).to(DEVICE)

pipe = pipeline(
    task              = "text-classification",
    model             = bert,
    tokenizer         = tok,
    function_to_apply = "sigmoid",
    device            = DEVICE,
    batch_size        = 64
)

print(f"✓ BERT pipeline  •  device={'GPU' if DEVICE==0 else 'CPU'}  •  dtype={DTYPE}")

# ensure we hand the pipeline *lists of str*, never NumPy arrays
X_valid_list = X_valid.fillna(" ").astype(str).tolist()

pred_bert = []
for i in tqdm(range(0, len(X_valid_list), 64), desc="BERT batches"):
    batch = X_valid_list[i : i+64]           # Python list of  ≤64 strings
    scores = pipe(batch)                     # [{"label":"POS","score":…}, …]
    pred_bert.extend([p["score"] for p in scores])

pred_bert = np.asarray(pred_bert, dtype=np.float32)   # shape (|X_valid|,)
assert len(pred_bert) == len(X_valid_list), "length mismatch!"

# ---------- 3) build & save df_valid -----------------------------------
IDENTITY_COLS = ["male","female","black","white","asian","christian",
                 "jewish","muslim","hindu","buddhist","atheist","lgbtq",
                 "transgender"]

df_valid = pd.DataFrame({
    "comment_text": X_valid_list,
    "target":       y_valid.reset_index(drop=True),
    "pred_tfidf":   pred_tfidf,
    "pred_bert":    pred_bert,
    **{c: 0. for c in IDENTITY_COLS}          # dummy identity columns
})

pathlib.Path("logs").mkdir(exist_ok=True)
df_valid.to_parquet("logs/baseline_valid.parquet", index=False)

print("✓ df_valid saved – shape", df_valid.shape,
      "| pred_tfidf", pred_tfidf.shape,
      "| pred_bert",  pred_bert.shape)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cuda:0


✓ BERT pipeline  •  device=GPU  •  dtype=torch.bfloat16


BERT batches:   0%|          | 0/625 [00:00<?, ?it/s]

✓ df_valid saved – shape (40000, 17) | pred_tfidf (40000,) | pred_bert (40000,)


In [36]:
# ---- Save artifacts -------------------------------------
out_dir = pathlib.Path('/content/artifacts')
out_dir.mkdir(exist_ok=True)
df_valid = pd.DataFrame({'pred': pred, 'target': y_valid.reset_index(drop=True)})
df_valid.to_parquet(out_dir/'baseline_valid.parquet', index=False)
vectorizer_path = out_dir/'tfidf.pkl'
clf_path = out_dir/'logreg.pkl'
import pickle, gzip
with gzip.open(vectorizer_path, 'wb') as f: pickle.dump(vectorizer, f)
with gzip.open(clf_path, 'wb') as f: pickle.dump(clf, f)
print('Artifacts saved to', out_dir)

Artifacts saved to /content/artifacts


In [37]:
import pandas as pd
from scipy.stats import pearsonr
from io import StringIO

profile = []
for col in df.columns:
    series = df[col]
    dtype = series.dtype
    miss = series.isna().mean()*100
    if pd.api.types.is_numeric_dtype(series):
        rng = (series.min(), series.max())
        profile.append((col, dtype.name, miss, f"{rng[0]:.2f} – {rng[1]:.2f}"))
    else:
        uniq = series.nunique()
        profile.append((col, dtype.name, miss, f"{uniq} unique"))

tex_table = StringIO()
tex_table.write("\\begin{tabular}{llll}\\toprule\n")
tex_table.write("Feature & Dtype & Missing\\,(\\%) & Range / Card.\\\\\\midrule\n")
for row in profile:
    tex_table.write(f"{row[0]} & {row[1]} & {row[2]:.2f} & {row[3]}\\\\\n")
tex_table.write("\\bottomrule\\end{tabular}")
with open("feature_profile.tex", "w") as f: f.write(tex_table.getvalue())
print("✓ feature_profile.tex saved")


✓ feature_profile.tex saved


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [40]:
df_valid = pd.DataFrame({
    "comment_text": X_valid.reset_index(drop=True),   # same ordering
    "target":       y_valid.reset_index(drop=True),
    **{c: 0. for c in IDENTITY_COLS},                 # dummy identity flags
    "pred_tfidf":   pred_tfidf,
    "pred_bert":    pred_bert
})


In [None]:
import matplotlib.pyplot as plt
x = np.arange(len(IDENTITY_COLS))
width = 0.35
plt.bar(x - width/2, [metrics["tfidf"]["subgroup"][c] for c in IDENTITY_COLS], width,
        label="TF–IDF")
plt.bar(x + width/2, [metrics["bert"]["subgroup"][c]  for c in IDENTITY_COLS], width,
        label="BERT")
plt.axhline(metrics["tfidf"]["overall"], linestyle="--")
plt.axhline(metrics["bert"]["overall"], linestyle="--")
plt.xticks(x, IDENTITY_COLS, rotation=90)
plt.ylabel("AUC")
plt.legend()
plt.tight_layout()
plt.savefig("figs/auc_barplot.pdf")
print("✓ Figure saved to figs/auc_barplot.pdf")
