# COMPAS Analysis with DivDis

This notebook implements the DivDis (Diversify and Disambiguate) approach on the COMPAS dataset.

In [3]:
# Import required libraries
from utils import get_base_dfs, COMPASDataset, create_data_splits, explain_model, class_weights
from models import DivDisClassifier, BaseClassifier, ModelWrapper, HeadWrapper, ResidualMLPClassifier
from train import train_model, trainDivDis
from torch.utils.data import ConcatDataset, DataLoader
import torch.nn as nn
import torch
from explainer import Explainer
import numpy as np

ModuleNotFoundError: No module named 'captum'

In [7]:
import pandas as pd

# Read the Jigsaw data from the local CSV file into a DataFrame.
df_jigsaw = pd.read_csv('jigsaw_dataset/all_data.csv')

# Quick inspection: dimensions, a preview of the top rows, then the column names.
print("Dataset shape:", df_jigsaw.shape)

print("\nFirst few rows:")
jigsaw_preview = df_jigsaw.head()
display(jigsaw_preview)

print("\nColumns:")
jigsaw_column_names = df_jigsaw.columns.tolist()
print(jigsaw_column_names)

Dataset shape: (1999516, 46)

First few rows:


Unnamed: 0,id,comment_text,split,created_date,publication_id,parent_id,article_id,rating,funny,wow,...,white,asian,latino,other_race_or_ethnicity,physical_disability,intellectual_or_learning_disability,psychiatric_or_mental_illness,other_disability,identity_annotator_count,toxicity_annotator_count
0,1083994,He got his money... now he lies in wait till a...,train,2017-03-06 15:21:53.675241+00,21,,317120,approved,0,0,...,,,,,,,,,0,67
1,650904,Mad dog will surely put the liberals in mental...,train,2016-12-02 16:44:21.329535+00,21,,154086,approved,0,0,...,,,,,,,,,0,76
2,5902188,And Trump continues his lifelong cowardice by ...,train,2017-09-05 19:05:32.341360+00,55,,374342,approved,1,0,...,,,,,,,,,0,63
3,7084460,"""while arresting a man for resisting arrest"".\...",test,2016-11-01 16:53:33.561631+00,13,,149218,approved,0,0,...,,,,,,,,,0,76
4,5410943,Tucker and Paul are both total bad ass mofo's.,train,2017-06-14 05:08:21.997315+00,21,,344096,approved,0,0,...,,,,,,,,,0,80



Columns:
['id', 'comment_text', 'split', 'created_date', 'publication_id', 'parent_id', 'article_id', 'rating', 'funny', 'wow', 'sad', 'likes', 'disagree', 'toxicity', 'severe_toxicity', 'obscene', 'sexual_explicit', 'identity_attack', 'insult', 'threat', 'male', 'female', 'transgender', 'other_gender', 'heterosexual', 'homosexual_gay_or_lesbian', 'bisexual', 'other_sexual_orientation', 'christian', 'jewish', 'muslim', 'hindu', 'buddhist', 'atheist', 'other_religion', 'black', 'white', 'asian', 'latino', 'other_race_or_ethnicity', 'physical_disability', 'intellectual_or_learning_disability', 'psychiatric_or_mental_illness', 'other_disability', 'identity_annotator_count', 'toxicity_annotator_count']


NameError: name 'df' is not defined

In [None]:
# Select every column whose lower-cased name contains one of the race keywords.
race_columns = []
for column_name in df_jigsaw.columns:
    lowered_name = column_name.lower()
    if any(keyword in lowered_name for keyword in ['race', 'black', 'white', 'asian', 'latino', 'hispanic']):
        race_columns.append(column_name)

# Fraction of missing entries per selected column, scaled to a percentage.
nan_stats = df_jigsaw[race_columns].isna().mean() * 100

# Report one line per column, rounded to two decimals.
print("Percentage of NaN values in race columns:")
for column_name, missing_pct in nan_stats.items():
    print(f"{column_name}: {missing_pct:.2f}%")

# Row count of the whole frame, for context on the percentages above.
print(f"\nTotal rows in dataset: {len(df_jigsaw)}")

## Data Preparation
Load and prepare the COMPAS dataset

In [None]:
# Fetch the two base frames; only `violent` is used in this cell.
non_violent, violent = get_base_dfs()

# Target column name. It is kept as the LAST entry of train_columns.
label_column = "two_years_r"

# Columns selected from the frame: the input features followed by the target.
train_columns = [
    "juv_fel_count",
    "juv_misd_count",
    "juv_other_count",
    "priors_count",
    "african-american",
    "caucasian",
    "hispanic",
    "other",
    "asian",
    "native-american",
    "less25",
    "greater45",
    "25to45",
    "felony",
    "misdemeanor",
    label_column,
]

# Restrict the frame to the selected columns and wrap it in the project dataset,
# telling it which column is the label.
dataDf = violent[train_columns]
data = COMPASDataset(dataDf, label_column)

# create_data_splits returns eight values; the third and seventh are discarded here.
(
    trainData,
    unlabelData,
    _,
    testData,
    rawTrain,
    rawUnlabel,
    _,
    testRaw,
) = create_data_splits(data)

## Base Model Training
Train a baseline ResidualMLP model

In [None]:
# Pull a single batch from the training loader to read off the feature width.
dataiter = iter(trainData)
batch = next(dataiter)
inputs, targets = batch
input_dim = int(inputs.shape[1])

# Strip the label from train_columns so the list holds feature names only
# (it is later handed to explain_model as the feature-name list).
# This is done by name rather than with a bare `train_columns.pop()`: an
# unconditional pop removes one more entry on every re-run of this cell
# (first the label, then "misdemeanor", ...), silently misaligning names
# with features. The guarded removal is a no-op on re-runs.
if "two_years_r" in train_columns:
    train_columns.remove("two_years_r")

# Initialize base model
baseModel = ResidualMLPClassifier(
    input_dim=input_dim,
    hidden_dim=64,
    num_blocks=1,
    dropout_rate=0.3,
    num_classes=2
)

# The baseline is fit on the raw train and raw unlabeled splits combined.
fullTrain = ConcatDataset([rawTrain, rawUnlabel])
# Keyword arguments make the batch size and shuffling explicit.
fullLoader = DataLoader(fullTrain, batch_size=64, shuffle=True)

# Per-class weights computed from the combined set feed the cross-entropy loss.
weights = class_weights(fullTrain)
criterion = nn.CrossEntropyLoss(weight=weights)

# Train base model. The trailing positional arguments (3, 0.001) are presumably
# the epoch count and learning rate — confirm against train.train_model.
baseModel, history = train_model(baseModel, fullLoader, testData, criterion, 3, 0.001)

## DivDis Model Training
Train the DivDis model with multiple heads

In [None]:
# Build the DivDis classifier for the detected input width and train it.
# The literal 15 is presumably the number of epochs — confirm against train.trainDivDis.
divDisModel = DivDisClassifier(input_dim)
full_loss = trainDivDis(divDisModel, 15, trainData, unlabelData, testData, criterion)

# Wrap each head, switch it to eval mode, and run the explainer on the raw test
# split under a per-head display name.
for head_idx in range(divDisModel.num_heads):
    wrapped_head = HeadWrapper(divDisModel, head_idx)
    wrapped_head.eval()
    explain_model(
        wrapped_head,
        divDisModel.num_classes,
        testRaw,
        f"DivDis model head_{head_idx}",
        input_dim,
        train_columns,
    )