# NewV3 Compas Fairness NetArch3

In [None]:
!pip install ucimlrepo
!pip install fairlearn
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.quantization import quantize_dynamic, QConfig, prepare_qat, convert
from torch.quantization.fake_quantize import FakeQuantize
from torch.quantization.observer import MovingAverageMinMaxObserver
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, matthews_corrcoef, f1_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from ucimlrepo import fetch_ucirepo
from fairlearn.metrics import MetricFrame
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from fairlearn.metrics import (
    MetricFrame,
    selection_rate,
    true_positive_rate,
    false_positive_rate
)

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7
Collecting fairlearn
  Downloading fairlearn-0.12.0-py3-none-any.whl.metadata (7.0 kB)
Downloading fairlearn-0.12.0-py3-none-any.whl (240 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fairlearn
Successfully installed fairlearn-0.12.0


## Code Persistence

In [None]:
def save_model(model, model_path):
    """Write the model's ``state_dict`` (weights only, not the class) to ``model_path``."""
    weights = model.state_dict()
    torch.save(weights, model_path)
def save_values_to_txt(data, filename):
    """Append key/value records to a text file.

    Args:
        data: Either a single dict or an iterable of dicts. A lone dict is
            treated as a one-element list.
        filename: Path of the file to append to (created if missing).

    Each record is written as one ``key: value`` line per item, followed by
    a blank line that separates it from the next record.
    """
    entries = [data] if isinstance(data, dict) else data
    with open(filename, 'a') as f:
        for entry in entries:
            f.writelines(f'{key}: {value}\n' for key, value in entry.items())
            f.write('\n')
    print(f"Data appended to {filename}")

In [None]:
# Seed every RNG the notebook draws from so repeated runs give the same numbers.
random.seed(42)
np.random.seed(42)
# torch's generator drives weight initialisation and DataLoader shuffling;
# without seeding it the per-fold metrics change from run to run.
torch.manual_seed(42)

### Data Loading

In [None]:
def load_your_dataset():
    """Download the ProPublica COMPAS two-year recidivism CSV and prepare it.

    Returns:
        A ``(X, y, A)`` tuple where
        * ``X`` is the standardised feature matrix: the selected columns after
          ``pd.get_dummies(..., drop_first=True)`` and ``StandardScaler``,
        * ``y`` is the label-encoded ``two_year_recid`` column,
        * ``A`` is the raw ``sex`` column, used as the sensitive attribute.

    Note that ``sex`` is also part of the feature set, so it reaches the
    model through ``X`` as well. The group sizes are printed as a side effect.
    """
    url = "https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv"
    feature_columns = [
        "age", "sex", "race", "priors_count", "c_charge_degree",
        "c_charge_desc", "c_offense_date", "juv_fel_count",
        "juv_misd_count", "decile_score", "v_decile_score",
    ]

    raw = pd.read_csv(url)
    features = raw[feature_columns]

    # Target: integer-encode the recidivism flag.
    labels = LabelEncoder().fit_transform(raw['two_year_recid'].values.ravel())

    # Inputs: one-hot encode the non-numeric columns, then standardise everything.
    encoded = pd.get_dummies(features, drop_first=True)
    scaled = StandardScaler().fit_transform(encoded)

    # Sensitive attribute is kept in its raw (un-encoded) form.
    sensitive = features["sex"].values
    print(sensitive.size)
    print(features["sex"].value_counts())
    return scaled, labels, sensitive

# Load the data once at module level; X_np.shape[1] is what sizes the first
# Linear layer of the network classes defined further down.
X_np, y_np, A_np = load_your_dataset()

7214
sex
Male      5819
Female    1395
Name: count, dtype: int64


## Fairness Evaluation

* Statistical Parity Difference: Closer to 0 is better. 0 means no bias.
* Average Odds Difference: 0 is ideal, indicating no discrimination.
* Disparate Impact: Aim for 1. Values close to 1 indicate fairness.
* Theil Index: Lower is better. 0 means perfect equality.

In [None]:
def calculate_statistical_parity_difference(y_true, y_pred, A_test):
    """Return the between-group difference in selection rate.

    Args:
        y_true: Ground-truth labels (selection rate ignores them, but
            MetricFrame needs them and they must align with ``y_pred``).
        y_pred: Predicted labels.
        A_test: Sensitive-feature value for every sample.

    Returns:
        ``MetricFrame.difference(method='between_groups')`` of the per-group
        selection rate; 0 means every group is selected at the same rate.
    """
    # Use the y_true argument: the previous version read a global `y_test`,
    # which only worked while a matching global happened to exist.
    mfm = MetricFrame(
        metrics=selection_rate,
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=A_test,
    )
    return mfm.difference(method='between_groups')

def calculate_average_odds_difference(y_true, y_pred, A_test):
    """Return the between-group difference of the mean of TPR and FPR.

    The larger of the two unique values in ``y_true`` is taken as the
    positive label.

    Raises:
        ValueError: if ``y_true`` does not contain exactly two distinct values.
    """
    labels = np.unique(y_true)
    if len(labels) != 2:
        raise ValueError("y_true should have exactly two unique values for binary classification")
    pos_label = labels[1]

    def average_odds(y_true, y_pred):
        # Per-group "odds": mean of true-positive and false-positive rate.
        tpr = true_positive_rate(y_true, y_pred, pos_label=pos_label)
        fpr = false_positive_rate(y_true, y_pred, pos_label=pos_label)
        return (tpr + fpr) / 2

    frame = MetricFrame(
        metrics=average_odds,
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=A_test,
    )
    return frame.difference(method='between_groups')

def calculate_disparate_impact(y_true, y_pred, A_test):
    """Return the between-group ratio of selection rates (1 means parity)."""
    per_group_selection = MetricFrame(
        metrics=selection_rate,
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=A_test,
    )
    return per_group_selection.ratio(method='between_groups')

def calculate_theil_index(y_true, y_pred):
    """Return binary entropy of the predicted positive rate minus that of the actual one.

    Despite the name, this is an entropy difference (in bits) between the
    share of predictions equal to 1 and the share of labels equal to 1. It
    is 0 when both shares match and may be negative.
    """
    epsilon = 1e-10  # keeps log2 finite when a rate is exactly 0 or 1

    def _binary_entropy(p):
        return -(p * np.log2(p + epsilon) + (1 - p) * np.log2(1 - p + epsilon))

    true_rate = np.mean(y_true == 1)
    pred_rate = np.mean(y_pred == 1)

    observed_entropy = _binary_entropy(true_rate)
    predicted_entropy = _binary_entropy(pred_rate)
    return predicted_entropy - observed_entropy

def theil_index(values):
    """Return mean((v / mean) * log(v / mean + 1e-10)) over ``values``.

    Returns 0 when the mean of ``values`` is exactly 0, which avoids the
    division by zero. The 1e-10 offset keeps the log finite for zero entries.
    """
    arr = np.array(values)
    mu = np.mean(arr)
    if mu == 0:
        return 0
    ratios = arr / mu
    return np.mean(ratios * np.log(ratios + 1e-10))
def theil_by_group(values, groups):
    """Apply ``theil_index`` to ``values`` separately for each label in ``groups``.

    Returns a pandas Series named ``value``, indexed by group label.
    """
    frame = pd.DataFrame({'value': values, 'group': groups})
    per_group_values = frame.groupby('group')['value']
    return per_group_values.apply(theil_index)

## Model Architecture

In [None]:
# Number of columns in the prepared feature matrix; the network classes below
# read this global when they build their first Linear layer.
n_input = X_np.shape[1]

class NetArch1(nn.Module):
    """MLP n_input -> 128 -> 64 -> 2 with ReLU followed by LayerNorm after each hidden layer.

    ``forward`` returns log-probabilities (``log_softmax`` over dim 1), so it
    pairs with ``nn.NLLLoss``.
    """

    def __init__(self):
        super().__init__()

        # Layer width is taken from the module-level n_input.
        self.fc1 = nn.Linear(n_input, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)

        self.layernorm1 = nn.LayerNorm(128)
        self.layernorm2 = nn.LayerNorm(64)

    def forward(self, x):
        hidden = F.relu(self.fc1(x))
        hidden = self.layernorm1(hidden)
        hidden = F.relu(self.fc2(hidden))
        hidden = self.layernorm2(hidden)
        logits = self.fc3(hidden)
        return F.log_softmax(logits, dim=1)

class NetArch2(nn.Module):
    """MLP n_input -> 100 -> 100 -> 100 -> 2 with ReLU hidden activations.

    ``forward`` returns log-probabilities (``log_softmax`` over dim 1).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(n_input, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)

    def forward(self, x):
        # Three hidden layers share the same activation; the head stays linear.
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden_layer(x))
        logits = self.fc4(x)
        return F.log_softmax(logits, dim=1)

class NetArch3(nn.Module):
    """MLP n_input -> 100 -> 100 -> 100 -> 2 with sigmoid hidden activations.

    ``forward`` returns log-probabilities (``log_softmax`` over dim 1).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(n_input, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)

    def forward(self, x):
        # Same stack as NetArch2, but squashing with sigmoid instead of ReLU.
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = torch.sigmoid(hidden_layer(x))
        logits = self.fc4(x)
        return F.log_softmax(logits, dim=1)

## QAT Compatible Model Architecture

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.quantization

# Re-derived here so this cell can run on its own; the QAT classes below read
# this global when they build their first Linear layer.
n_input = X_np.shape[1]

class QATArch1(nn.Module):
    """QAT-ready variant of the 128/64 ReLU + LayerNorm MLP.

    The input passes through a ``QuantStub`` and the logits through a
    ``DeQuantStub``; ``log_softmax`` is applied after dequantisation.
    """

    def __init__(self):
        super().__init__()
        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(n_input, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)
        self.layernorm1 = nn.LayerNorm(128)
        self.layernorm2 = nn.LayerNorm(64)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        hidden = self.quant(x)
        hidden = F.relu(self.fc1(hidden))
        hidden = self.layernorm1(hidden)
        hidden = F.relu(self.fc2(hidden))
        hidden = self.layernorm2(hidden)
        logits = self.dequant(self.fc3(hidden))
        return F.log_softmax(logits, dim=1)

class QATArch2(nn.Module):
    """QAT-ready variant of the three-hidden-layer ReLU MLP (100 units each).

    The input passes through a ``QuantStub`` and the logits through a
    ``DeQuantStub``; ``log_softmax`` is applied after dequantisation.
    """

    def __init__(self):
        super().__init__()
        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(n_input, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        # Layers are looked up on each call so swapped-in modules are honoured.
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden_layer(x))
        logits = self.dequant(self.fc4(x))
        return F.log_softmax(logits, dim=1)

class QATArch3(nn.Module):
    """QAT-ready variant of the three-hidden-layer sigmoid MLP (100 units each).

    The input passes through a ``QuantStub`` and the logits through a
    ``DeQuantStub``; ``log_softmax`` is applied after dequantisation.
    """

    def __init__(self):
        super().__init__()
        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(n_input, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        activations = self.quant(x)
        activations = torch.sigmoid(self.fc1(activations))
        activations = torch.sigmoid(self.fc2(activations))
        activations = torch.sigmoid(self.fc3(activations))
        logits = self.dequant(self.fc4(activations))
        return F.log_softmax(logits, dim=1)

In [None]:
def dynamic_compress_model(model_path):
    """Load NetArch3 weights from ``model_path`` and return a dynamically quantised copy.

    Only ``torch.nn.Linear`` modules are quantised, to ``qint8``. The model is
    put in eval mode before quantisation.
    """
    float_model = NetArch3()
    saved_weights = torch.load(model_path)
    float_model.load_state_dict(saved_weights)
    float_model.eval()

    return quantize_dynamic(
        model=float_model,
        qconfig_spec={torch.nn.Linear},
        dtype=torch.qint8,
    )

def prepare_model_for_qat(model):
    """Attach the default 'fbgemm' QAT qconfig to ``model`` and return its ``prepare_qat`` result.

    The qconfig is set on the passed-in model as a side effect.
    """
    qat_config = torch.quantization.get_default_qat_qconfig('fbgemm')
    model.qconfig = qat_config
    return torch.quantization.prepare_qat(model)

def convert_qat_model(model):
    """Switch ``model`` to eval mode (in place) and return ``torch.quantization.convert`` of it."""
    model.eval()
    return torch.quantization.convert(model)


def get_k_fold_split(X, y,A, k, num_splits=5):
    """Return the train/test partition for fold ``k`` of a contiguous, unshuffled k-fold split.

    Folds are consecutive slices of length ``len(X) // num_splits``; the last
    fold also absorbs the remainder.

    Returns:
        ``(X_train, y_train, A_train, X_test, y_test, A_test)``.
    """
    n_samples = len(X)
    all_idx = np.arange(n_samples)
    fold_len = n_samples // num_splits

    lo = k * fold_len
    is_last_fold = (k == num_splits - 1)
    hi = n_samples if is_last_fold else lo + fold_len

    held_out = all_idx[lo:hi]
    kept = np.concatenate((all_idx[:lo], all_idx[hi:]))

    train_parts = tuple(arr[kept] for arr in (X, y, A))
    test_parts = tuple(arr[held_out] for arr in (X, y, A))
    return train_parts + test_parts


def train_baseline_model(X_train, y_train, fold, num_epochs=10, batch_size=64):
    """Train a fresh NetArch3 on one fold, report its training F1, and save it.

    Args:
        X_train: Feature matrix for the fold's training portion.
        y_train: Integer class labels aligned with ``X_train``.
        fold: Zero-based fold index; used in the log line and the file name.
        num_epochs: Number of passes over the training data.
        batch_size: Mini-batch size for the DataLoader.

    Returns:
        The trained model, left in eval mode. Its weights are also written
        to ``model_fold{fold}.pkl``.
    """
    net = NetArch3()
    adam = optim.Adam(net.parameters(), lr=1e-3)
    nll = nn.NLLLoss()

    features = torch.FloatTensor(X_train)
    targets = torch.LongTensor(y_train)
    train_loader = DataLoader(
        TensorDataset(features, targets), batch_size=batch_size, shuffle=True
    )

    net.train()
    for _ in range(num_epochs):
        for xb, yb in train_loader:
            adam.zero_grad()
            batch_loss = nll(net(xb), yb)
            batch_loss.backward()
            adam.step()

    # The reported F1 is measured on the training data itself, not held-out data.
    net.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for xb, yb in train_loader:
            predicted = torch.max(net(xb), 1)[1]
            y_true.extend(yb.numpy())
            y_pred.extend(predicted.numpy())

    f1 = f1_score(y_true, y_pred)
    print(f"Fold {fold + 1}: Train F1 Score = {f1:.4f}")

    model_path = f"model_fold{fold}.pkl"
    save_model(net, model_path)

    return net

def train_qat_model(X_train, y_train, fold, num_epochs=10, batch_size=64):
    """Train QATArch3 with quantisation-aware training, convert it, and save it.

    Args:
        X_train: Feature matrix for the fold's training portion.
        y_train: Integer class labels aligned with ``X_train``.
        fold: Zero-based fold index; used in the log line and the file name.
        num_epochs: Number of passes over the training data.
        batch_size: Mini-batch size for the DataLoader.

    Returns:
        The converted (quantised) model. Its ``state_dict`` is also written
        to ``qat_model_fold{fold}.pkl``.
    """
    model = QATArch3()
    model = prepare_model_for_qat(model)

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.NLLLoss()

    dataset = TensorDataset(torch.FloatTensor(X_train), torch.LongTensor(y_train))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Nothing inside the loop leaves train mode, so set it once up front.
    # The former `total_loss` accumulator was never read and has been dropped.
    model.train()
    for _ in range(num_epochs):
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(batch_X), batch_y)
            loss.backward()
            optimizer.step()

    model.eval()
    quantized_model = convert_qat_model(model)

    # The reported F1 is measured on the training data with the converted model.
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_X, batch_y in train_loader:
            outputs = quantized_model(batch_X)
            _, predicted = torch.max(outputs, 1)
            y_true.extend(batch_y.numpy())
            y_pred.extend(predicted.numpy())

    f1 = f1_score(y_true, y_pred)
    print(f"Fold {fold + 1}: Train F1 Score (QAT) = {f1:.4f}")

    # NOTE(review): this is the state_dict of the converted model; reloading it
    # presumably needs a model that has gone through the same prepare/convert
    # steps first. Confirm before relying on these files.
    model_path = f"qat_model_fold{fold}.pkl"
    save_model(quantized_model, model_path)

    return quantized_model


def evaluate_baseline_model(model, X_test, y_test, A_test):
    """Score the float baseline model on a held-out fold.

    Args:
        model: Trained network returning per-class log-probabilities.
        X_test: Feature matrix of the test fold.
        y_test: True labels of the test fold.
        A_test: Sensitive-feature value for every test sample.

    Returns:
        Dict of fairness metrics and F1, each rounded to 3 decimals. The
        "Theil Index - Within" entry is a per-group pandas Series, not a scalar.
    """
    # Inference only: eval mode and no autograd graph, as in evaluate_qat_model.
    model.eval()
    with torch.no_grad():
        output = model(torch.FloatTensor(X_test))
        _, y_pred = torch.max(output, dim=1)

    y_pred = pd.Series(y_pred.numpy())
    y_test = pd.Series(y_test)

    baseline_metrics = {
        "Baseline Statistical Parity Difference": round(calculate_statistical_parity_difference(y_test, y_pred, A_test), 3),
        "Baseline Average Odds Difference": round(calculate_average_odds_difference(y_test, y_pred, A_test), 3),
        "Baseline Disparate Impact": round(calculate_disparate_impact(y_test, y_pred, A_test), 3),
        "Baseline Theil Index": round(calculate_theil_index(y_test, y_pred),3),
        "Baseline Theil Index - Within": round(theil_by_group(y_pred, A_test), 3),
        "Baseline F1 Score": round(f1_score(y_test, y_pred),3)
    }

    return baseline_metrics

def evaluate_qat_model(model, X_test, y_test, A_test):
    """Score the converted QAT model on a held-out fold.

    Args:
        model: Quantised network returning per-class log-probabilities.
        X_test: Feature matrix of the test fold; NaN cells are replaced by
            the mean of their column before inference.
        y_test: True labels of the test fold.
        A_test: Sensitive-feature value for every test sample.

    Returns:
        Dict of fairness metrics and F1, each rounded to 3 decimals. The
        "Theil Index-Within" entry is a per-group pandas Series, not a scalar.
    """
    # astype() copies, so the caller's array is never modified.
    X_test = X_test.astype(np.float32)

    nan_mask = np.isnan(X_test)
    if nan_mask.any():
        col_means = np.nanmean(X_test, axis=0)
        # Broadcast the per-column means over the rows so each NaN takes the
        # mean of its own column. The old code indexed col_means with the
        # first row's mask, which gave wrong values or a shape mismatch.
        X_test = np.where(nan_mask, col_means, X_test)

    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

    model.eval()

    with torch.no_grad():
        output = model(X_test_tensor)
        _, y_pred = torch.max(output, dim=1)

    y_pred_np = y_pred.cpu().numpy()

    qat_metrics = {
        "QAT Statistical Parity Difference": round(calculate_statistical_parity_difference(y_test, y_pred_np, A_test), 3),
        "QAT Average Odds Difference": round(calculate_average_odds_difference(y_test, y_pred_np, A_test), 3),
        "QAT Disparate Impact": round(calculate_disparate_impact(y_test, y_pred_np, A_test), 3),
        "QAT Theil Index": round(calculate_theil_index(y_test, y_pred_np), 3),
        "QAT Theil Index-Within": round(theil_by_group(y_pred_np, A_test), 3),
        "QAT F1 Score": round(f1_score(y_test, y_pred_np), 3),
    }

    return qat_metrics

def evaluate_compressed_model(model, X_test, y_test, A_test):
    """Score the dynamically quantised model on a held-out fold.

    Args:
        model: Quantised network returning per-class log-probabilities.
        X_test: Feature matrix of the test fold.
        y_test: True labels of the test fold.
        A_test: Sensitive-feature value for every test sample.

    Returns:
        Dict of fairness metrics and F1, each rounded to 3 decimals. The
        "Theil Index-Within" entry is a per-group pandas Series, not a scalar.
    """
    # Inference only: eval mode and no autograd graph, as in evaluate_qat_model.
    model.eval()
    with torch.no_grad():
        output = model(torch.FloatTensor(X_test))
        _, y_pred = torch.max(output, dim=1)

    y_pred = pd.Series(y_pred.numpy())
    y_test = pd.Series(y_test)

    compressed_metrics = {
        "Compressed Statistical Parity Difference": round(calculate_statistical_parity_difference(y_test, y_pred, A_test), 3),
        "Compressed Average Odds Difference": round(calculate_average_odds_difference(y_test, y_pred, A_test), 3),
        "Compressed Disparate Impact": round(calculate_disparate_impact(y_test, y_pred, A_test), 3),
        "Compressed Theil Index": round(calculate_theil_index(y_test, y_pred), 3),
        "Compressed Theil Index-Within": round(theil_by_group(y_pred, A_test), 3),
        "Compressed F1 Score": round(f1_score(y_test, y_pred), 3),
    }
    return compressed_metrics


### k‑Fold Cross Validation


In [None]:

# Driver: for each fold, train and evaluate three variants of the same network
# (float baseline, dynamically quantised copy of the baseline, QAT model) and
# collect their fairness metrics.
# NOTE: this downloads and preprocesses the dataset a second time; the same
# arrays were already loaded into X_np / y_np / A_np earlier in the notebook.
X, y, A = load_your_dataset()
num_splits = 10

# One metrics dict per fold for each model variant.
baseline_fold_metrics = []
compressed_fold_metrics = []
qat_fold_metrics = []

for k in range(num_splits):
    print(f"\nProcessing fold {k + 1}/{num_splits}")

    # Folds are contiguous slices in file order (get_k_fold_split does not shuffle).
    X_train, y_train, A_train, X_test, y_test, A_test = get_k_fold_split(X, y, A, k, num_splits)

    baseline_model = train_baseline_model(X_train, y_train, k)
    baseline_eval = evaluate_baseline_model(baseline_model, X_test, y_test, A_test)
    # Must match the path train_baseline_model saved the weights to.
    model_path = f"model_fold{k}.pkl"

    # The compressed model is the baseline reloaded from disk and quantised.
    compressed_model = dynamic_compress_model(model_path)
    compressed_eval = evaluate_compressed_model(compressed_model, X_test, y_test, A_test)

    # The QAT model is trained from scratch on the same fold.
    qat_model = train_qat_model(X_train, y_train, k)
    qat_eval = evaluate_qat_model(qat_model, X_test, y_test, A_test)

    baseline_fold_metrics.append(baseline_eval)
    compressed_fold_metrics.append(compressed_eval)
    qat_fold_metrics.append(qat_eval)

    print(f"Baseline Eval: {baseline_eval}")
    print(f"Compressed Eval: {compressed_eval}")
    print(f"QAT Eval: {qat_eval}")

# One row per fold, one column per metric.
baseline_df = pd.DataFrame(baseline_fold_metrics)
compressed_df = pd.DataFrame(compressed_fold_metrics)
qat_df = pd.DataFrame(qat_fold_metrics)


# NOTE(review): the "Theil Index ... Within" columns hold a per-group Series in
# each cell, so float_format presumably does not apply to them - confirm the
# CSV layout is what downstream analysis expects.
baseline_df.to_csv("baseline_metrics.csv", index=True, float_format="%.3f")
compressed_df.to_csv("compressed_metrics.csv", index=True, float_format="%.3f")
qat_df.to_csv("qat_metrics.csv", index=True, float_format="%.3f")



Processing fold 1/10
Fold 1: Train F1 Score = 0.7685




Fold 1: Train F1 Score (QAT) = 0.7716
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.175), 'Baseline Average Odds Difference': np.float64(0.115), 'Baseline Disparate Impact': np.float64(0.63), 'Baseline Theil Index': np.float64(-0.003), 'Baseline Theil Index - Within': group
Female    1.209
Male      0.747
Name: value, dtype: float64, 'Baseline F1 Score': 0.678}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.174), 'Compressed Average Odds Difference': np.float64(0.122), 'Compressed Disparate Impact': np.float64(0.653), 'Compressed Theil Index': np.float64(0.004), 'Compressed Theil Index-Within': group
Female    1.114
Male      0.688
Name: value, dtype: float64, 'Compressed F1 Score': 0.681}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.172), 'QAT Average Odds Difference': np.float64(0.117), 'QAT Disparate Impact': np.float64(0.635), 'QAT Theil Index': np.float64(-0.004), 'QAT Theil Index-Within': group
Female    1.209
M



Fold 2: Train F1 Score (QAT) = 0.7794
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.159), 'Baseline Average Odds Difference': np.float64(0.099), 'Baseline Disparate Impact': np.float64(0.576), 'Baseline Theil Index': np.float64(-0.056), 'Baseline Theil Index - Within': group
Female    1.531
Male      0.980
Name: value, dtype: float64, 'Baseline F1 Score': 0.62}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.183), 'Compressed Average Odds Difference': np.float64(0.123), 'Compressed Disparate Impact': np.float64(0.584), 'Compressed Theil Index': np.float64(-0.011), 'Compressed Theil Index-Within': group
Female    1.360
Male      0.821
Name: value, dtype: float64, 'Compressed F1 Score': 0.652}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.204), 'QAT Average Odds Difference': np.float64(0.147), 'QAT Disparate Impact': np.float64(0.551), 'QAT Theil Index': np.float64(-0.005), 'QAT Theil Index-Within': group
Female    1.386




Fold 3: Train F1 Score (QAT) = 0.7737
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.191), 'Baseline Average Odds Difference': np.float64(0.17), 'Baseline Disparate Impact': np.float64(0.557), 'Baseline Theil Index': np.float64(-0.011), 'Baseline Theil Index - Within': group
Female    1.427
Male      0.841
Name: value, dtype: float64, 'Baseline F1 Score': 0.611}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.223), 'Compressed Average Odds Difference': np.float64(0.203), 'Compressed Disparate Impact': np.float64(0.51), 'Compressed Theil Index': np.float64(-0.001), 'Compressed Theil Index-Within': group
Female    1.461
Male      0.788
Name: value, dtype: float64, 'Compressed F1 Score': 0.605}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.187), 'QAT Average Odds Difference': np.float64(0.175), 'QAT Disparate Impact': np.float64(0.571), 'QAT Theil Index': np.float64(-0.009), 'QAT Theil Index-Within': group
Female    1.394
M



Fold 4: Train F1 Score (QAT) = 0.6483
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.099), 'Baseline Average Odds Difference': np.float64(0.065), 'Baseline Disparate Impact': np.float64(0.737), 'Baseline Theil Index': np.float64(-0.053), 'Baseline Theil Index - Within': group
Female    1.281
Male      0.976
Name: value, dtype: float64, 'Baseline F1 Score': 0.641}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.109), 'Compressed Average Odds Difference': np.float64(0.078), 'Compressed Disparate Impact': np.float64(0.742), 'Compressed Theil Index': np.float64(-0.022), 'Compressed Theil Index-Within': group
Female    1.156
Male      0.858
Name: value, dtype: float64, 'Compressed F1 Score': 0.655}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.11), 'QAT Average Odds Difference': np.float64(0.083), 'QAT Disparate Impact': np.float64(0.627), 'QAT Theil Index': np.float64(-0.143), 'QAT Theil Index-Within': group
Female    1.686




Fold 5: Train F1 Score (QAT) = 0.7639
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.159), 'Baseline Average Odds Difference': np.float64(0.125), 'Baseline Disparate Impact': np.float64(0.684), 'Baseline Theil Index': np.float64(0.002), 'Baseline Theil Index - Within': group
Female    1.064
Male      0.685
Name: value, dtype: float64, 'Baseline F1 Score': 0.612}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.169), 'Compressed Average Odds Difference': np.float64(0.141), 'Compressed Disparate Impact': np.float64(0.688), 'Compressed Theil Index': np.float64(0.003), 'Compressed Theil Index-Within': group
Female    0.986
Male      0.612
Name: value, dtype: float64, 'Compressed F1 Score': 0.604}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.133), 'QAT Average Odds Difference': np.float64(0.098), 'QAT Disparate Impact': np.float64(0.662), 'QAT Theil Index': np.float64(-0.048), 'QAT Theil Index-Within': group
Female    1.345
M



Fold 6: Train F1 Score (QAT) = 0.7569
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.19), 'Baseline Average Odds Difference': np.float64(0.138), 'Baseline Disparate Impact': np.float64(0.58), 'Baseline Theil Index': np.float64(-0.015), 'Baseline Theil Index - Within': group
Female    1.335
Male      0.791
Name: value, dtype: float64, 'Baseline F1 Score': 0.677}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.195), 'Compressed Average Odds Difference': np.float64(0.147), 'Compressed Disparate Impact': np.float64(0.608), 'Compressed Theil Index': np.float64(0.001), 'Compressed Theil Index-Within': group
Female    1.195
Male      0.698
Name: value, dtype: float64, 'Compressed F1 Score': 0.682}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.174), 'QAT Average Odds Difference': np.float64(0.125), 'QAT Disparate Impact': np.float64(0.563), 'QAT Theil Index': np.float64(-0.05), 'QAT Theil Index-Within': group
Female    1.498
Mal



Fold 7: Train F1 Score (QAT) = 0.7337
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.134), 'Baseline Average Odds Difference': np.float64(0.046), 'Baseline Disparate Impact': np.float64(0.735), 'Baseline Theil Index': np.float64(0.005), 'Baseline Theil Index - Within': group
Female    0.987
Male      0.679
Name: value, dtype: float64, 'Baseline F1 Score': 0.655}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.13), 'Compressed Average Odds Difference': np.float64(0.053), 'Compressed Disparate Impact': np.float64(0.759), 'Compressed Theil Index': np.float64(0.006), 'Compressed Theil Index-Within': group
Female    0.887
Male      0.612
Name: value, dtype: float64, 'Compressed F1 Score': 0.659}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.157), 'QAT Average Odds Difference': np.float64(0.1), 'QAT Disparate Impact': np.float64(0.657), 'QAT Theil Index': np.float64(-0.01), 'QAT Theil Index-Within': group
Female    1.202
Male 



Fold 8: Train F1 Score (QAT) = 0.6096
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.121), 'Baseline Average Odds Difference': np.float64(0.079), 'Baseline Disparate Impact': np.float64(0.692), 'Baseline Theil Index': np.float64(-0.045), 'Baseline Theil Index - Within': group
Female    1.299
Male      0.932
Name: value, dtype: float64, 'Baseline F1 Score': 0.662}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.153), 'Compressed Average Odds Difference': np.float64(0.108), 'Compressed Disparate Impact': np.float64(0.647), 'Compressed Theil Index': np.float64(-0.023), 'Compressed Theil Index-Within': group
Female    1.272
Male      0.837
Name: value, dtype: float64, 'Compressed F1 Score': 0.666}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.149), 'QAT Average Odds Difference': np.float64(0.117), 'QAT Disparate Impact': np.float64(0.504), 'QAT Theil Index': np.float64(-0.151), 'QAT Theil Index-Within': group
Female    1.887



Fold 9: Train F1 Score (QAT) = 0.6928
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.136), 'Baseline Average Odds Difference': np.float64(0.114), 'Baseline Disparate Impact': np.float64(0.69), 'Baseline Theil Index': np.float64(-0.01), 'Baseline Theil Index - Within': group
Female    1.195
Male      0.824
Name: value, dtype: float64, 'Baseline F1 Score': 0.645}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.162), 'Compressed Average Odds Difference': np.float64(0.142), 'Compressed Disparate Impact': np.float64(0.657), 'Compressed Theil Index': np.float64(0.002), 'Compressed Theil Index-Within': group
Female    1.172
Male      0.752
Name: value, dtype: float64, 'Compressed F1 Score': 0.659}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.238), 'QAT Average Odds Difference': np.float64(0.22), 'QAT Disparate Impact': np.float64(0.47), 'QAT Theil Index': np.float64(-0.016), 'QAT Theil Index-Within': group
Female    1.555
Male



Fold 10: Train F1 Score (QAT) = 0.7717
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.181), 'Baseline Average Odds Difference': np.float64(0.16), 'Baseline Disparate Impact': np.float64(0.563), 'Baseline Theil Index': np.float64(-0.045), 'Baseline Theil Index - Within': group
Female    1.458
Male      0.883
Name: value, dtype: float64, 'Baseline F1 Score': 0.667}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.199), 'Compressed Average Odds Difference': np.float64(0.178), 'Compressed Disparate Impact': np.float64(0.582), 'Compressed Theil Index': np.float64(-0.012), 'Compressed Theil Index-Within': group
Female    1.285
Male      0.744
Name: value, dtype: float64, 'Compressed F1 Score': 0.676}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.187), 'QAT Average Odds Difference': np.float64(0.17), 'QAT Disparate Impact': np.float64(0.579), 'QAT Theil Index': np.float64(-0.025), 'QAT Theil Index-Within': group
Female    1.355


In [None]:
# Show the per-fold baseline metrics and (re)write them to CSV.
print(baseline_df)
baseline_df.to_csv("baseline_metrics.csv", index=True, float_format="%.3f")

   Baseline Statistical Parity Difference  Baseline Average Odds Difference  \
0                                   0.175                             0.115   
1                                   0.159                             0.099   
2                                   0.191                             0.170   
3                                   0.099                             0.065   
4                                   0.159                             0.125   
5                                   0.190                             0.138   
6                                   0.134                             0.046   
7                                   0.121                             0.079   
8                                   0.136                             0.114   
9                                   0.181                             0.160   

   Baseline Disparate Impact  Baseline Theil Index  \
0                      0.630                -0.003   
1                     

In [None]:
# Colab-only cell: relies on the `!` shell escape and the google.colab package.
# Zips everything under /content and triggers a browser download of the archive.
# NOTE(review): presumably this picks up the model and CSV files written above,
# assuming the notebook's working directory is /content - confirm.
!zip -r colab_files.zip /content
from google.colab import files
files.download('colab_files.zip')

updating: content/ (stored 0%)
updating: content/.config/ (stored 0%)
updating: content/.config/active_config (stored 0%)
updating: content/.config/.last_opt_in_prompt.yaml (stored 0%)
updating: content/.config/config_sentinel (stored 0%)
updating: content/.config/.last_survey_prompt.yaml (stored 0%)
updating: content/.config/gce (stored 0%)
updating: content/.config/.last_update_check.json (deflated 23%)
updating: content/.config/hidden_gcloud_config_universe_descriptor_data_cache_configs.db (deflated 97%)
updating: content/.config/logs/ (stored 0%)
updating: content/.config/logs/2025.05.14/ (stored 0%)
updating: content/.config/logs/2025.05.14/13.38.05.736741.log (deflated 86%)
updating: content/.config/logs/2025.05.14/13.37.56.530848.log (deflated 58%)
updating: content/.config/logs/2025.05.14/13.38.07.566408.log (deflated 58%)
updating: content/.config/logs/2025.05.14/13.38.16.976468.log (deflated 57%)
updating: content/.config/logs/2025.05.14/13.38.17.706556.log (deflated 56%)
upd

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>