# NewV2 Adult Fairness NetArch3

In [None]:
!pip install ucimlrepo
!pip install fairlearn
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.quantization import quantize_dynamic, QConfig, prepare_qat, convert
from torch.quantization.fake_quantize import FakeQuantize
from torch.quantization.observer import MovingAverageMinMaxObserver
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, matthews_corrcoef, f1_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from ucimlrepo import fetch_ucirepo
from fairlearn.metrics import MetricFrame
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from fairlearn.metrics import (
    MetricFrame,
    selection_rate,
    true_positive_rate,
    false_positive_rate
)

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7
Collecting fairlearn
  Downloading fairlearn-0.12.0-py3-none-any.whl.metadata (7.0 kB)
Downloading fairlearn-0.12.0-py3-none-any.whl (240 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fairlearn
Successfully installed fairlearn-0.12.0


## Code Persistence

In [None]:
def save_model(model, model_path):
    """Write the model's state dict (not the module object itself) to ``model_path``."""
    weights = model.state_dict()
    torch.save(weights, model_path)
"""
def save_values_to_txt(data, filename):
  with open(filename, 'a') as f:
      for entry in data:
          for key, value in entry.items():
              f.write(f'{key}: {value}\n')
          f.write('\n')
  print(f"Data appended to {filename}")
"""
def save_values_to_txt(data, filename):
    """Append records to a text file as ``key: value`` lines.

    Args:
        data: Either a single dict or an iterable of dicts. A lone dict is
            treated as a one-element list.
        filename: Path of the file to append to.

    Each record is followed by one blank line, and a confirmation message is
    printed once everything has been written.
    """
    entries = [data] if isinstance(data, dict) else data
    with open(filename, 'a') as out:
        for record in entries:
            out.writelines(f'{key}: {value}\n' for key, value in record.items())
            # Blank separator line between consecutive records.
            out.write('\n')
    print(f"Data appended to {filename}")

In [None]:
# Seed every random number generator the notebook relies on so runs are repeatable.
random.seed(42)
np.random.seed(42)
# Weight initialisation and DataLoader shuffling draw from torch's own
# generator, which the two calls above do not touch.
torch.manual_seed(42)

### Data Loading

In [None]:
def load_your_dataset():
    """Fetch the UCI Adult dataset (id=2) and return model-ready arrays.

    Returns:
        tuple: ``(X_np, y_np, A_np)`` where ``X_np`` is the one-hot encoded,
        standardized feature matrix as float32, ``y_np`` holds the binary
        labels as int64 (1 for the ``>50K`` spellings, 0 for the ``<=50K``
        spellings) and ``A_np`` is the raw ``sex`` column, used as the
        sensitive attribute.

    Raises:
        ValueError: If the target column contains a label outside the four
            known spellings.
    """
    adult = fetch_ucirepo(id=2)
    X = adult.data.features
    y = adult.data.targets

    # Map the labels explicitly instead of replace(...).astype(int): this
    # avoids the pandas downcasting FutureWarning and makes a second
    # LabelEncoder pass over already-numeric labels unnecessary.
    # The mapping covers the spellings with and without a trailing period.
    label_map = {'<=50K': 0, '<=50K.': 0, '>50K': 1, '>50K.': 1}
    raw_labels = pd.Series(np.asarray(y).ravel())
    encoded = raw_labels.map(label_map)
    if encoded.isna().any():
        unknown = sorted(set(raw_labels[encoded.isna()].astype(str)))
        raise ValueError(f"Unexpected target labels: {unknown}")
    y_np = encoded.to_numpy(dtype=np.int64)

    # One-hot encode every categorical feature; drop_first removes one level
    # per category.
    X_encoded = pd.get_dummies(X, drop_first=True)

    # NOTE: the scaler is fitted on the full dataset, before any train/test split.
    scaler = StandardScaler()
    X_np = scaler.fit_transform(X_encoded).astype(np.float32)

    # Sensitive attribute, taken from the un-encoded feature table.
    A_np = X["sex"].values

    return X_np, y_np, A_np

X_np, y_np, A_np = load_your_dataset()

  y = y.replace({'<=50K': 0, '<=50K.': 0, '>50K': 1, '>50K.': 1}).astype(int)


## Fairness Evaluation

* Statistical Parity Difference: Closer to 0 is better. 0 means no bias.
* Average Odds Difference: 0 is ideal, indicating no discrimination.
* Disparate Impact: Aim for 1. Values close to 1 indicate fairness.
* Theil Index: Lower is better. 0 means perfect equality.

In [None]:
def calculate_statistical_parity_difference(y_true, y_pred, A_test):
    """Return the between-group difference in selection rate.

    Args:
        y_true: Ground-truth labels for the evaluated samples.
        y_pred: Predicted labels for the same samples.
        A_test: Sensitive-feature value of each sample.

    Returns:
        The result of ``MetricFrame.difference(method='between_groups')``
        computed on the per-group selection rate.
    """
    # Use the y_true argument: the previous version read a global `y_test`,
    # which ignored the caller's labels and failed when no such global existed.
    mfm = MetricFrame(
        metrics=selection_rate,
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=A_test,
    )
    return mfm.difference(method='between_groups')

def calculate_average_odds_difference(y_true, y_pred, A_test):
    """Return the between-group difference of the mean of TPR and FPR.

    Args:
        y_true: Ground-truth labels; must contain exactly two distinct values.
        y_pred: Predicted labels for the same samples.
        A_test: Sensitive-feature value of each sample.

    Returns:
        ``MetricFrame.difference(method='between_groups')`` of the per-group
        value ``(TPR + FPR) / 2``.

    Raises:
        ValueError: If ``y_true`` does not contain exactly two unique values.
    """
    classes = np.unique(y_true)
    if len(classes) != 2:
        raise ValueError("y_true should have exactly two unique values for binary classification")
    # np.unique sorts its output, so the larger label is treated as positive.
    positive = classes[1]

    def mean_of_tpr_and_fpr(group_true, group_pred):
        hit_rate = true_positive_rate(group_true, group_pred, pos_label=positive)
        false_alarm_rate = false_positive_rate(group_true, group_pred, pos_label=positive)
        return (hit_rate + false_alarm_rate) / 2

    frame = MetricFrame(
        metrics=mean_of_tpr_and_fpr,
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=A_test,
    )
    return frame.difference(method='between_groups')

def calculate_disparate_impact(y_true, y_pred, A_test):
    """Return the between-group ratio of selection rates.

    Args:
        y_true: Ground-truth labels for the evaluated samples.
        y_pred: Predicted labels for the same samples.
        A_test: Sensitive-feature value of each sample.

    Returns:
        ``MetricFrame.ratio(method='between_groups')`` computed on the
        per-group selection rate.
    """
    frame = MetricFrame(
        metrics=selection_rate,
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=A_test,
    )
    return frame.ratio(method='between_groups')

def calculate_theil_index(y_true, y_pred):
    """Return the binary entropy of the predictions minus that of the labels.

    Despite its name, this compares the base-2 entropy of the positive rate in
    ``y_pred`` with the base-2 entropy of the positive rate in ``y_true``.
    A result of 0 means both rates have the same entropy.

    Args:
        y_true: Ground-truth labels; entries equal to 1 count as positive.
        y_pred: Predicted labels; entries equal to 1 count as positive.
    """
    tiny = 1e-10  # keeps log2 finite when a rate is exactly 0 or 1

    def binary_entropy(rate):
        return -(rate * np.log2(rate + tiny) + (1 - rate) * np.log2(1 - rate + tiny))

    label_rate = np.mean(y_true == 1)
    prediction_rate = np.mean(y_pred == 1)

    return binary_entropy(prediction_rate) - binary_entropy(label_rate)

def theil_index(values):
    """Return the Theil T index of ``values``.

    Computes ``mean((v / mean) * ln(v / mean + 1e-10))``. The small constant
    keeps the logarithm finite for zero entries. When the mean is exactly 0
    the function returns 0 without evaluating the formula.
    """
    arr = np.array(values)
    mu = np.mean(arr)
    if mu == 0:
        return 0
    ratio = arr / mu
    return np.mean(ratio * np.log(ratio + 1e-10))
def theil_by_group(values, groups):
    """Apply ``theil_index`` to ``values`` separately for each label in ``groups``.

    Returns:
        A pandas Series indexed by group label, one Theil value per group.
    """
    table = pd.DataFrame({'value': values, 'group': groups})
    per_group = table.groupby('group')['value']
    return per_group.apply(theil_index)

## Model Architecture

In [None]:
# Input width used by the network classes below: the number of columns in X_np.
n_input = X_np.shape[1]

class NetArch1(nn.Module):
    """MLP (in -> 128 -> 64 -> 2) with ReLU followed by LayerNorm on each hidden layer.

    Args:
        in_features: Width of the input layer. When omitted, the module-level
            ``n_input`` is used, so existing ``NetArch1()`` calls keep working.
    """

    def __init__(self, in_features=None):
        super().__init__()
        # Resolve the default lazily so the class can also be built without
        # the dataset-derived global.
        if in_features is None:
            in_features = n_input

        self.fc1 = nn.Linear(in_features, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)

        self.layernorm1 = nn.LayerNorm(128)
        self.layernorm2 = nn.LayerNorm(64)

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1)."""
        x = self.layernorm1(F.relu(self.fc1(x)))
        x = self.layernorm2(F.relu(self.fc2(x)))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

class NetArch2(nn.Module):
    """MLP with three 100-unit ReLU hidden layers and a 2-way output.

    Args:
        in_features: Width of the input layer. When omitted, the module-level
            ``n_input`` is used, so existing ``NetArch2()`` calls keep working.
    """

    def __init__(self, in_features=None):
        super().__init__()
        # Resolve the default lazily so the class can also be built without
        # the dataset-derived global.
        if in_features is None:
            in_features = n_input

        self.fc1 = nn.Linear(in_features, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1)."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return F.log_softmax(x, dim=1)

class NetArch3(nn.Module):
    """MLP with three 100-unit sigmoid hidden layers and a 2-way output.

    Args:
        in_features: Width of the input layer. When omitted, the module-level
            ``n_input`` is used, so existing ``NetArch3()`` calls keep working.
    """

    def __init__(self, in_features=None):
        super().__init__()
        # Resolve the default lazily so the class can also be built without
        # the dataset-derived global.
        if in_features is None:
            in_features = n_input

        self.fc1 = nn.Linear(in_features, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1)."""
        # torch.sigmoid matches the call used by QATArch3.
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        x = self.fc4(x)
        return F.log_softmax(x, dim=1)

## QAT Compatible Model Architecture

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.quantization

# Input width used by the QAT network classes below: the number of columns in X_np.
n_input = X_np.shape[1]

class QATArch1(nn.Module):
    """NetArch1-style MLP wrapped in quant/dequant stubs.

    The layer stack is in -> 128 -> 64 -> 2, with ReLU followed by LayerNorm
    on each hidden layer.

    Args:
        in_features: Width of the input layer. When omitted, the module-level
            ``n_input`` is used, so existing ``QATArch1()`` calls keep working.
    """

    def __init__(self, in_features=None):
        super().__init__()
        # Resolve the default lazily so the class can also be built without
        # the dataset-derived global.
        if in_features is None:
            in_features = n_input

        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(in_features, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)
        self.layernorm1 = nn.LayerNorm(128)
        self.layernorm2 = nn.LayerNorm(64)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1)."""
        x = self.quant(x)
        x = self.layernorm1(F.relu(self.fc1(x)))
        x = self.layernorm2(F.relu(self.fc2(x)))
        x = self.fc3(x)
        # Dequantize before log-softmax, which is applied to the stub's output.
        x = self.dequant(x)
        return F.log_softmax(x, dim=1)

class QATArch2(nn.Module):
    """NetArch2-style MLP (three 100-unit ReLU layers) wrapped in quant/dequant stubs.

    Args:
        in_features: Width of the input layer. When omitted, the module-level
            ``n_input`` is used, so existing ``QATArch2()`` calls keep working.
    """

    def __init__(self, in_features=None):
        super().__init__()
        # Resolve the default lazily so the class can also be built without
        # the dataset-derived global.
        if in_features is None:
            in_features = n_input

        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(in_features, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1)."""
        x = self.quant(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        # Dequantize before log-softmax, which is applied to the stub's output.
        x = self.dequant(x)
        return F.log_softmax(x, dim=1)

class QATArch3(nn.Module):
    """NetArch3-style MLP (three 100-unit sigmoid layers) wrapped in quant/dequant stubs.

    Args:
        in_features: Width of the input layer. When omitted, the module-level
            ``n_input`` is used, so existing ``QATArch3()`` calls keep working.
    """

    def __init__(self, in_features=None):
        super().__init__()
        # Resolve the default lazily so the class can also be built without
        # the dataset-derived global.
        if in_features is None:
            in_features = n_input

        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(in_features, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1)."""
        x = self.quant(x)
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        x = self.fc4(x)
        # Dequantize before log-softmax, which is applied to the stub's output.
        x = self.dequant(x)
        return F.log_softmax(x, dim=1)

In [None]:
def dynamic_compress_model(model_path):
    """Load NetArch3 weights from ``model_path`` and return a dynamically quantized copy.

    The state dict is loaded into a fresh ``NetArch3`` and the network is
    switched to eval mode. ``quantize_dynamic`` is then applied to its
    ``torch.nn.Linear`` modules with ``torch.qint8``.
    """
    float_model = NetArch3()
    saved_weights = torch.load(model_path)
    float_model.load_state_dict(saved_weights)
    float_model.eval()

    return quantize_dynamic(
        model=float_model,
        qconfig_spec={torch.nn.Linear},
        dtype=torch.qint8,
    )

def prepare_model_for_qat(model):
    """Attach the default 'fbgemm' QAT qconfig to ``model`` and return the ``prepare_qat`` result.

    The ``qconfig`` attribute is set on the passed-in module itself.
    """
    qat_qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
    model.qconfig = qat_qconfig
    return torch.quantization.prepare_qat(model)

def convert_qat_model(model):
    """Switch ``model`` to eval mode and return ``torch.quantization.convert`` of it."""
    model.eval()  # in place; eval() returns the same module object
    return torch.quantization.convert(model)


def get_k_fold_split(X, y, A, k, num_splits=5):
    """Split ``X``, ``y`` and ``A`` into train/test parts for fold ``k``.

    Folds are contiguous, unshuffled slices of ``len(X) // num_splits`` rows.
    The last fold (``k == num_splits - 1``) also takes the leftover rows.

    Returns:
        tuple: ``(X_train, y_train, A_train, X_test, y_test, A_test)``.
    """
    n_samples = len(X)
    all_idx = np.arange(n_samples)
    fold_len = n_samples // num_splits

    lo = k * fold_len
    # The final fold runs to the end so no sample is left out.
    hi = n_samples if k == num_splits - 1 else lo + fold_len

    held_out = all_idx[lo:hi]
    kept = np.concatenate((all_idx[:lo], all_idx[hi:]))

    train_parts = tuple(arr[kept] for arr in (X, y, A))
    test_parts = tuple(arr[held_out] for arr in (X, y, A))
    return (*train_parts, *test_parts)


def train_baseline_model(X_train, y_train, fold, num_epochs=10, batch_size=64):
    """Train a NetArch3 on one fold, report its training F1 and save its weights.

    Args:
        X_train: Training features, convertible to a float tensor.
        y_train: Training labels, convertible to a long tensor.
        fold: Zero-based fold index, used in the log line and the file name.
        num_epochs: Number of passes over the training data.
        batch_size: Mini-batch size of the shuffled loader.

    Returns:
        The trained model, left in eval mode. Its state dict is also written
        to ``model_fold{fold}.pkl``.
    """
    net = NetArch3()
    adam = optim.Adam(net.parameters(), lr=1e-3)
    nll = nn.NLLLoss()

    features = torch.FloatTensor(X_train)
    targets = torch.LongTensor(y_train)
    train_loader = DataLoader(
        TensorDataset(features, targets), batch_size=batch_size, shuffle=True
    )

    net.train()
    for _ in range(num_epochs):
        for xb, yb in train_loader:
            adam.zero_grad()
            nll(net(xb), yb).backward()
            adam.step()

    # Score on the training data itself, iterating the same shuffled loader.
    net.eval()
    labels, guesses = [], []
    with torch.no_grad():
        for xb, yb in train_loader:
            winners = net(xb).max(dim=1).indices
            labels.extend(yb.tolist())
            guesses.extend(winners.tolist())

    f1 = f1_score(labels, guesses)
    print(f"Fold {fold + 1}: Train F1 Score = {f1:.4f}")

    save_model(net, f"model_fold{fold}.pkl")

    return net

def train_qat_model(X_train, y_train, fold, num_epochs=10, batch_size=64):
    """Train a QATArch3 with quantization-aware training and convert it.

    Args:
        X_train: Training features, convertible to a float tensor.
        y_train: Training labels, convertible to a long tensor.
        fold: Zero-based fold index, used in the log line and the file name.
        num_epochs: Number of passes over the training data.
        batch_size: Mini-batch size of the shuffled loader.

    Returns:
        The converted (quantized) model. Its state dict is also written to
        ``qat_model_fold{fold}.pkl``.
    """
    model = QATArch3()
    model = prepare_model_for_qat(model)

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.NLLLoss()

    dataset = TensorDataset(torch.FloatTensor(X_train), torch.LongTensor(y_train))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # The per-batch running loss total was removed: it was accumulated but
    # never read.
    for epoch in range(num_epochs):
        model.train()

        for batch_X, batch_y in train_loader:
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    model.eval()
    quantized_model = convert_qat_model(model)

    # Score the converted model on the training data itself.
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_X, batch_y in train_loader:
            outputs = quantized_model(batch_X)
            _, predicted = torch.max(outputs, 1)
            y_true.extend(batch_y.numpy())
            y_pred.extend(predicted.numpy())

    f1 = f1_score(y_true, y_pred)
    print(f"Fold {fold + 1}: Train F1 Score (QAT) = {f1:.4f}")

    model_path = f"qat_model_fold{fold}.pkl"
    save_model(quantized_model, model_path)

    return quantized_model


def evaluate_baseline_model(model, X_test, y_test, A_test):
    """Compute fairness metrics and F1 for the baseline model on a test fold.

    Args:
        model: Trained network returning per-class scores.
        X_test: Test features, convertible to a float tensor.
        y_test: Ground-truth test labels.
        A_test: Sensitive-feature value of each test sample.

    Returns:
        dict: Metric name -> value rounded to 3 decimals. The
        ``"Baseline Theil Index - Within"`` entry is a pandas Series with one
        value per group; every other entry is a scalar.
    """
    model.eval()
    # Inference only: call the module (not .forward directly) and skip
    # autograd bookkeeping.
    with torch.no_grad():
        output = model(torch.FloatTensor(X_test))
    _, y_pred = torch.max(output, dim=1)

    y_pred = pd.Series(y_pred.numpy())
    y_test = pd.Series(y_test)

    # The per-group F1 MetricFrame built here previously was never used, so
    # it has been dropped.
    baseline_metrics = {
        "Baseline Statistical Parity Difference": round(calculate_statistical_parity_difference(y_test, y_pred, A_test), 3),
        "Baseline Average Odds Difference": round(calculate_average_odds_difference(y_test, y_pred, A_test), 3),
        "Baseline Disparate Impact": round(calculate_disparate_impact(y_test, y_pred, A_test), 3),
        "Baseline Theil Index": round(calculate_theil_index(y_test, y_pred), 3),
        "Baseline Theil Index - Within": round(theil_by_group(y_pred, A_test), 3),
        "Baseline F1 Score": round(f1_score(y_test, y_pred), 3),
    }

    return baseline_metrics

def evaluate_qat_model(model, X_test, y_test, A_test):
    """Compute fairness metrics and F1 for the QAT model on a test fold.

    Args:
        model: Converted QAT network returning per-class scores.
        X_test: Test features as a NumPy array. NaN entries are replaced by
            the mean of their column before inference.
        y_test: Ground-truth test labels.
        A_test: Sensitive-feature value of each test sample.

    Returns:
        dict: Metric name -> value rounded to 3 decimals. The
        ``"QAT Theil Index-Within"`` entry is a pandas Series with one value
        per group; every other entry is a scalar.
    """
    # astype returns a new array, so the caller's data is never modified.
    X_test = X_test.astype(np.float32)

    nan_mask = np.isnan(X_test)
    if nan_mask.any():
        col_means = np.nanmean(X_test, axis=0)
        # Broadcast the column means over the rows so every NaN receives the
        # mean of its own column. The previous code indexed col_means with
        # the first row of the mask, which picked the wrong values or failed
        # on a shape mismatch.
        X_test = np.where(nan_mask, col_means, X_test)

    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

    model.eval()

    with torch.no_grad():
        output = model(X_test_tensor)
        _, y_pred = torch.max(output, dim=1)

    y_pred_np = y_pred.cpu().numpy()

    qat_metrics = {
        "QAT Statistical Parity Difference": round(calculate_statistical_parity_difference(y_test, y_pred_np, A_test), 3),
        "QAT Average Odds Difference": round(calculate_average_odds_difference(y_test, y_pred_np, A_test), 3),
        "QAT Disparate Impact": round(calculate_disparate_impact(y_test, y_pred_np, A_test), 3),
        "QAT Theil Index": round(calculate_theil_index(y_test, y_pred_np), 3),
        "QAT Theil Index-Within": round(theil_by_group(y_pred_np, A_test), 3),
        "QAT F1 Score": round(f1_score(y_test, y_pred_np), 3),
    }

    return qat_metrics

def evaluate_compressed_model(model, X_test, y_test, A_test):
    """Compute fairness metrics and F1 for the dynamically quantized model.

    Args:
        model: Compressed network returning per-class scores.
        X_test: Test features, convertible to a float tensor.
        y_test: Ground-truth test labels.
        A_test: Sensitive-feature value of each test sample.

    Returns:
        dict: Metric name -> value rounded to 3 decimals. The
        ``"Compressed Theil Index-Within"`` entry is a pandas Series with one
        value per group; every other entry is a scalar.
    """
    model.eval()
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(torch.FloatTensor(X_test))
    _, y_pred = torch.max(output, dim=1)

    y_pred = pd.Series(y_pred.numpy())
    y_test = pd.Series(y_test)

    # The per-group F1 MetricFrame built here previously was never used, so
    # it has been dropped.
    compressed_metrics = {
        "Compressed Statistical Parity Difference": round(calculate_statistical_parity_difference(y_test, y_pred, A_test), 3),
        "Compressed Average Odds Difference": round(calculate_average_odds_difference(y_test, y_pred, A_test), 3),
        "Compressed Disparate Impact": round(calculate_disparate_impact(y_test, y_pred, A_test), 3),
        "Compressed Theil Index": round(calculate_theil_index(y_test, y_pred), 3),
        "Compressed Theil Index-Within": round(theil_by_group(y_pred, A_test), 3),
        "Compressed F1 Score": round(f1_score(y_test, y_pred), 3),
    }
    return compressed_metrics


### k‑Fold Cross Validation


In [None]:

# Cross-validation driver: for every fold, train and evaluate the baseline,
# dynamically quantized and QAT variants, then dump the per-fold metrics to CSV.
X, y, A = load_your_dataset()
num_splits = 10

# One metrics dict per fold for each model variant.
baseline_fold_metrics = []
compressed_fold_metrics = []
qat_fold_metrics = []

for k in range(num_splits):
    print(f"\nProcessing fold {k + 1}/{num_splits}")

    X_train, y_train, A_train, X_test, y_test, A_test = get_k_fold_split(X, y, A, k, num_splits)

    # Baseline: trained in float; train_baseline_model also saves its weights to disk.
    baseline_model = train_baseline_model(X_train, y_train, k)
    baseline_eval = evaluate_baseline_model(baseline_model, X_test, y_test, A_test)
    # Same file name that train_baseline_model wrote for this fold.
    model_path = f"model_fold{k}.pkl"

    # Post-training dynamic quantization of the saved baseline weights.
    compressed_model = dynamic_compress_model(model_path)
    compressed_eval = evaluate_compressed_model(compressed_model, X_test, y_test, A_test)

    # Quantization-aware training on the same training split.
    qat_model = train_qat_model(X_train, y_train, k)
    qat_eval = evaluate_qat_model(qat_model, X_test, y_test, A_test)

    baseline_fold_metrics.append(baseline_eval)
    compressed_fold_metrics.append(compressed_eval)
    qat_fold_metrics.append(qat_eval)

    print(f"Baseline Eval: {baseline_eval}")
    print(f"Compressed Eval: {compressed_eval}")
    print(f"QAT Eval: {qat_eval}")

# One row per fold, one column per metric.
baseline_df = pd.DataFrame(baseline_fold_metrics)
compressed_df = pd.DataFrame(compressed_fold_metrics)
qat_df = pd.DataFrame(qat_fold_metrics)


baseline_df.to_csv("baseline_metrics.csv", index=True, float_format="%.3f")
compressed_df.to_csv("compressed_metrics.csv", index=True, float_format="%.3f")
qat_df.to_csv("qat_metrics.csv", index=True, float_format="%.3f")


  y = y.replace({'<=50K': 0, '<=50K.': 0, '>50K': 1, '>50K.': 1}).astype(int)



Processing fold 1/10
Fold 1: Train F1 Score = 0.6688




Fold 1: Train F1 Score (QAT) = 0.6852
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.172), 'Baseline Average Odds Difference': np.float64(0.102), 'Baseline Disparate Impact': np.float64(0.303), 'Baseline Theil Index': np.float64(-0.097), 'Baseline Theil Index - Within': group
Female    2.594
Male      1.400
Name: value, dtype: float64, 'Baseline F1 Score': 0.648}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.209), 'Compressed Average Odds Difference': np.float64(0.125), 'Compressed Disparate Impact': np.float64(0.295), 'Compressed Theil Index': np.float64(-0.025), 'Compressed Theil Index-Within': group
Female    2.438
Male      1.217
Name: value, dtype: float64, 'Compressed F1 Score': 0.672}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.218), 'QAT Average Odds Difference': np.float64(0.14), 'QAT Disparate Impact': np.float64(0.276), 'QAT Theil Index': np.float64(-0.022), 'QAT Theil Index-Within': group
Female    2.490




Fold 2: Train F1 Score (QAT) = 0.6604
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.186), 'Baseline Average Odds Difference': np.float64(0.084), 'Baseline Disparate Impact': np.float64(0.316), 'Baseline Theil Index': np.float64(-0.042), 'Baseline Theil Index - Within': group
Female    2.453
Male      1.303
Name: value, dtype: float64, 'Baseline F1 Score': 0.674}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.255), 'Compressed Average Odds Difference': np.float64(0.13), 'Compressed Disparate Impact': np.float64(0.321), 'Compressed Theil Index': np.float64(0.087), 'Compressed Theil Index-Within': group
Female    2.117
Male      0.979
Name: value, dtype: float64, 'Compressed F1 Score': 0.689}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.162), 'QAT Average Odds Difference': np.float64(0.066), 'QAT Disparate Impact': np.float64(0.313), 'QAT Theil Index': np.float64(-0.1), 'QAT Theil Index-Within': group
Female    2.608
Mal



Fold 3: Train F1 Score (QAT) = 0.6720
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.221), 'Baseline Average Odds Difference': np.float64(0.116), 'Baseline Disparate Impact': np.float64(0.259), 'Baseline Theil Index': np.float64(-0.024), 'Baseline Theil Index - Within': group
Female    2.558
Male      1.207
Name: value, dtype: float64, 'Baseline F1 Score': 0.674}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.28), 'Compressed Average Odds Difference': np.float64(0.128), 'Compressed Disparate Impact': np.float64(0.287), 'Compressed Theil Index': np.float64(0.087), 'Compressed Theil Index-Within': group
Female    2.182
Male      0.934
Name: value, dtype: float64, 'Compressed F1 Score': 0.687}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.196), 'QAT Average Odds Difference': np.float64(0.103), 'QAT Disparate Impact': np.float64(0.26), 'QAT Theil Index': np.float64(-0.073), 'QAT Theil Index-Within': group
Female    2.676
Ma



Fold 4: Train F1 Score (QAT) = 0.5769
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.179), 'Baseline Average Odds Difference': np.float64(0.07), 'Baseline Disparate Impact': np.float64(0.352), 'Baseline Theil Index': np.float64(-0.04), 'Baseline Theil Index - Within': group
Female    2.331
Male      1.286
Name: value, dtype: float64, 'Baseline F1 Score': 0.682}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.226), 'Compressed Average Odds Difference': np.float64(0.091), 'Compressed Disparate Impact': np.float64(0.354), 'Compressed Theil Index': np.float64(0.054), 'Compressed Theil Index-Within': group
Female    2.087
Male      1.050
Name: value, dtype: float64, 'Compressed F1 Score': 0.694}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.117), 'QAT Average Odds Difference': np.float64(0.053), 'QAT Disparate Impact': np.float64(0.304), 'QAT Theil Index': np.float64(-0.238), 'QAT Theil Index-Within': group
Female    2.969
Ma



Fold 5: Train F1 Score (QAT) = 0.6823
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.166), 'Baseline Average Odds Difference': np.float64(0.079), 'Baseline Disparate Impact': np.float64(0.294), 'Baseline Theil Index': np.float64(-0.121), 'Baseline Theil Index - Within': group
Female    2.674
Male      1.449
Name: value, dtype: float64, 'Baseline F1 Score': 0.659}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.21), 'Compressed Average Odds Difference': np.float64(0.088), 'Compressed Disparate Impact': np.float64(0.309), 'Compressed Theil Index': np.float64(-0.016), 'Compressed Theil Index-Within': group
Female    2.367
Male      1.193
Name: value, dtype: float64, 'Compressed F1 Score': 0.687}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.189), 'QAT Average Odds Difference': np.float64(0.067), 'QAT Disparate Impact': np.float64(0.33), 'QAT Theil Index': np.float64(-0.041), 'QAT Theil Index-Within': group
Female    2.373
M



Fold 6: Train F1 Score (QAT) = 0.6700
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.168), 'Baseline Average Odds Difference': np.float64(0.082), 'Baseline Disparate Impact': np.float64(0.303), 'Baseline Theil Index': np.float64(-0.107), 'Baseline Theil Index - Within': group
Female    2.619
Male      1.424
Name: value, dtype: float64, 'Baseline F1 Score': 0.654}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.202), 'Compressed Average Odds Difference': np.float64(0.1), 'Compressed Disparate Impact': np.float64(0.294), 'Compressed Theil Index': np.float64(-0.04), 'Compressed Theil Index-Within': group
Female    2.476
Male      1.252
Name: value, dtype: float64, 'Compressed F1 Score': 0.686}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.182), 'QAT Average Odds Difference': np.float64(0.089), 'QAT Disparate Impact': np.float64(0.287), 'QAT Theil Index': np.float64(-0.085), 'QAT Theil Index-Within': group
Female    2.611
Ma



Fold 7: Train F1 Score (QAT) = 0.6456
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.188), 'Baseline Average Odds Difference': np.float64(0.109), 'Baseline Disparate Impact': np.float64(0.302), 'Baseline Theil Index': np.float64(-0.068), 'Baseline Theil Index - Within': group
Female    2.511
Male      1.312
Name: value, dtype: float64, 'Baseline F1 Score': 0.671}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.263), 'Compressed Average Odds Difference': np.float64(0.167), 'Compressed Disparate Impact': np.float64(0.281), 'Compressed Theil Index': np.float64(0.05), 'Compressed Theil Index-Within': group
Female    2.272
Male      1.005
Name: value, dtype: float64, 'Compressed F1 Score': 0.694}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.162), 'QAT Average Odds Difference': np.float64(0.093), 'QAT Disparate Impact': np.float64(0.305), 'QAT Theil Index': np.float64(-0.124), 'QAT Theil Index-Within': group
Female    2.642
M



Fold 8: Train F1 Score (QAT) = 0.6634
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.201), 'Baseline Average Odds Difference': np.float64(0.111), 'Baseline Disparate Impact': np.float64(0.278), 'Baseline Theil Index': np.float64(-0.043), 'Baseline Theil Index - Within': group
Female    2.557
Male      1.277
Name: value, dtype: float64, 'Baseline F1 Score': 0.686}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.229), 'Compressed Average Odds Difference': np.float64(0.131), 'Compressed Disparate Impact': np.float64(0.292), 'Compressed Theil Index': np.float64(0.019), 'Compressed Theil Index-Within': group
Female    2.357
Male      1.128
Name: value, dtype: float64, 'Compressed F1 Score': 0.692}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.162), 'QAT Average Odds Difference': np.float64(0.074), 'QAT Disparate Impact': np.float64(0.311), 'QAT Theil Index': np.float64(-0.104), 'QAT Theil Index-Within': group
Female    2.612




Fold 9: Train F1 Score (QAT) = 0.6769
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.169), 'Baseline Average Odds Difference': np.float64(0.081), 'Baseline Disparate Impact': np.float64(0.296), 'Baseline Theil Index': np.float64(-0.093), 'Baseline Theil Index - Within': group
Female    2.642
Male      1.426
Name: value, dtype: float64, 'Baseline F1 Score': 0.66}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.221), 'Compressed Average Odds Difference': np.float64(0.092), 'Compressed Disparate Impact': np.float64(0.322), 'Compressed Theil Index': np.float64(0.033), 'Compressed Theil Index-Within': group
Female    2.254
Male      1.122
Name: value, dtype: float64, 'Compressed F1 Score': 0.684}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.183), 'QAT Average Odds Difference': np.float64(0.088), 'QAT Disparate Impact': np.float64(0.302), 'QAT Theil Index': np.float64(-0.056), 'QAT Theil Index-Within': group
Female    2.532
M



Fold 10: Train F1 Score (QAT) = 0.6597
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.164), 'Baseline Average Odds Difference': np.float64(0.058), 'Baseline Disparate Impact': np.float64(0.32), 'Baseline Theil Index': np.float64(-0.099), 'Baseline Theil Index - Within': group
Female    2.561
Male      1.422
Name: value, dtype: float64, 'Baseline F1 Score': 0.659}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.217), 'Compressed Average Odds Difference': np.float64(0.089), 'Compressed Disparate Impact': np.float64(0.334), 'Compressed Theil Index': np.float64(0.024), 'Compressed Theil Index-Within': group
Female    2.218
Male      1.121
Name: value, dtype: float64, 'Compressed F1 Score': 0.683}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.162), 'QAT Average Odds Difference': np.float64(0.055), 'QAT Disparate Impact': np.float64(0.329), 'QAT Theil Index': np.float64(-0.099), 'QAT Theil Index-Within': group
Female    2.537


In [None]:
# Show the per-fold baseline metrics and write them to baseline_metrics.csv
# (the same file name the cross-validation cell writes).
print(baseline_df)
baseline_df.to_csv("baseline_metrics.csv", index=True, float_format="%.3f")

   Baseline Statistical Parity Difference  Baseline Average Odds Difference  \
0                                   0.172                             0.102   
1                                   0.186                             0.084   
2                                   0.221                             0.116   
3                                   0.179                             0.070   
4                                   0.166                             0.079   
5                                   0.168                             0.082   
6                                   0.188                             0.109   
7                                   0.201                             0.111   
8                                   0.169                             0.081   
9                                   0.164                             0.058   

   Baseline Disparate Impact  Baseline Theil Index  \
0                      0.303                -0.097   
1                     

In [None]:
# Zip everything under /content into one archive and hand it to the Colab
# download helper.
!zip -r colab_files.zip /content
from google.colab import files
files.download('colab_files.zip')

  adding: content/ (stored 0%)
  adding: content/.config/ (stored 0%)
  adding: content/.config/.last_update_check.json (deflated 22%)
  adding: content/.config/active_config (stored 0%)
  adding: content/.config/configurations/ (stored 0%)
  adding: content/.config/configurations/config_default (deflated 15%)
  adding: content/.config/.last_survey_prompt.yaml (stored 0%)
  adding: content/.config/logs/ (stored 0%)
  adding: content/.config/logs/2025.05.09/ (stored 0%)
  adding: content/.config/logs/2025.05.09/13.40.57.224020.log (deflated 92%)
  adding: content/.config/logs/2025.05.09/13.41.27.718573.log (deflated 58%)
  adding: content/.config/logs/2025.05.09/13.41.26.495445.log (deflated 87%)
  adding: content/.config/logs/2025.05.09/13.41.18.092482.log (deflated 58%)
  adding: content/.config/logs/2025.05.09/13.41.36.263788.log (deflated 57%)
  adding: content/.config/logs/2025.05.09/13.41.36.930373.log (deflated 57%)
  adding: content/.config/hidden_gcloud_config_universe_descript

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>