# NewV2 Adult Fairness NetArch2

In [None]:
!pip install ucimlrepo
!pip install fairlearn
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.quantization import quantize_dynamic, QConfig, prepare_qat, convert
from torch.quantization.fake_quantize import FakeQuantize
from torch.quantization.observer import MovingAverageMinMaxObserver
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, matthews_corrcoef, f1_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from ucimlrepo import fetch_ucirepo
from fairlearn.metrics import MetricFrame
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from fairlearn.metrics import (
    MetricFrame,
    selection_rate,
    true_positive_rate,
    false_positive_rate
)

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7
Collecting fairlearn
  Downloading fairlearn-0.12.0-py3-none-any.whl.metadata (7.0 kB)
Downloading fairlearn-0.12.0-py3-none-any.whl (240 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fairlearn
Successfully installed fairlearn-0.12.0


## Code Persistence

In [None]:
def save_model(model, model_path):
    """Write the model's ``state_dict`` (parameters only) to ``model_path``."""
    weights = model.state_dict()
    torch.save(weights, model_path)
"""
def save_values_to_txt(data, filename):
  with open(filename, 'a') as f:
      for entry in data:
          for key, value in entry.items():
              f.write(f'{key}: {value}\n')
          f.write('\n')
  print(f"Data appended to {filename}")
"""
def save_values_to_txt(data, filename):
    """Append metric records to a text file as ``key: value`` lines.

    Args:
        data: Either a single dict or an iterable of dicts. A lone dict is
            treated as a one-element list.
        filename: Path of the file to append to (created if missing).

    Each record is followed by one blank line.
    """
    records = [data] if isinstance(data, dict) else data
    with open(filename, 'a') as out:
        for record in records:
            out.writelines(f'{name}: {val}\n' for name, val in record.items())
            out.write('\n')
    print(f"Data appended to {filename}")

In [None]:
# Seed every RNG the notebook draws from so repeated runs give the same numbers.
random.seed(42)
np.random.seed(42)
# torch's generator drives nn.Linear weight initialisation and the
# shuffle=True DataLoaders used during training, so it must be seeded too.
torch.manual_seed(42)

### Data Loading

In [None]:
def load_your_dataset():
    """Fetch the UCI Adult dataset and return model-ready NumPy arrays.

    Returns:
        tuple: ``(X_np, y_np, A_np)`` where
            * ``X_np`` is the one-hot encoded (``drop_first=True``) and
              standardised feature matrix as ``float32``;
            * ``y_np`` is the binary income label as ``int64``
              (``<=50K`` -> 0, ``>50K`` -> 1, with or without a trailing dot);
            * ``A_np`` is the raw ``sex`` column, used as the sensitive attribute.

    Raises:
        ValueError: If the target column contains a label other than the
            two income brackets.

    NOTE(review): the scaler is fit on the full dataset, i.e. before any
    train/test split, so test-fold statistics leak into the features.
    """
    adult = fetch_ucirepo(id=2)
    X = adult.data.features

    # Normalise the label strings and map them explicitly. This replaces the
    # earlier ``replace(...).astype(int)`` + LabelEncoder combination, which
    # emitted a pandas downcasting FutureWarning and re-encoded values that
    # were already 0/1.
    raw_labels = pd.Series(np.asarray(adult.data.targets).ravel()).astype(str)
    y = raw_labels.str.strip().str.rstrip('.').map({'<=50K': 0, '>50K': 1})
    if y.isna().any():
        unexpected = sorted(set(raw_labels[y.isna()]))
        raise ValueError(f"Unexpected income labels: {unexpected}")

    X_encoded = pd.get_dummies(X, drop_first=True)

    scaler = StandardScaler()
    X_np = scaler.fit_transform(X_encoded).astype(np.float32)
    y_np = y.to_numpy(dtype=np.int64)

    A_np = X["sex"].values

    return X_np, y_np, A_np

X_np, y_np, A_np = load_your_dataset()

  y = y.replace({'<=50K': 0, '<=50K.': 0, '>50K': 1, '>50K.': 1}).astype(int)


## Fairness Evaluation

* Statistical Parity Difference: Closer to 0 is better. 0 means no bias.
* Average Odds Difference: 0 is ideal, indicating no discrimination.
* Disparate Impact: Aim for 1. Values close to 1 indicate fairness.
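* Theil Index: Lower is better. 0 means perfect equality. (Note: `calculate_theil_index` below computes the difference between the binary entropy of the predicted positive rate and that of the true positive rate, so the values it reports can be negative.)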

In [None]:
def calculate_statistical_parity_difference(y_true, y_pred, A_test):
    """Return the between-group difference in selection rate.

    Args:
        y_true: Ground-truth labels for the evaluated samples.
        y_pred: Predicted labels for the same samples.
        A_test: Sensitive-feature value for each sample.

    Returns:
        The ``between_groups`` difference of the per-group selection rates
        as reported by ``MetricFrame.difference``.
    """
    # Use the y_true argument; the previous version read a global ``y_test``,
    # which raised NameError (or used the wrong labels) outside the notebook loop.
    mfm = MetricFrame(
        metrics=selection_rate,
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=A_test,
    )
    return mfm.difference(method='between_groups')

def calculate_average_odds_difference(y_true, y_pred, A_test):
    """Return the between-group difference of ``(TPR + FPR) / 2``.

    The larger of the two distinct values in ``y_true`` is used as the
    positive label.

    Raises:
        ValueError: If ``y_true`` does not contain exactly two distinct values.
    """
    labels = np.unique(y_true)
    if len(labels) != 2:
        raise ValueError("y_true should have exactly two unique values for binary classification")
    positive = labels[1]

    def average_odds(y_true, y_pred):
        # Mean of true-positive and false-positive rate for one group.
        hit_rate = true_positive_rate(y_true, y_pred, pos_label=positive)
        false_alarm_rate = false_positive_rate(y_true, y_pred, pos_label=positive)
        return (hit_rate + false_alarm_rate) / 2

    frame = MetricFrame(
        metrics=average_odds,
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=A_test,
    )
    return frame.difference(method='between_groups')

def calculate_disparate_impact(y_true, y_pred, A_test):
    """Return the between-group ratio of selection rates.

    Args:
        y_true: Ground-truth labels for the evaluated samples.
        y_pred: Predicted labels for the same samples.
        A_test: Sensitive-feature value for each sample.
    """
    def group_selection_rate(y_true, y_pred):
        return selection_rate(y_true, y_pred)

    frame = MetricFrame(
        metrics=group_selection_rate,
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=A_test,
    )
    return frame.ratio(method='between_groups')

def calculate_theil_index(y_true, y_pred):
    """Return the entropy of the predicted labels minus that of the true labels.

    Each entropy is the base-2 binary entropy of the fraction of samples
    equal to 1, with a small epsilon inside the logarithms to avoid
    ``log2(0)``. The result is negative when the predictions are less
    balanced than the ground truth.
    """
    epsilon = 1e-10

    def binary_entropy(p):
        q = 1 - p
        return -(p * np.log2(p + epsilon) + q * np.log2(q + epsilon))

    true_rate = np.mean(y_true == 1)
    predicted_rate = np.mean(y_pred == 1)
    return binary_entropy(predicted_rate) - binary_entropy(true_rate)

## Model Architecture

In [None]:
# Input width for the networks defined below: the number of columns in the
# encoded, scaled feature matrix returned by load_your_dataset().
n_input = X_np.shape[1]

class NetArch1(nn.Module):
    """MLP classifier: in_features -> 128 -> 64 -> 2 with LayerNorm after each ReLU.

    The forward pass returns log-probabilities (``log_softmax`` over dim 1).

    Args:
        in_features: Number of input features. When omitted, the
            notebook-level ``n_input`` is used, so ``NetArch1()`` keeps
            working as before.
    """

    def __init__(self, in_features=None):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: size the first layer from the global.
            in_features = n_input

        self.fc1 = nn.Linear(in_features, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)

        self.layernorm1 = nn.LayerNorm(128)
        self.layernorm2 = nn.LayerNorm(64)

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to ``(batch, 2)`` log-probabilities."""
        x = self.layernorm1(F.relu(self.fc1(x)))
        x = self.layernorm2(F.relu(self.fc2(x)))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

class NetArch2(nn.Module):
    """MLP classifier: in_features -> 100 -> 100 -> 100 -> 2 with ReLU activations.

    The forward pass returns log-probabilities (``log_softmax`` over dim 1).

    Args:
        in_features: Number of input features. When omitted, the
            notebook-level ``n_input`` is used, so ``NetArch2()`` keeps
            working as before.
    """

    def __init__(self, in_features=None):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: size the first layer from the global.
            in_features = n_input

        self.fc1 = nn.Linear(in_features, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to ``(batch, 2)`` log-probabilities."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return F.log_softmax(x, dim=1)

class NetArch3(nn.Module):
    """MLP classifier: in_features -> 100 -> 100 -> 100 -> 2 with sigmoid activations.

    The forward pass returns log-probabilities (``log_softmax`` over dim 1).

    Args:
        in_features: Number of input features. When omitted, the
            notebook-level ``n_input`` is used, so ``NetArch3()`` keeps
            working as before.
    """

    def __init__(self, in_features=None):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: size the first layer from the global.
            in_features = n_input

        self.fc1 = nn.Linear(in_features, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to ``(batch, 2)`` log-probabilities."""
        # torch.sigmoid matches the QAT counterpart of this architecture.
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        x = self.fc4(x)
        return F.log_softmax(x, dim=1)

## QAT Compatible Model Architecture

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.quantization

# Same expression as the earlier n_input assignment; repeated in this cell
# (presumably so the cell can be run on its own — TODO confirm).
n_input = X_np.shape[1]

class QATArch1(nn.Module):
    """QAT-ready variant of the 128/64 LayerNorm MLP.

    Wraps the float computation between a ``QuantStub`` and a ``DeQuantStub``.
    ``log_softmax`` is applied after de-quantisation.

    Args:
        in_features: Number of input features. When omitted, the
            notebook-level ``n_input`` is used, so ``QATArch1()`` keeps
            working as before.
    """

    def __init__(self, in_features=None):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: size the first layer from the global.
            in_features = n_input

        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(in_features, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)
        self.layernorm1 = nn.LayerNorm(128)
        self.layernorm2 = nn.LayerNorm(64)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to ``(batch, 2)`` log-probabilities."""
        x = self.quant(x)
        x = self.layernorm1(F.relu(self.fc1(x)))
        x = self.layernorm2(F.relu(self.fc2(x)))
        x = self.fc3(x)
        x = self.dequant(x)
        return F.log_softmax(x, dim=1)

class QATArch2(nn.Module):
    """QAT-ready variant of the 100/100/100 ReLU MLP.

    Wraps the float computation between a ``QuantStub`` and a ``DeQuantStub``.
    ``log_softmax`` is applied after de-quantisation.

    Args:
        in_features: Number of input features. When omitted, the
            notebook-level ``n_input`` is used, so ``QATArch2()`` keeps
            working as before.
    """

    def __init__(self, in_features=None):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: size the first layer from the global.
            in_features = n_input

        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(in_features, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to ``(batch, 2)`` log-probabilities."""
        x = self.quant(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        x = self.dequant(x)
        return F.log_softmax(x, dim=1)

class QATArch3(nn.Module):
    """QAT-ready variant of the 100/100/100 sigmoid MLP.

    Wraps the float computation between a ``QuantStub`` and a ``DeQuantStub``.
    ``log_softmax`` is applied after de-quantisation.

    Args:
        in_features: Number of input features. When omitted, the
            notebook-level ``n_input`` is used, so ``QATArch3()`` keeps
            working as before.
    """

    def __init__(self, in_features=None):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: size the first layer from the global.
            in_features = n_input

        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(in_features, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to ``(batch, 2)`` log-probabilities."""
        x = self.quant(x)
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        x = self.fc4(x)
        x = self.dequant(x)
        return F.log_softmax(x, dim=1)

In [None]:
def dynamic_compress_model(model_path):
    """Load saved ``NetArch2`` weights and return a dynamically quantised copy.

    Args:
        model_path: File holding a ``NetArch2`` ``state_dict``.

    Returns:
        The result of ``quantize_dynamic`` with ``nn.Linear`` layers targeted
        and ``torch.qint8`` as the quantised dtype.
    """
    float_model = NetArch2()
    saved_weights = torch.load(model_path)
    float_model.load_state_dict(saved_weights)
    float_model.eval()

    return quantize_dynamic(
        model=float_model,
        qconfig_spec={torch.nn.Linear},
        dtype=torch.qint8,
    )

def prepare_model_for_qat(model):
    """Attach the default ``fbgemm`` QAT qconfig to ``model`` and prepare it.

    The qconfig is set on the object passed in; the value returned is
    whatever ``torch.quantization.prepare_qat`` produces for it.
    """
    qat_config = torch.quantization.get_default_qat_qconfig('fbgemm')
    model.qconfig = qat_config
    return torch.quantization.prepare_qat(model)

def convert_qat_model(model):
    """Switch ``model`` to eval mode and return its converted (quantised) form."""
    frozen = model.eval()
    return torch.quantization.convert(frozen)


def get_k_fold_split(X, y, A, k, num_splits=5):
    """Return the train/test partition for fold ``k`` of a contiguous k-fold split.

    The data is not shuffled: fold ``k`` is the slice
    ``[k * fold_size, (k + 1) * fold_size)`` with
    ``fold_size = len(X) // num_splits``. The last fold also absorbs the
    remainder rows.

    Args:
        X: Feature array, indexable by an integer index array.
        y: Label array aligned with ``X``.
        A: Sensitive-attribute array aligned with ``X``.
        k: Zero-based fold index, ``0 <= k < num_splits``.
        num_splits: Total number of folds (at least 1).

    Returns:
        tuple: ``(X_train, y_train, A_train, X_test, y_test, A_test)``.

    Raises:
        ValueError: If ``num_splits`` is smaller than 1 or ``k`` is outside
            ``[0, num_splits)``.
    """
    if num_splits < 1:
        raise ValueError(f"num_splits must be at least 1, got {num_splits}")
    # An out-of-range k used to yield an empty test fold (k too large) or a
    # training set with duplicated rows (negative k) without any error.
    if not 0 <= k < num_splits:
        raise ValueError(f"k must satisfy 0 <= k < {num_splits}, got {k}")

    indices = np.arange(len(X))
    fold_size = len(X) // num_splits

    start = k * fold_size
    end = start + fold_size if k != num_splits - 1 else len(X)

    test_idx = indices[start:end]
    train_idx = np.concatenate([indices[:start], indices[end:]])

    return X[train_idx], y[train_idx], A[train_idx], X[test_idx], y[test_idx], A[test_idx]


def train_baseline_model(X_train, y_train, fold, num_epochs=10, batch_size=64):
    """Train a float ``NetArch2`` on one fold, report train F1 and save the weights.

    Args:
        X_train: Training features (converted with ``torch.FloatTensor``).
        y_train: Training labels (converted with ``torch.LongTensor``).
        fold: Zero-based fold index; used in the log line and the file name.
        num_epochs: Number of passes over the training data.
        batch_size: Mini-batch size for the shuffled ``DataLoader``.

    Returns:
        The trained model, left in eval mode. Its ``state_dict`` is also
        written to ``model_fold{fold}.pkl``.
    """
    net = NetArch2()
    opt = optim.Adam(net.parameters(), lr=1e-3)
    loss_fn = nn.NLLLoss()

    loader = DataLoader(
        TensorDataset(torch.FloatTensor(X_train), torch.LongTensor(y_train)),
        batch_size=batch_size,
        shuffle=True,
    )

    net.train()
    for _ in range(num_epochs):
        for features, targets in loader:
            opt.zero_grad()
            batch_loss = loss_fn(net(features), targets)
            batch_loss.backward()
            opt.step()

    # Score the fitted model on the same training data it was fit on.
    net.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for features, targets in loader:
            predicted = torch.max(net(features), 1)[1]
            y_true.extend(targets.numpy())
            y_pred.extend(predicted.numpy())

    f1 = f1_score(y_true, y_pred)
    print(f"Fold {fold + 1}: Train F1 Score = {f1:.4f}")

    save_model(net, f"model_fold{fold}.pkl")

    return net

def train_qat_model(X_train, y_train, fold, num_epochs=10, batch_size=64):
    """Train ``QATArch2`` with quantisation-aware training and convert it.

    Args:
        X_train: Training features (converted with ``torch.FloatTensor``).
        y_train: Training labels (converted with ``torch.LongTensor``).
        fold: Zero-based fold index; used in the log line and the file name.
        num_epochs: Number of passes over the training data.
        batch_size: Mini-batch size for the shuffled ``DataLoader``.

    Returns:
        The converted (quantised) model. Its ``state_dict`` is also written
        to ``qat_model_fold{fold}.pkl``.
    """
    model = QATArch2()
    model = prepare_model_for_qat(model)

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.NLLLoss()

    dataset = TensorDataset(torch.FloatTensor(X_train), torch.LongTensor(y_train))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Nothing switches the model to eval mode during training, so setting
    # train mode once is enough. The unused running-loss total was dropped.
    model.train()
    for epoch in range(num_epochs):
        for batch_X, batch_y in train_loader:
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    model.eval()
    quantized_model = convert_qat_model(model)

    # Score the converted model on the same training data it was fit on.
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_X, batch_y in train_loader:
            outputs = quantized_model(batch_X)
            _, predicted = torch.max(outputs, 1)
            y_true.extend(batch_y.numpy())
            y_pred.extend(predicted.numpy())

    f1 = f1_score(y_true, y_pred)
    print(f"Fold {fold + 1}: Train F1 Score (QAT) = {f1:.4f}")

    model_path = f"qat_model_fold{fold}.pkl"
    save_model(quantized_model, model_path)

    return quantized_model


def evaluate_baseline_model(model, X_test, y_test, A_test):
    """Compute fairness metrics and F1 for the float baseline on a test fold.

    Args:
        model: Trained classifier returning per-class scores of shape
            ``(batch, n_classes)``.
        X_test: Test features (converted with ``torch.FloatTensor``).
        y_test: Ground-truth labels for ``X_test``.
        A_test: Sensitive-feature value for each test sample.

    Returns:
        dict: Metric name -> value rounded to three decimals, with every key
        prefixed by ``"Baseline "``.
    """
    # Inference only: eval mode and no autograd graph, matching
    # evaluate_qat_model. The per-group F1 MetricFrame that was built here
    # and never read has been removed.
    model.eval()
    with torch.no_grad():
        output = model(torch.FloatTensor(X_test))
        _, y_pred = torch.max(output, dim=1)

    y_pred = pd.Series(y_pred.numpy())
    y_test = pd.Series(y_test)

    baseline_metrics = {
        "Baseline Statistical Parity Difference": round(calculate_statistical_parity_difference(y_test, y_pred, A_test), 3),
        "Baseline Average Odds Difference": round(calculate_average_odds_difference(y_test, y_pred, A_test), 3),
        "Baseline Disparate Impact": round(calculate_disparate_impact(y_test, y_pred, A_test), 3),
        "Baseline Theil Index": round(calculate_theil_index(y_test, y_pred), 3),
        "Baseline F1 Score": round(f1_score(y_test, y_pred), 3),
    }

    return baseline_metrics

def evaluate_qat_model(model, X_test, y_test, A_test):
    """Compute fairness metrics and F1 for the QAT model on a test fold.

    Args:
        model: Converted QAT classifier returning per-class scores of shape
            ``(batch, n_classes)``.
        X_test: 2-D test feature array. It is cast to ``float32`` and any
            NaN is replaced by the mean of its column.
        y_test: Ground-truth labels for ``X_test``.
        A_test: Sensitive-feature value for each test sample.

    Returns:
        dict: Metric name -> value rounded to three decimals, keys prefixed
        with ``"QAT "``, plus an unprefixed ``"F1 Score"`` duplicate kept for
        backward compatibility with existing result files.
    """
    X_test = X_test.astype(np.float32)

    nan_mask = np.isnan(X_test)
    if nan_mask.any():
        col_means = np.nanmean(X_test, axis=0)
        # Broadcast the per-column means over the rows so every NaN gets the
        # mean of its own column. The previous code indexed the means with
        # the first row's mask only, which is wrong and usually a shape error.
        X_test = np.where(nan_mask, col_means, X_test)

    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

    model.eval()

    with torch.no_grad():
        output = model(X_test_tensor)
        _, y_pred = torch.max(output, dim=1)

    y_pred_np = y_pred.cpu().numpy()

    f1 = round(f1_score(y_test, y_pred_np), 3)

    qat_metrics = {
        "QAT Statistical Parity Difference": round(calculate_statistical_parity_difference(y_test, y_pred_np, A_test), 3),
        "QAT Average Odds Difference": round(calculate_average_odds_difference(y_test, y_pred_np, A_test), 3),
        "QAT Disparate Impact": round(calculate_disparate_impact(y_test, y_pred_np, A_test), 3),
        "QAT Theil Index": round(calculate_theil_index(y_test, y_pred_np), 3),
        "QAT F1 Score": f1,
        # NOTE(review): same value as "QAT F1 Score"; retained so the
        # returned keys (and the CSV columns built from them) do not change.
        "F1 Score": f1,
    }

    return qat_metrics

def evaluate_compressed_model(model, X_test, y_test, A_test):
    """Compute fairness metrics and F1 for the dynamically quantised model.

    Args:
        model: Compressed classifier returning per-class scores of shape
            ``(batch, n_classes)``.
        X_test: Test features (converted with ``torch.FloatTensor``).
        y_test: Ground-truth labels for ``X_test``.
        A_test: Sensitive-feature value for each test sample.

    Returns:
        dict: Metric name -> value rounded to three decimals, with every key
        prefixed by ``"Compressed "``.
    """
    # Inference only: eval mode and no autograd graph, matching
    # evaluate_qat_model. The per-group F1 MetricFrame that was built here
    # and never read has been removed.
    model.eval()
    with torch.no_grad():
        output = model(torch.FloatTensor(X_test))
        _, y_pred = torch.max(output, dim=1)

    y_pred = pd.Series(y_pred.numpy())
    y_test = pd.Series(y_test)

    compressed_metrics = {
        "Compressed Statistical Parity Difference": round(calculate_statistical_parity_difference(y_test, y_pred, A_test), 3),
        "Compressed Average Odds Difference": round(calculate_average_odds_difference(y_test, y_pred, A_test), 3),
        "Compressed Disparate Impact": round(calculate_disparate_impact(y_test, y_pred, A_test), 3),
        "Compressed Theil Index": round(calculate_theil_index(y_test, y_pred), 3),
        "Compressed F1 Score": round(f1_score(y_test, y_pred), 3),
    }

    return compressed_metrics


### k‑Fold Cross Validation


In [None]:

# Cross-validation driver: for each fold, train and evaluate the float
# baseline, its dynamically quantised copy, and a QAT model, then collect the
# per-fold metric dicts into one DataFrame per variant.
X, y, A = load_your_dataset()
num_splits = 10

# One metrics dict per fold is appended to each list.
baseline_fold_metrics = []
compressed_fold_metrics = []
qat_fold_metrics = []

for k in range(num_splits):
    print(f"\nProcessing fold {k + 1}/{num_splits}")

    # Folds are contiguous slices of the data; get_k_fold_split does not shuffle.
    X_train, y_train, A_train, X_test, y_test, A_test = get_k_fold_split(X, y, A, k, num_splits)

    baseline_model = train_baseline_model(X_train, y_train, k)
    baseline_eval = evaluate_baseline_model(baseline_model, X_test, y_test, A_test)
    # Must match the file name train_baseline_model writes for this fold.
    model_path = f"model_fold{k}.pkl"

    # The compressed model is rebuilt from the weights saved on disk.
    compressed_model = dynamic_compress_model(model_path)
    compressed_eval = evaluate_compressed_model(compressed_model, X_test, y_test, A_test)

    # The QAT model is trained from scratch on the same fold.
    qat_model = train_qat_model(X_train, y_train, k)
    qat_eval = evaluate_qat_model(qat_model, X_test, y_test, A_test)

    baseline_fold_metrics.append(baseline_eval)
    compressed_fold_metrics.append(compressed_eval)
    qat_fold_metrics.append(qat_eval)

    print(f"Baseline Eval: {baseline_eval}")
    print(f"Compressed Eval: {compressed_eval}")
    print(f"QAT Eval: {qat_eval}")

# Rows are folds, columns are metric names.
baseline_df = pd.DataFrame(baseline_fold_metrics)
compressed_df = pd.DataFrame(compressed_fold_metrics)
qat_df = pd.DataFrame(qat_fold_metrics)


# Persist the per-fold tables; the fold index is kept as the first CSV column.
baseline_df.to_csv("baseline_metrics.csv", index=True, float_format="%.3f")
compressed_df.to_csv("compressed_metrics.csv", index=True, float_format="%.3f")
qat_df.to_csv("qat_metrics.csv", index=True, float_format="%.3f")


  y = y.replace({'<=50K': 0, '<=50K.': 0, '>50K': 1, '>50K.': 1}).astype(int)



Processing fold 1/10
Fold 1: Train F1 Score = 0.7268




Fold 1: Train F1 Score (QAT) = 0.7093
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.19), 'Baseline Average Odds Difference': np.float64(0.091), 'Baseline Disparate Impact': np.float64(0.353), 'Baseline Theil Index': np.float64(-0.019), 'Baseline F1 Score': 0.674}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.204), 'Compressed Average Odds Difference': np.float64(0.1), 'Compressed Disparate Impact': np.float64(0.351), 'Compressed Theil Index': np.float64(0.009), 'Compressed F1 Score': 0.683}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.187), 'QAT Average Odds Difference': np.float64(0.104), 'QAT Disparate Impact': np.float64(0.319), 'QAT Theil Index': np.float64(-0.052), 'QAT F1 Score': 0.662, 'F1 Score': 0.662}

Processing fold 2/10
Fold 2: Train F1 Score = 0.7007




Fold 2: Train F1 Score (QAT) = 0.6986
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.155), 'Baseline Average Odds Difference': np.float64(0.062), 'Baseline Disparate Impact': np.float64(0.327), 'Baseline Theil Index': np.float64(-0.105), 'Baseline F1 Score': 0.654}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.199), 'Compressed Average Odds Difference': np.float64(0.074), 'Compressed Disparate Impact': np.float64(0.349), 'Compressed Theil Index': np.float64(0.011), 'Compressed F1 Score': 0.689}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.159), 'QAT Average Odds Difference': np.float64(0.065), 'QAT Disparate Impact': np.float64(0.329), 'QAT Theil Index': np.float64(-0.094), 'QAT F1 Score': 0.664, 'F1 Score': 0.664}

Processing fold 3/10
Fold 3: Train F1 Score = 0.7055




Fold 3: Train F1 Score (QAT) = 0.6982
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.184), 'Baseline Average Odds Difference': np.float64(0.095), 'Baseline Disparate Impact': np.float64(0.261), 'Baseline Theil Index': np.float64(-0.097), 'Baseline F1 Score': 0.662}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.222), 'Compressed Average Odds Difference': np.float64(0.105), 'Compressed Disparate Impact': np.float64(0.279), 'Compressed Theil Index': np.float64(-0.008), 'Compressed F1 Score': 0.678}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.168), 'QAT Average Odds Difference': np.float64(0.036), 'QAT Disparate Impact': np.float64(0.335), 'QAT Theil Index': np.float64(-0.08), 'QAT F1 Score': 0.659, 'F1 Score': 0.659}

Processing fold 4/10
Fold 4: Train F1 Score = 0.7122




Fold 4: Train F1 Score (QAT) = 0.6736
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.187), 'Baseline Average Odds Difference': np.float64(0.093), 'Baseline Disparate Impact': np.float64(0.31), 'Baseline Theil Index': np.float64(-0.057), 'Baseline F1 Score': 0.672}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.21), 'Compressed Average Odds Difference': np.float64(0.095), 'Compressed Disparate Impact': np.float64(0.328), 'Compressed Theil Index': np.float64(0.005), 'Compressed F1 Score': 0.681}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.148), 'QAT Average Odds Difference': np.float64(0.058), 'QAT Disparate Impact': np.float64(0.358), 'QAT Theil Index': np.float64(-0.112), 'QAT F1 Score': 0.645, 'F1 Score': 0.645}

Processing fold 5/10
Fold 5: Train F1 Score = 0.7024




Fold 5: Train F1 Score (QAT) = 0.7094
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.159), 'Baseline Average Odds Difference': np.float64(0.048), 'Baseline Disparate Impact': np.float64(0.331), 'Baseline Theil Index': np.float64(-0.109), 'Baseline F1 Score': 0.673}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.276), 'Compressed Average Odds Difference': np.float64(0.138), 'Compressed Disparate Impact': np.float64(0.299), 'Compressed Theil Index': np.float64(0.084), 'Compressed F1 Score': 0.67}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.17), 'QAT Average Odds Difference': np.float64(0.064), 'QAT Disparate Impact': np.float64(0.335), 'QAT Theil Index': np.float64(-0.079), 'QAT F1 Score': 0.682, 'F1 Score': 0.682}

Processing fold 6/10
Fold 6: Train F1 Score = 0.6980




Fold 6: Train F1 Score (QAT) = 0.6997
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.168), 'Baseline Average Odds Difference': np.float64(0.075), 'Baseline Disparate Impact': np.float64(0.299), 'Baseline Theil Index': np.float64(-0.11), 'Baseline F1 Score': 0.655}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.201), 'Compressed Average Odds Difference': np.float64(0.096), 'Compressed Disparate Impact': np.float64(0.287), 'Compressed Theil Index': np.float64(-0.047), 'Compressed F1 Score': 0.668}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.195), 'QAT Average Odds Difference': np.float64(0.112), 'QAT Disparate Impact': np.float64(0.28), 'QAT Theil Index': np.float64(-0.063), 'QAT F1 Score': 0.674, 'F1 Score': 0.674}

Processing fold 7/10
Fold 7: Train F1 Score = 0.6982




Fold 7: Train F1 Score (QAT) = 0.7015
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.166), 'Baseline Average Odds Difference': np.float64(0.097), 'Baseline Disparate Impact': np.float64(0.303), 'Baseline Theil Index': np.float64(-0.116), 'Baseline F1 Score': 0.644}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.207), 'Compressed Average Odds Difference': np.float64(0.117), 'Compressed Disparate Impact': np.float64(0.317), 'Compressed Theil Index': np.float64(-0.018), 'Compressed F1 Score': 0.673}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.184), 'QAT Average Odds Difference': np.float64(0.105), 'QAT Disparate Impact': np.float64(0.311), 'QAT Theil Index': np.float64(-0.07), 'QAT F1 Score': 0.659, 'F1 Score': 0.659}

Processing fold 8/10
Fold 8: Train F1 Score = 0.7073




Fold 8: Train F1 Score (QAT) = 0.7051
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.186), 'Baseline Average Odds Difference': np.float64(0.126), 'Baseline Disparate Impact': np.float64(0.26), 'Baseline Theil Index': np.float64(-0.089), 'Baseline F1 Score': 0.654}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.213), 'Compressed Average Odds Difference': np.float64(0.13), 'Compressed Disparate Impact': np.float64(0.267), 'Compressed Theil Index': np.float64(-0.029), 'Compressed F1 Score': 0.684}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.18), 'QAT Average Odds Difference': np.float64(0.083), 'QAT Disparate Impact': np.float64(0.299), 'QAT Theil Index': np.float64(-0.072), 'QAT F1 Score': 0.67, 'F1 Score': 0.67}

Processing fold 9/10
Fold 9: Train F1 Score = 0.6974




Fold 9: Train F1 Score (QAT) = 0.6959
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.141), 'Baseline Average Odds Difference': np.float64(0.021), 'Baseline Disparate Impact': np.float64(0.381), 'Baseline Theil Index': np.float64(-0.102), 'Baseline F1 Score': 0.646}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.185), 'Compressed Average Odds Difference': np.float64(0.047), 'Compressed Disparate Impact': np.float64(0.39), 'Compressed Theil Index': np.float64(0.015), 'Compressed F1 Score': 0.678}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.156), 'QAT Average Odds Difference': np.float64(0.06), 'QAT Disparate Impact': np.float64(0.341), 'QAT Theil Index': np.float64(-0.091), 'QAT F1 Score': 0.651, 'F1 Score': 0.651}

Processing fold 10/10
Fold 10: Train F1 Score = 0.7192




Fold 10: Train F1 Score (QAT) = 0.7055
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.171), 'Baseline Average Odds Difference': np.float64(0.058), 'Baseline Disparate Impact': np.float64(0.361), 'Baseline Theil Index': np.float64(-0.05), 'Baseline F1 Score': 0.68}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.205), 'Compressed Average Odds Difference': np.float64(0.075), 'Compressed Disparate Impact': np.float64(0.382), 'Compressed Theil Index': np.float64(0.039), 'Compressed F1 Score': 0.673}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.178), 'QAT Average Odds Difference': np.float64(0.079), 'QAT Disparate Impact': np.float64(0.309), 'QAT Theil Index': np.float64(-0.074), 'QAT F1 Score': 0.664, 'F1 Score': 0.664}


In [None]:
# Show the per-fold baseline metrics, then write them to baseline_metrics.csv
# again (the cross-validation cell already writes the same file).
print(baseline_df)
baseline_df.to_csv("baseline_metrics.csv", index=True, float_format="%.3f")

   Baseline Statistical Parity Difference  Baseline Average Odds Difference  \
0                                   0.188                             0.103   
1                                   0.162                             0.035   
2                                   0.207                             0.104   
3                                   0.169                             0.046   
4                                   0.148                             0.038   
5                                   0.199                             0.102   
6                                   0.194                             0.110   
7                                   0.178                             0.113   
8                                   0.187                             0.086   
9                                   0.184                             0.095   

   Baseline Disparate Impact  Baseline Theil Index  Baseline F1 Score  
0                      0.322                -0.047        

In [None]:
# Colab-only: archive everything under /content and hand the zip to the
# browser for download. The leading "!" runs the line as a shell command.
!zip -r colab_files.zip /content
from google.colab import files
files.download('colab_files.zip')