# NewV2 Adult Fairness NetArch1

This notebook has been updated to include two evaluation approaches:

1. **k‑Fold Cross Validation:** For each fold, the same train/test split is used for both the baseline‑compressed and QAT models. The results are then aggregated across all folds.
2. **Fixed Split Evaluation:** A single fixed split (using, e.g., the 0th fold) is used to train and evaluate both models for a fair comparison.

In [None]:
!pip install ucimlrepo
!pip install fairlearn
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.quantization import quantize_dynamic, QConfig, prepare_qat, convert
from torch.quantization.fake_quantize import FakeQuantize
from torch.quantization.observer import MovingAverageMinMaxObserver
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, matthews_corrcoef, f1_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from ucimlrepo import fetch_ucirepo
from fairlearn.metrics import MetricFrame
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from fairlearn.metrics import (
    MetricFrame,
    selection_rate,
    true_positive_rate,
    false_positive_rate
)



## Code Persistence

In [None]:
def save_model(model, model_path):
    """Write ``model``'s parameters to ``model_path``.

    Only ``model.state_dict()`` is serialized (via ``torch.save``), not the
    module object, so a reader must build the architecture first and then
    call ``load_state_dict`` on it.

    Args:
        model: Module whose ``state_dict()`` is saved.
        model_path: Destination passed straight to ``torch.save``.
    """
    torch.save(model.state_dict(), model_path)
def save_values_to_txt(data, filename):
    """Append records to ``filename`` as ``key: value`` lines.

    Args:
        data: Either one dict or an iterable of dicts. A lone dict is treated
            as a one-element list.
        filename: File opened in append mode.

    Each record is written one ``key: value`` pair per line and is followed
    by an empty line. A confirmation message is printed when done.
    """
    records = [data] if isinstance(data, dict) else data
    with open(filename, 'a') as out:
        for record in records:
            out.writelines(f'{key}: {value}\n' for key, value in record.items())
            out.write('\n')
    print(f"Data appended to {filename}")

In [None]:
# Seed every RNG the notebook draws from. The torch seed matters most here:
# nn.Linear weight initialisation and DataLoader(shuffle=True) both use
# torch's generator, not Python's or NumPy's.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

### Data Loading

In [None]:
def load_your_dataset():
    """Fetch the UCI Adult dataset (``fetch_ucirepo(id=2)``) as NumPy arrays.

    Returns:
        tuple ``(X_np, y_np, A_np)``:
            X_np: float32 array of the one-hot encoded (``drop_first=True``),
                standard-scaled features.
            y_np: int64 array of labels, 1 for ``>50K`` and 0 for ``<=50K``.
            A_np: values of the raw ``sex`` column (sensitive attribute).

    Raises:
        ValueError: If the target column holds a value outside the four
            known label spellings.

    Note:
        The scaler is fit on the complete dataset, i.e. before the caller
        makes any train/test split.
    """
    adult = fetch_ucirepo(id=2)
    X = adult.data.features
    y = adult.data.targets

    # The mapping covers the label spellings with and without a trailing '.'.
    # Mapping the column explicitly yields integers directly, instead of
    # relying on DataFrame.replace + astype (which is the statement pandas
    # echoes in its warning output) and a LabelEncoder pass over 0/1 values.
    label_map = {'<=50K': 0, '<=50K.': 0, '>50K': 1, '>50K.': 1}
    labels = y.iloc[:, 0].map(label_map)
    if labels.isna().any():
        unknown = sorted(set(y.iloc[:, 0][labels.isna()].astype(str)))
        raise ValueError(f"Unexpected target label(s): {unknown}")
    y_np = labels.to_numpy().astype(np.int64)

    X_encoded = pd.get_dummies(X, drop_first=True)

    scaler = StandardScaler()
    X_np = scaler.fit_transform(X_encoded).astype(np.float32)

    A_np = X["sex"].values

    return X_np, y_np, A_np

# Load the dataset once at notebook scope; X_np is read later to derive n_input.
X_np, y_np, A_np = load_your_dataset()

  y = y.replace({'<=50K': 0, '<=50K.': 0, '>50K': 1, '>50K.': 1}).astype(int)


## Fairness Evaluation

* Statistical Parity Difference: Closer to 0 is better. 0 means no bias.
* Average Odds Difference: 0 is ideal, indicating no discrimination.
* Disparate Impact: Aim for 1. Values close to 1 indicate fairness.
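* Theil Index: Lower is better. 0 means perfect equality. Note that `calculate_theil_index` below computes the binary entropy of the predicted positive rate minus the binary entropy of the actual positive rate, so the reported value can be negative.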

In [None]:
def calculate_statistical_parity_difference(y_true, y_pred, A_test):
    """Between-group difference in selection rate.

    Args:
        y_true: Ground-truth labels for the evaluated samples.
        y_pred: Predicted labels for the same samples.
        A_test: Sensitive-feature value for each sample.

    Returns:
        ``MetricFrame.difference(method='between_groups')`` of
        ``selection_rate`` grouped by ``A_test``.
    """
    # Use the y_true argument. Reading a notebook-global y_test here made the
    # function depend on whatever split happened to be in scope.
    mfm = MetricFrame(
        metrics=selection_rate,
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=A_test,
    )
    return mfm.difference(method='between_groups')

def calculate_average_odds_difference(y_true, y_pred, A_test):
    """Between-group difference of the mean of TPR and FPR.

    The positive label is the larger of the two unique values in ``y_true``.

    Raises:
        ValueError: If ``y_true`` does not contain exactly two unique values.
    """
    labels = np.unique(y_true)
    if len(labels) != 2:
        raise ValueError("y_true should have exactly two unique values for binary classification")
    pos_label = labels[1]

    def average_odds(y_true, y_pred):
        # Per-group metric: average of true-positive and false-positive rate.
        tpr_value = true_positive_rate(y_true, y_pred, pos_label=pos_label)
        fpr_value = false_positive_rate(y_true, y_pred, pos_label=pos_label)
        return (tpr_value + fpr_value) / 2

    frame = MetricFrame(
        metrics=average_odds,
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=A_test,
    )
    return frame.difference(method='between_groups')

def calculate_disparate_impact(y_true, y_pred, A_test):
    """Between-group ratio of selection rates, grouped by ``A_test``."""
    frame = MetricFrame(
        metrics=selection_rate,
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=A_test,
    )
    return frame.ratio(method='between_groups')

def calculate_theil_index(y_true, y_pred):
    """Entropy of the predicted positive rate minus that of the actual one.

    Both rates are the fraction of entries equal to 1. Each is turned into a
    base-2 binary entropy (a small epsilon keeps ``log2`` finite at rates of
    0 or 1), and the function returns ``H(pred) - H(actual)``.

    The result is signed: it is negative whenever the predictions are less
    balanced than the true labels.

    NOTE(review): this is an entropy gap between label rates, not a
    per-individual inequality computation -- confirm it is the definition of
    "Theil index" the analysis intends.

    Args:
        y_true: Array-like supporting ``== 1`` elementwise (ndarray/Series).
        y_pred: Array-like supporting ``== 1`` elementwise (ndarray/Series).
    """
    epsilon = 1e-10

    def binary_entropy(p):
        return -(p * np.log2(p + epsilon) + (1 - p) * np.log2(1 - p + epsilon))

    actual_pos = np.mean(y_true == 1)
    pred_pos = np.mean(y_pred == 1)

    return binary_entropy(pred_pos) - binary_entropy(actual_pos)

## Model Architecture

In [None]:
# Width of the feature matrix; the network constructors below read this global
# when building their first Linear layer.
n_input = X_np.shape[1]

class NetArch1(nn.Module):
    """MLP with hidden widths 128 and 64; each hidden layer is Linear -> ReLU -> LayerNorm.

    ``forward`` returns log-probabilities over 2 classes (``log_softmax`` on dim 1).
    """

    def __init__(self):
        super().__init__()

        self.fc1 = nn.Linear(n_input, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)

        self.layernorm1 = nn.LayerNorm(128)
        self.layernorm2 = nn.LayerNorm(64)

    def forward(self, x):
        hidden = x
        hidden_stages = (
            (self.fc1, self.layernorm1),
            (self.fc2, self.layernorm2),
        )
        for linear, norm in hidden_stages:
            hidden = norm(F.relu(linear(hidden)))
        logits = self.fc3(hidden)
        return F.log_softmax(logits, dim=1)

class NetArch2(nn.Module):
    """MLP with three 100-unit ReLU hidden layers and a 2-class log-softmax output."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(n_input, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)

    def forward(self, x):
        hidden = x
        for linear in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(linear(hidden))
        logits = self.fc4(hidden)
        return F.log_softmax(logits, dim=1)

class NetArch3(nn.Module):
    """MLP with three 100-unit sigmoid hidden layers and a 2-class log-softmax output."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(n_input, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)

    def forward(self, x):
        hidden = x
        for linear in (self.fc1, self.fc2, self.fc3):
            hidden = torch.sigmoid(linear(hidden))
        logits = self.fc4(hidden)
        return F.log_softmax(logits, dim=1)

## QAT Compatible Model Architecture

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.quantization

# Same value as the n_input assigned in the Model Architecture cell; repeated
# here so the QAT classes below see it when this cell is run.
n_input = X_np.shape[1]

class QATArch1(nn.Module):
    """NetArch1-shaped network wrapped in Quant/DeQuant stubs.

    The input passes through ``quant`` first and the logits through
    ``dequant`` before ``log_softmax`` is applied.
    """

    def __init__(self):
        super().__init__()
        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(n_input, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)
        self.layernorm1 = nn.LayerNorm(128)
        self.layernorm2 = nn.LayerNorm(64)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        hidden = self.quant(x)
        hidden_stages = (
            (self.fc1, self.layernorm1),
            (self.fc2, self.layernorm2),
        )
        for linear, norm in hidden_stages:
            hidden = norm(F.relu(linear(hidden)))
        logits = self.dequant(self.fc3(hidden))
        return F.log_softmax(logits, dim=1)

class QATArch2(nn.Module):
    """NetArch2-shaped network (three 100-unit ReLU layers) with Quant/DeQuant stubs."""

    def __init__(self):
        super().__init__()
        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(n_input, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        hidden = self.quant(x)
        for linear in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(linear(hidden))
        logits = self.dequant(self.fc4(hidden))
        return F.log_softmax(logits, dim=1)

class QATArch3(nn.Module):
    """NetArch3-shaped network (three 100-unit sigmoid layers) with Quant/DeQuant stubs."""

    def __init__(self):
        super().__init__()
        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(n_input, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        hidden = self.quant(x)
        for linear in (self.fc1, self.fc2, self.fc3):
            hidden = torch.sigmoid(linear(hidden))
        logits = self.dequant(self.fc4(hidden))
        return F.log_softmax(logits, dim=1)

In [None]:
def dynamic_compress_model(model_path):
    """Load NetArch1 weights from ``model_path`` and dynamically quantize them.

    A fresh ``NetArch1`` is built, the saved state dict is loaded into it, the
    model is switched to eval mode, and its ``nn.Linear`` modules are
    quantized to ``qint8`` with ``quantize_dynamic``.

    Returns:
        The module returned by ``quantize_dynamic``.
    """
    float_model = NetArch1()
    saved_state = torch.load(model_path)
    float_model.load_state_dict(saved_state)
    float_model.eval()

    return quantize_dynamic(
        model=float_model,
        qconfig_spec={torch.nn.Linear},
        dtype=torch.qint8,
    )

def prepare_model_for_qat(model):
    """Attach the default 'fbgemm' QAT qconfig to ``model`` and run ``prepare_qat``.

    The ``qconfig`` attribute is set on the passed-in model itself; the value
    returned is whatever ``torch.quantization.prepare_qat`` produces.
    """
    qat_config = torch.quantization.get_default_qat_qconfig('fbgemm')
    model.qconfig = qat_config
    return torch.quantization.prepare_qat(model)

def convert_qat_model(model):
    """Switch ``model`` to eval mode and return ``torch.quantization.convert`` of it."""
    model.eval()  # eval() returns the module itself, so converting `model` is equivalent
    return torch.quantization.convert(model)


def get_k_fold_split(X, y,A, k, num_splits=5):
    """Return the train/test partition for fold ``k`` of a contiguous k-fold split.

    Samples are not shuffled: fold ``k`` is the block of
    ``len(X) // num_splits`` consecutive rows starting at
    ``k * (len(X) // num_splits)``. The last fold also absorbs the remainder
    rows.

    Args:
        X, y, A: Arrays indexed along their first axis by the same sample
            order (features, labels, sensitive attribute).
        k: Zero-based fold index, ``0 <= k < num_splits``.
        num_splits: Number of folds.

    Returns:
        ``(X_train, y_train, A_train, X_test, y_test, A_test)``.

    Raises:
        ValueError: If ``k`` is not a valid fold index. Without this check an
            out-of-range ``k`` silently produced a wrong or empty test fold.
    """
    if not 0 <= k < num_splits:
        raise ValueError(f"k must satisfy 0 <= k < num_splits, got k={k}, num_splits={num_splits}")

    indices = np.arange(len(X))
    fold_size = len(X) // num_splits

    start = k * fold_size
    # The final fold runs to the end so that no sample is dropped.
    end = start + fold_size if k != num_splits - 1 else len(X)

    test_idx = indices[start:end]
    train_idx = np.concatenate([indices[:start], indices[end:]])

    return X[train_idx], y[train_idx], A[train_idx], X[test_idx], y[test_idx], A[test_idx]


def train_baseline_model(X_train, y_train, fold, num_epochs=10, batch_size=64):
    """Train a NetArch1 on one fold, report its train F1, and save its weights.

    Training uses Adam (lr=1e-3) with NLL loss over shuffled mini-batches.
    After training, the model is put in eval mode, F1 on the training data is
    printed, and the state dict is saved to ``model_fold{fold}.pkl``.

    Args:
        X_train: Training features (converted with ``torch.FloatTensor``).
        y_train: Training labels (converted with ``torch.LongTensor``).
        fold: Zero-based fold index, used in the log line and file name.
        num_epochs: Number of passes over the training data.
        batch_size: Mini-batch size.

    Returns:
        The trained model, in eval mode.
    """
    net = NetArch1()
    adam = optim.Adam(net.parameters(), lr=1e-3)
    nll = nn.NLLLoss()

    features = torch.FloatTensor(X_train)
    targets = torch.LongTensor(y_train)
    loader = DataLoader(TensorDataset(features, targets), batch_size=batch_size, shuffle=True)

    net.train()
    for _ in range(num_epochs):
        for xb, yb in loader:
            adam.zero_grad()
            batch_loss = nll(net(xb), yb)
            batch_loss.backward()
            adam.step()

    # Score the fitted model on the data it was trained on.
    net.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for xb, yb in loader:
            _, winners = torch.max(net(xb), 1)
            y_true.extend(yb.numpy())
            y_pred.extend(winners.numpy())

    f1 = f1_score(y_true, y_pred)
    print(f"Fold {fold + 1}: Train F1 Score = {f1:.4f}")

    save_model(net, f"model_fold{fold}.pkl")

    return net

def train_qat_model(X_train, y_train, fold, num_epochs=10, batch_size=64):
    """Train a QATArch1 with quantization-aware training and convert it.

    The model is prepared with ``prepare_model_for_qat``, trained with Adam
    (lr=1e-3) and NLL loss over shuffled mini-batches, then converted with
    ``convert_qat_model``. The converted model's F1 on the training data is
    printed and its state dict is saved to ``qat_model_fold{fold}.pkl``.

    Args:
        X_train: Training features (converted with ``torch.FloatTensor``).
        y_train: Training labels (converted with ``torch.LongTensor``).
        fold: Zero-based fold index, used in the log line and file name.
        num_epochs: Number of passes over the training data.
        batch_size: Mini-batch size.

    Returns:
        The converted (quantized) model.
    """
    model = QATArch1()
    model = prepare_model_for_qat(model)

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.NLLLoss()

    dataset = TensorDataset(torch.FloatTensor(X_train), torch.LongTensor(y_train))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # The running-loss accumulator was never read, so it (and its per-batch
    # loss.item() call) has been dropped; the optimisation steps are unchanged.
    model.train()
    for epoch in range(num_epochs):
        for batch_X, batch_y in train_loader:
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    model.eval()
    quantized_model = convert_qat_model(model)

    # Score the converted model on the data it was trained on.
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_X, batch_y in train_loader:
            outputs = quantized_model(batch_X)
            _, predicted = torch.max(outputs, 1)
            y_true.extend(batch_y.numpy())
            y_pred.extend(predicted.numpy())

    f1 = f1_score(y_true, y_pred)
    print(f"Fold {fold + 1}: Train F1 Score (QAT) = {f1:.4f}")

    model_path = f"qat_model_fold{fold}.pkl"
    save_model(quantized_model, model_path)

    return quantized_model


def evaluate_baseline_model(model, X_test, y_test, A_test):
    """Compute fairness metrics and F1 for the uncompressed model on a test fold.

    Args:
        model: Trained module; called on ``torch.FloatTensor(X_test)``.
        X_test: Test features.
        y_test: Test labels.
        A_test: Sensitive-feature value per test sample.

    Returns:
        dict with the keys ``"Baseline Statistical Parity Difference"``,
        ``"Baseline Average Odds Difference"``, ``"Baseline Disparate Impact"``,
        ``"Baseline Theil Index"`` and ``"Baseline F1 Score"``, each rounded
        to 3 decimals.
    """
    # Inference only: call the module (not .forward directly) and skip
    # autograd bookkeeping.
    with torch.no_grad():
        output = model(torch.FloatTensor(X_test))
        _, y_pred = torch.max(output, dim=1)

    y_pred = pd.Series(y_pred.numpy())
    y_test = pd.Series(y_test)

    baseline_metrics = {
        "Baseline Statistical Parity Difference": round(calculate_statistical_parity_difference(y_test, y_pred, A_test), 3),
        "Baseline Average Odds Difference": round(calculate_average_odds_difference(y_test, y_pred, A_test), 3),
        "Baseline Disparate Impact": round(calculate_disparate_impact(y_test, y_pred, A_test), 3),
        "Baseline Theil Index": round(calculate_theil_index(y_test, y_pred),3),
        "Baseline F1 Score": round(f1_score(y_test, y_pred),3)
    }

    return baseline_metrics

def evaluate_qat_model(model, X_test, y_test, A_test):
    """Compute fairness metrics and F1 for the QAT model on a test fold.

    NaN entries in ``X_test`` are replaced by the mean of their column
    (computed over the non-NaN entries of ``X_test``) before inference.

    Args:
        model: Converted QAT module; put in eval mode and called on the
            float32 test tensor.
        X_test: Test features as a NumPy array.
        y_test: Test labels.
        A_test: Sensitive-feature value per test sample.

    Returns:
        dict with the ``"QAT ..."`` metric keys plus a duplicate
        ``"F1 Score"`` entry holding the same value as ``"QAT F1 Score"``;
        all values are rounded to 3 decimals.
    """
    # astype returns a new array, so the caller's X_test is left untouched.
    X_test = X_test.astype(np.float32)

    nan_mask = np.isnan(X_test)
    if nan_mask.any():
        col_means = np.nanmean(X_test, axis=0)
        # col_means broadcasts across rows, so every NaN receives the mean of
        # its own column. (Indexing the means by the first row's mask, as
        # before, picked the wrong columns and generally mismatched in shape.)
        X_test = np.where(nan_mask, col_means, X_test)

    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

    model.eval()

    with torch.no_grad():
        output = model(X_test_tensor)
        _, y_pred = torch.max(output, dim=1)

    y_pred_np = y_pred.cpu().numpy()

    f1 = round(f1_score(y_test, y_pred_np), 3)

    qat_metrics = {
        "QAT Statistical Parity Difference": round(calculate_statistical_parity_difference(y_test, y_pred_np, A_test), 3),
        "QAT Average Odds Difference": round(calculate_average_odds_difference(y_test, y_pred_np, A_test), 3),
        "QAT Disparate Impact": round(calculate_disparate_impact(y_test, y_pred_np, A_test), 3),
        "QAT Theil Index": round(calculate_theil_index(y_test, y_pred_np), 3),
        "QAT F1 Score": f1,
        # Kept for compatibility with existing consumers of this dict / CSV.
        "F1 Score": f1
    }

    return qat_metrics

def evaluate_compressed_model(model, X_test, y_test, A_test):
    """Compute fairness metrics and F1 for the dynamically quantized model.

    Args:
        model: Compressed module; called on ``torch.FloatTensor(X_test)``.
        X_test: Test features.
        y_test: Test labels.
        A_test: Sensitive-feature value per test sample.

    Returns:
        dict with the keys ``"Compressed Statistical Parity Difference"``,
        ``"Compressed Average Odds Difference"``,
        ``"Compressed Disparate Impact"``, ``"Compressed Theil Index"`` and
        ``"Compressed F1 Score"``, each rounded to 3 decimals.
    """
    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        output = model(torch.FloatTensor(X_test))
        _, y_pred = torch.max(output, dim=1)

    y_pred = pd.Series(y_pred.numpy())
    y_test = pd.Series(y_test)

    compressed_metrics = {
        "Compressed Statistical Parity Difference": round(calculate_statistical_parity_difference(y_test, y_pred, A_test), 3),
        "Compressed Average Odds Difference": round(calculate_average_odds_difference(y_test, y_pred, A_test), 3),
        "Compressed Disparate Impact": round(calculate_disparate_impact(y_test, y_pred, A_test), 3),
        "Compressed Theil Index": round(calculate_theil_index(y_test, y_pred), 3),
        "Compressed F1 Score": round(f1_score(y_test, y_pred), 3),
    }

    return compressed_metrics


### k‑Fold Cross Validation


In [None]:

# k-fold driver: for every fold, train and evaluate the baseline model, its
# dynamically quantized copy, and a QAT model on the same train/test split.

# Fetches the dataset again (same call that produced X_np, y_np, A_np earlier).
X, y, A = load_your_dataset()
num_splits = 10

# One metrics dict per fold for each model variant.
baseline_fold_metrics = []
compressed_fold_metrics = []
qat_fold_metrics = []

for k in range(num_splits):
    print(f"\nProcessing fold {k + 1}/{num_splits}")

    # NOTE: these split variables are notebook globals and are overwritten on every iteration.
    X_train, y_train, A_train, X_test, y_test, A_test = get_k_fold_split(X, y, A, k, num_splits)

    baseline_model = train_baseline_model(X_train, y_train, k)
    baseline_eval = evaluate_baseline_model(baseline_model, X_test, y_test, A_test)
    # Same path train_baseline_model just saved the fold's weights to.
    model_path = f"model_fold{k}.pkl"

    # The compressed model is rebuilt from the saved baseline weights, not retrained.
    compressed_model = dynamic_compress_model(model_path)
    compressed_eval = evaluate_compressed_model(compressed_model, X_test, y_test, A_test)

    # The QAT model is trained from scratch on the same training fold.
    qat_model = train_qat_model(X_train, y_train, k)
    qat_eval = evaluate_qat_model(qat_model, X_test, y_test, A_test)

    baseline_fold_metrics.append(baseline_eval)
    compressed_fold_metrics.append(compressed_eval)
    qat_fold_metrics.append(qat_eval)

    print(f"Baseline Eval: {baseline_eval}")
    print(f"Compressed Eval: {compressed_eval}")
    print(f"QAT Eval: {qat_eval}")

# One row per fold, one column per metric.
baseline_df = pd.DataFrame(baseline_fold_metrics)
compressed_df = pd.DataFrame(compressed_fold_metrics)
qat_df = pd.DataFrame(qat_fold_metrics)


# Persist the per-fold tables; the fold number is written as the index column.
baseline_df.to_csv("baseline_metrics.csv", index=True, float_format="%.3f")
compressed_df.to_csv("compressed_metrics.csv", index=True, float_format="%.3f")
qat_df.to_csv("qat_metrics.csv", index=True, float_format="%.3f")


  y = y.replace({'<=50K': 0, '<=50K.': 0, '>50K': 1, '>50K.': 1}).astype(int)



Processing fold 1/10
Fold 1: Train F1 Score = 0.7180




Fold 1: Train F1 Score (QAT) = 0.7183
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.172), 'Baseline Average Odds Difference': np.float64(0.087), 'Baseline Disparate Impact': np.float64(0.329), 'Baseline Theil Index': np.float64(-0.077), 'Baseline F1 Score': 0.653}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.191), 'Compressed Average Odds Difference': np.float64(0.115), 'Compressed Disparate Impact': np.float64(0.305), 'Compressed Theil Index': np.float64(-0.054), 'Compressed F1 Score': 0.657}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.174), 'QAT Average Odds Difference': np.float64(0.083), 'QAT Disparate Impact': np.float64(0.337), 'QAT Theil Index': np.float64(-0.066), 'QAT F1 Score': 0.673, 'F1 Score': 0.673}

Processing fold 2/10
Fold 2: Train F1 Score = 0.7182




Fold 2: Train F1 Score (QAT) = 0.7096
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.159), 'Baseline Average Odds Difference': np.float64(0.036), 'Baseline Disparate Impact': np.float64(0.38), 'Baseline Theil Index': np.float64(-0.054), 'Baseline F1 Score': 0.666}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.185), 'Compressed Average Odds Difference': np.float64(0.04), 'Compressed Disparate Impact': np.float64(0.384), 'Compressed Theil Index': np.float64(0.01), 'Compressed F1 Score': 0.682}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.143), 'QAT Average Odds Difference': np.float64(0.028), 'QAT Disparate Impact': np.float64(0.389), 'QAT Theil Index': np.float64(-0.09), 'QAT F1 Score': 0.665, 'F1 Score': 0.665}

Processing fold 3/10
Fold 3: Train F1 Score = 0.7223




Fold 3: Train F1 Score (QAT) = 0.6999
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.185), 'Baseline Average Odds Difference': np.float64(0.085), 'Baseline Disparate Impact': np.float64(0.29), 'Baseline Theil Index': np.float64(-0.074), 'Baseline F1 Score': 0.657}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.212), 'Compressed Average Odds Difference': np.float64(0.084), 'Compressed Disparate Impact': np.float64(0.304), 'Compressed Theil Index': np.float64(-0.01), 'Compressed F1 Score': 0.676}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.175), 'QAT Average Odds Difference': np.float64(0.069), 'QAT Disparate Impact': np.float64(0.277), 'QAT Theil Index': np.float64(-0.107), 'QAT F1 Score': 0.646, 'F1 Score': 0.646}

Processing fold 4/10
Fold 4: Train F1 Score = 0.7301




Fold 4: Train F1 Score (QAT) = 0.7210
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.18), 'Baseline Average Odds Difference': np.float64(0.063), 'Baseline Disparate Impact': np.float64(0.365), 'Baseline Theil Index': np.float64(-0.028), 'Baseline F1 Score': 0.68}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.213), 'Compressed Average Odds Difference': np.float64(0.092), 'Compressed Disparate Impact': np.float64(0.349), 'Compressed Theil Index': np.float64(0.027), 'Compressed F1 Score': 0.69}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.186), 'QAT Average Odds Difference': np.float64(0.081), 'QAT Disparate Impact': np.float64(0.341), 'QAT Theil Index': np.float64(-0.035), 'QAT F1 Score': 0.68, 'F1 Score': 0.68}

Processing fold 5/10
Fold 5: Train F1 Score = 0.7255




Fold 5: Train F1 Score (QAT) = 0.7123
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.185), 'Baseline Average Odds Difference': np.float64(0.072), 'Baseline Disparate Impact': np.float64(0.323), 'Baseline Theil Index': np.float64(-0.055), 'Baseline F1 Score': 0.692}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.219), 'Compressed Average Odds Difference': np.float64(0.103), 'Compressed Disparate Impact': np.float64(0.305), 'Compressed Theil Index': np.float64(-0.001), 'Compressed F1 Score': 0.682}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.164), 'QAT Average Odds Difference': np.float64(0.059), 'QAT Disparate Impact': np.float64(0.34), 'QAT Theil Index': np.float64(-0.09), 'QAT F1 Score': 0.681, 'F1 Score': 0.681}

Processing fold 6/10
Fold 6: Train F1 Score = 0.7221




Fold 6: Train F1 Score (QAT) = 0.7100
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.199), 'Baseline Average Odds Difference': np.float64(0.089), 'Baseline Disparate Impact': np.float64(0.288), 'Baseline Theil Index': np.float64(-0.051), 'Baseline F1 Score': 0.681}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.222), 'Compressed Average Odds Difference': np.float64(0.095), 'Compressed Disparate Impact': np.float64(0.284), 'Compressed Theil Index': np.float64(-0.01), 'Compressed F1 Score': 0.678}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.175), 'QAT Average Odds Difference': np.float64(0.061), 'QAT Disparate Impact': np.float64(0.311), 'QAT Theil Index': np.float64(-0.084), 'QAT F1 Score': 0.668, 'F1 Score': 0.668}

Processing fold 7/10
Fold 7: Train F1 Score = 0.7066




Fold 7: Train F1 Score (QAT) = 0.7154
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.184), 'Baseline Average Odds Difference': np.float64(0.118), 'Baseline Disparate Impact': np.float64(0.284), 'Baseline Theil Index': np.float64(-0.09), 'Baseline F1 Score': 0.653}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.215), 'Compressed Average Odds Difference': np.float64(0.131), 'Compressed Disparate Impact': np.float64(0.295), 'Compressed Theil Index': np.float64(-0.019), 'Compressed F1 Score': 0.671}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.179), 'QAT Average Odds Difference': np.float64(0.089), 'QAT Disparate Impact': np.float64(0.339), 'QAT Theil Index': np.float64(-0.059), 'QAT F1 Score': 0.659, 'F1 Score': 0.659}

Processing fold 8/10
Fold 8: Train F1 Score = 0.7155




Fold 8: Train F1 Score (QAT) = 0.7184
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.179), 'Baseline Average Odds Difference': np.float64(0.092), 'Baseline Disparate Impact': np.float64(0.29), 'Baseline Theil Index': np.float64(-0.08), 'Baseline F1 Score': 0.664}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.189), 'Compressed Average Odds Difference': np.float64(0.095), 'Compressed Disparate Impact': np.float64(0.289), 'Compressed Theil Index': np.float64(-0.06), 'Compressed F1 Score': 0.671}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.187), 'QAT Average Odds Difference': np.float64(0.088), 'QAT Disparate Impact': np.float64(0.303), 'QAT Theil Index': np.float64(-0.055), 'QAT F1 Score': 0.673, 'F1 Score': 0.673}

Processing fold 9/10
Fold 9: Train F1 Score = 0.7040




Fold 9: Train F1 Score (QAT) = 0.7172
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.129), 'Baseline Average Odds Difference': np.float64(0.023), 'Baseline Disparate Impact': np.float64(0.398), 'Baseline Theil Index': np.float64(-0.122), 'Baseline F1 Score': 0.649}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.152), 'Compressed Average Odds Difference': np.float64(0.032), 'Compressed Disparate Impact': np.float64(0.396), 'Compressed Theil Index': np.float64(-0.06), 'Compressed F1 Score': 0.655}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.167), 'QAT Average Odds Difference': np.float64(0.046), 'QAT Disparate Impact': np.float64(0.341), 'QAT Theil Index': np.float64(-0.064), 'QAT F1 Score': 0.666, 'F1 Score': 0.666}

Processing fold 10/10
Fold 10: Train F1 Score = 0.7324




Fold 10: Train F1 Score (QAT) = 0.7040
Baseline Eval: {'Baseline Statistical Parity Difference': np.float64(0.195), 'Baseline Average Odds Difference': np.float64(0.095), 'Baseline Disparate Impact': np.float64(0.33), 'Baseline Theil Index': np.float64(-0.022), 'Baseline F1 Score': 0.676}
Compressed Eval: {'Compressed Statistical Parity Difference': np.float64(0.216), 'Compressed Average Odds Difference': np.float64(0.123), 'Compressed Disparate Impact': np.float64(0.318), 'Compressed Theil Index': np.float64(0.01), 'Compressed F1 Score': 0.673}
QAT Eval: {'QAT Statistical Parity Difference': np.float64(0.16), 'QAT Average Odds Difference': np.float64(0.075), 'QAT Disparate Impact': np.float64(0.319), 'QAT Theil Index': np.float64(-0.11), 'QAT F1 Score': 0.656, 'F1 Score': 0.656}


In [None]:
# Show the per-fold baseline metrics, then write them to CSV again (same file
# name and arguments as the write at the end of the k-fold cell).
print(baseline_df)
baseline_df.to_csv("baseline_metrics.csv", index=True, float_format="%.3f")

   Baseline Statistical Parity Difference  Baseline Average Odds Difference  \
0                                   0.188                             0.103   
1                                   0.162                             0.035   
2                                   0.207                             0.104   
3                                   0.169                             0.046   
4                                   0.148                             0.038   
5                                   0.199                             0.102   
6                                   0.194                             0.110   
7                                   0.178                             0.113   
8                                   0.187                             0.086   
9                                   0.184                             0.095   

   Baseline Disparate Impact  Baseline Theil Index  Baseline F1 Score  
0                      0.322                -0.047        

In [None]:
# Colab-specific: archive everything under /content into colab_files.zip and
# hand the archive to google.colab's files.download.
!zip -r colab_files.zip /content
from google.colab import files
files.download('colab_files.zip')

### Friedman test