In [10]:
import numpy as np
import pandas as pd
from fairlearn.metrics import (
    demographic_parity_difference,
    demographic_parity_ratio,
    equalized_odds_difference,
    equalized_odds_ratio,
    mean_prediction,
)

## 群体公平

### Demographic Parity（人口学平等性）

#### Difference

该指标只比较各群体的预测为正的比例，因此只依赖 y_pred 与敏感属性，y_true 对结果无影响

该值越接近 0 越好

$$
\text{Demographic Parity Difference}=\left| p(\hat{y}=1 | a) - p(\hat{y}=1|\bar{a}) \right|
$$

#### Ratio

该值越接近 1 越好

$$
\text{Demographic Parity Ratio}=
\min(
    \frac{p(\hat{y}=1 | a)}{p(\hat{y}=1|\bar{a})}, 
    \frac{p(\hat{y}=1 | \bar{a})}{p(\hat{y}=1|a)}
    )
$$

In [11]:
# Toy example: ten samples with true labels and binary predictions.
y_true = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
y_pred = np.array([0, 1, 1, 1, 0, 1, 1, 0, 0, 0])

# Sensitive attribute: an alternating 1/0 pattern whose two codes are swapped
# (every value > 0 becomes 0, everything else becomes 1).
feat_sensitve = np.where(np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0]) > 0, 0, 1)

# Demographic parity compares the rate of positive predictions between the
# groups defined by the sensitive attribute.
dpd = demographic_parity_difference(
    y_true,
    y_pred,
    sensitive_features=feat_sensitve,
)
dpr = demographic_parity_ratio(
    y_true,
    y_pred,
    sensitive_features=feat_sensitve,
)

# The bare tuple on the last line is what the cell displays.
("difference", dpd, "ratio", dpr)

('difference', 0.19999999999999996, 'ratio', 0.6666666666666667)

## 群体公平：Equalized Odds（均等几率）

### 计算 EO Difference 的函数

EO Difference 取下面两个指标（tpr 差距与 fpr 差距）中更大的值

#### tpr 差距

$$
\text{true positive rate difference} = |P[h(X)=1 | A=1, Y=1] - P[h(X)=1 | A=0, Y=1]|
$$

#### fpr 差距

$$
\text{false positive rate difference} = |P[h(X)=1 | A=1, Y=0] - P[h(X)=1 | A=0, Y=0]|
$$

#### 期望值

EO Difference 为 0 表示所有群体拥有相同的 tpr, tnr, fpr, fnr

更多细节参考 Fairlearn User Guide 中关于 disparity metrics 的章节。

#### 参数

- y_true : 真实标签
- y_pred : 预测标签，`h(X)` 表示分类器的返回值 
- sensitive_features : 敏感属性
- method : (str) 如何计算差距，细节详见 `fairlearn.metrics.MetricFrame.difference`
- sample_weight : 样本权重

#### 返回值

浮点型 EO Difference

In [12]:
# Toy example: ten samples with true labels and binary predictions.
y_true = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
y_pred = np.array([0, 1, 1, 1, 0, 1, 1, 0, 0, 0])

# Sensitive attribute: an alternating 1/0 pattern whose two codes are swapped
# (every value > 0 becomes 0, everything else becomes 1).
feat_sensitve = np.where(np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0]) > 0, 0, 1)
print(feat_sensitve)

# Equalized odds compares true-positive and false-positive rates between the
# groups defined by the sensitive attribute, so y_true is required here.
eod = equalized_odds_difference(
    y_true,
    y_pred,
    sensitive_features=feat_sensitve,
)
eor = equalized_odds_ratio(
    y_true,
    y_pred,
    sensitive_features=feat_sensitve,
)

# The bare tuple on the last line is what the cell displays.
("difference", eod, "ratio", eor)

[0 1 0 1 0 1 0 1 0 1]


('difference', 0.6666666666666667, 'ratio', 0.3333333333333333)

In [21]:
# Toy inputs, one entry per sample (ten samples): true labels, binary
# predictions, and the sensitive attribute kept as its raw 0/1 codes.
y_true = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
y_pred = np.array([0, 1, 1, 1, 0, 1, 1, 0, 0, 0])
feat_sensitve = np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0])

# Lay the arrays out row-wise (one row per array, one column per sample) so
# the three sequences can be compared position by position.
df = pd.DataFrame(
    data=[y_true, y_pred, feat_sensitve],
    index=["y_true", "y_pred", "feature_sensitve"],
)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
y_true,1,1,1,1,1,0,0,0,0,0
y_pred,0,1,1,1,0,1,1,0,0,0
feature_sensitve,1,0,1,0,1,0,1,0,1,0


[机器学习Fairness-公平性评价指标 - 知乎](https://zhuanlan.zhihu.com/p/600395360)