# Fairness and Bias Analysis - Bridge Failure Prediction

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Load Data and Predictions

In [2]:
# Load the predictions CSV and the processed features CSV.
df = pd.read_csv('../models/evaluation/predictions.csv')
features = pd.read_csv('../data/processed/features.csv')

# Attach the grouping columns analysed in the later cells. The left join keeps
# every prediction row. validate='many_to_one' makes pandas raise MergeError
# if structure_id is not unique in `features`; without it, duplicate keys
# would silently duplicate prediction rows and skew every per-group statistic.
df = df.merge(
    features[['structure_id', 'region_code', 'soil_type', 'avg_daily_traffic']],
    on='structure_id',
    how='left',
    validate='many_to_one',
)

## Compare Failure Probability by Region

In [3]:
# Box plot of failure_probability for each region_code. The title is applied
# to the Axes that seaborn draws on and returns.
sns.boxplot(data=df, x='region_code', y='failure_probability').set_title(
    'Failure Probability by Region'
)
plt.show()

## Disparity by Soil Type

In [4]:
# Bar chart of failure_probability aggregated per soil_type (seaborn's default
# estimator is the mean); ci=None disables the error bars.
# NOTE(review): seaborn 0.12+ deprecates `ci` in favour of `errorbar=None` --
# confirm the pinned seaborn version before switching.
sns.barplot(data=df, x='soil_type', y='failure_probability', ci=None).set_title(
    'Mean Failure Probability by Soil Type'
)
plt.show()

## Traffic Volume and Failure Probability

In [5]:
# Scatter of failure_probability against avg_daily_traffic; alpha=0.4 makes
# the markers semi-transparent so overlapping points remain distinguishable.
sns.scatterplot(
    data=df,
    x='avg_daily_traffic',
    y='failure_probability',
    alpha=0.4,
).set_title('Failure Probability vs Average Daily Traffic')
plt.show()

## Statistical Test: Regional Disparity

In [6]:
from scipy.stats import f_oneway

# One-way ANOVA on failure_probability across region_code groups.
# Missing failure_probability values are dropped first: a single NaN in any
# group makes f_oneway return NaN for both the statistic and the p-value.
# (groupby already skips rows whose region_code is missing.)
region_groups = [
    g['failure_probability'].dropna().values
    for _, g in df.groupby('region_code')
]
# Discard regions left with no observations once NaNs are removed.
region_groups = [values for values in region_groups if len(values) > 0]

if len(region_groups) < 2:
    # f_oneway requires at least two groups and raises otherwise; report the
    # situation instead and keep fstat/pval defined for any later cell.
    fstat, pval = float('nan'), float('nan')
    print('ANOVA skipped: fewer than two regions with data')
else:
    fstat, pval = f_oneway(*region_groups)
    print(f'ANOVA F={fstat:.2f}, p={pval:.3g}')