# Experiment: Label Noise and Fairness-Performance Tradeoff
This experiment evaluates how label noise affects the tradeoff between fairness and performance in a classification task. We implement the 2LR Plugin-Approach (from Bob's paper) and analyze synthetic data under various levels of label noise.

## 1. Import Libraries

In [None]:
from random import seed, shuffle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
import pandas as pd
import sys
sys.path.insert(1, '/path/to/tools')  # Update this path as needed
from gen_synth_data import *
from plot_helper import *
from corrupt_labels import *
from calc_metrics import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

## 2. Configuration

In [None]:
# Experiment-wide settings read by the later cells
random_seed = 0      # value handed to np.random.seed before data generation
n_samples = 2000     # forwarded to generate_synthetic_data
disc_factor = 0.5    # forwarded to generate_synthetic_data
flip_prob = 0.5      # forwarded to add_bin_noise for both Y and Y_sen

## 3. Data Generation

In [None]:
# Seed the random number generators so reruns of this cell are reproducible.
# NumPy's global generator is seeded as before; Python's built-in `random`
# generator is now seeded too, using the `seed` function imported from
# `random` at the top of the notebook.
# NOTE(review): whether the helper modules draw from `random` is not visible
# here -- seeding it is harmless if they do not.
np.random.seed(random_seed)
seed(random_seed)

# Generate the synthetic dataset: X, Y and Y_sen.
# NOTE(review): Y_sen is presumably the sensitive-attribute vector, and the
# meaning of the leading `False` argument is defined in gen_synth_data --
# confirm both against that module.
X, Y, Y_sen = generate_synthetic_data(False, n_samples=n_samples, disc_factor=disc_factor)

# Noisy counterparts of both label vectors, produced by add_bin_noise with
# the configured flip_prob.
Y_corrupted = add_bin_noise(Y, flip_prob)
Y_sen_corrupted = add_bin_noise(Y_sen, flip_prob)

## 4. Model Training

In [None]:
# Train/test split: the first 70% of the rows (in stored order; nothing is
# shuffled in this cell) are used for training, the remainder for testing.
split_index = int(0.7 * len(X))
train_rows = slice(None, split_index)
test_rows = slice(split_index, None)

X_train, X_test = X[train_rows], X[test_rows]
Y_train, Y_test = Y[train_rows], Y[test_rows]
Y_corr_train, Y_corr_test = Y_corrupted[train_rows], Y_corrupted[test_rows]
Y_sen_train, Y_sen_test = Y_sen[train_rows], Y_sen[test_rows]
Y_sen_corr_train, Y_sen_corr_test = Y_sen_corrupted[train_rows], Y_sen_corrupted[test_rows]

# Four logistic regressions on the same training features, one per target:
# clean Y, corrupted Y, clean Y_sen and corrupted Y_sen.
p_reg = LogisticRegression()
p_reg.fit(X_train, Y_train)

p_reg_cor = LogisticRegression()
p_reg_cor.fit(X_train, Y_corr_train)

f_reg = LogisticRegression()
f_reg.fit(X_train, Y_sen_train)

f_reg_cor = LogisticRegression()
f_reg_cor.fit(X_train, Y_sen_corr_train)

## 5. Visualization of Decision Boundaries

In [None]:
# Example visualization function (uses plot_helper)
# subplot_reg_corr(...)

## 6. Metric Calculation: BER and Fairness

In [None]:
# Lambda grid for the sweep: 100 evenly spaced values from 0 to 2,
# both endpoints included.
lmd_values = np.linspace(start=0, stop=2, num=100)
# Compute metrics here...

In [None]:
# Store results in a DataFrame with one row per lambda value.
# The metric columns start as NaN placeholders that pandas broadcasts to the
# length of `lmd_values`. Passing empty lists next to the non-empty lambda
# grid makes pd.DataFrame raise
# "ValueError: All arrays must be of the same length".
# Overwrite each column with the computed metric values once the sweep has run.
metric_columns = ['BER_clean', 'MD_clean', 'DI_clean', 'BER_corr', 'MD_corr', 'DI_corr']
results = pd.DataFrame({
    'lambda': lmd_values,
    **{column: np.nan for column in metric_columns},
})

## 7. Reproducibility Notes
- Python version: (fill in)
- Libraries: numpy, matplotlib, sklearn, pandas
- Custom modules (loaded from the path added to `sys.path`): `gen_synth_data`, `plot_helper`, `corrupt_labels`, `calc_metrics`
- Random seed: 0 (`random_seed`, applied via `np.random.seed`)
