# 07: Running Experiments

This notebook demonstrates how to run the three main experiments from the paper:
1. Ablation Study
2. Multi-Dataset Benchmarking
3. Regulatory Compliance

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import subprocess
import sys
import os

# Change to project root directory
os.chdir(os.path.dirname(os.getcwd()) if os.path.basename(os.getcwd()) == 'notebooks' else os.getcwd())
print(f"Working directory: {os.getcwd()}")
print(f"Python executable: {sys.executable}")

sns.set_theme(style="whitegrid")
%matplotlib inline

In [None]:
def run_experiment(script_path, args=None):
    """Run an experiment script using the current Python interpreter.

    The script is launched as ``sys.executable script_path *args``. Its
    stdout and stderr are captured and printed only after the process has
    exited, so nothing is shown while the script is still running.

    Parameters
    ----------
    script_path : str or os.PathLike
        Script to execute (resolved relative to the current working directory).
    args : iterable, optional
        Command-line arguments for the script. Any iterable is accepted and
        every item is converted with ``str()``, so numbers and ``Path``
        objects can be passed directly. ``None`` means "no arguments".

    Returns
    -------
    bool
        ``True`` if the process exited with return code 0, else ``False``.

    Raises
    ------
    TypeError
        If ``args`` is a bare string; it would otherwise be split into
        single-character arguments.
    """
    if isinstance(args, (str, bytes)):
        raise TypeError("args must be an iterable of arguments, not a single string")

    # Normalise everything to str so both the echo below and subprocess
    # accept ints, floats and path objects.
    cmd = [sys.executable, str(script_path)]
    if args is not None:
        cmd.extend(str(arg) for arg in args)

    print(f"Running: {' '.join(cmd)}")
    print("-" * 60)
    result = subprocess.run(cmd, capture_output=True, text=True)
    print(result.stdout)
    if result.stderr:
        print("STDERR:", result.stderr)
    if result.returncode != 0:
        print(f"WARNING: Command exited with code {result.returncode}")
    return result.returncode == 0

## Experiment 1: Ablation Study

Test the impact of removing key FAIR-CARE components.

In [None]:
# Run experiment (this may take ~30 minutes)
exp1_ok = run_experiment(
    'experiments/scripts/runexperiment1.py',
    [
        '--datasets', 'compas',
        '--configs', 'baseline,configa,configb,configc,default',
        '--output', 'results/exp1.csv'
    ]
)
# Stop here on failure so the following cells do not load a missing or
# stale results/exp1.csv left over from an earlier run.
if not exp1_ok:
    raise RuntimeError("Experiment 1 failed - see the script output above")

In [None]:
# Read the Experiment 1 results CSV and display it as a table
exp1_path = 'results/exp1.csv'
exp1 = pd.read_csv(exp1_path)
exp1

In [None]:
# Mean FAIR-CARE score per configuration, sorted ascending, as horizontal bars
mean_by_config = exp1.groupby('config')['faircarescore'].mean().sort_values()

fig, ax = plt.subplots(figsize=(10, 6))
mean_by_config.plot(kind='barh', color='steelblue', ax=ax)
ax.set_xlabel('FAIR-CARE Score')
ax.set_ylabel('Configuration')
ax.set_title('Ablation Study: FAIR-CARE Score by Configuration')

# Dashed vertical reference lines, labelled in the legend
reference_lines = [
    (0.85, 'green', 'EXCELLENT'),
    (0.70, 'orange', 'ACCEPTABLE'),
]
for threshold, line_color, rating in reference_lines:
    ax.axvline(x=threshold, color=line_color, linestyle='--', label=rating)

ax.legend()
fig.tight_layout()
plt.show()

## Experiment 2: Multi-Dataset Benchmarking

Compare FAIR-CARE performance across datasets.

In [None]:
# Run experiment (this may take ~45 minutes)
exp2_ok = run_experiment(
    'experiments/scripts/runexperiment2.py',
    [
        '--datasets', 'compas,adult,german',
        '--config', 'experiments/configs/default.yaml',
        '--output', 'results/exp2.csv'
    ]
)
# Stop here on failure so the following cells do not load a missing or
# stale results/exp2.csv left over from an earlier run.
if not exp2_ok:
    raise RuntimeError("Experiment 2 failed - see the script output above")

In [None]:
# Read the Experiment 2 results CSV and display it as a table
exp2_path = 'results/exp2.csv'
exp2 = pd.read_csv(exp2_path)
exp2

In [None]:
# Average the three layer scores and the overall score for each dataset
score_columns = ['SB', 'SS', 'SG', 'faircarescore']
grouped = exp2.groupby('dataset')[score_columns].mean()

# Grouped bars: one cluster per dataset, one bar per score column
ax = grouped.plot(kind='bar', figsize=(12, 6))
ax.set_ylabel('Score')
ax.set_xlabel('Dataset')
ax.set_title('Multi-Dataset Benchmarking: Layer Scores')
# Legend entries follow the column order of `grouped`
ax.legend(['Bronze (SB)', 'Silver (SS)', 'Gold (SG)', 'FAIR-CARE'])
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

## Experiment 3: Regulatory Compliance

Test GDPR, HIPAA, and CCPA compliance modes.

In [None]:
# Run experiment (this may take ~40 minutes)
exp3_ok = run_experiment(
    'experiments/scripts/runexperiment3.py',
    [
        '--datasets', 'compas,adult,german',
        '--regulations', 'gdpr,hipaa,ccpa',
        '--output', 'results/exp3.csv'
    ]
)
# Stop here on failure so the following cells do not load a missing or
# stale results/exp3.csv left over from an earlier run.
if not exp3_ok:
    raise RuntimeError("Experiment 3 failed - see the script output above")

In [None]:
# Read the Experiment 3 results CSV and display it as a table
exp3_path = 'results/exp3.csv'
exp3 = pd.read_csv(exp3_path)
exp3

In [None]:
# Two side-by-side panels, each showing one metric averaged per regulation
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# (axes, result column, bar colour, axis/title label) for each panel
panels = [
    (ax1, 'faircarescore', 'steelblue', 'FAIR-CARE Score'),
    (ax2, 'privacy_risk', 'coral', 'Privacy Risk'),
]
for ax, column, bar_color, label in panels:
    exp3.groupby('regulation')[column].mean().plot(kind='bar', ax=ax, color=bar_color)
    ax.set_ylabel(label)
    ax.set_xlabel('Regulation')
    ax.set_title(f'{label} by Regulation')
    # Keep the regulation names horizontal
    ax.set_xticklabels(ax.get_xticklabels(), rotation=0)

plt.tight_layout()
plt.show()

## Generate All Figures

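Aggregate the three result files (`results/exp1.csv`, `results/exp2.csv`, `results/exp3.csv`) and write publication-ready figures for the paper to `results/figures/` in PNG and PDF format.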

In [None]:
# Build the figures from the three experiment result files
figures_ok = run_experiment(
    'experiments/scripts/aggregateresults.py',
    [
        '--inputs', 'results/exp1.csv,results/exp2.csv,results/exp3.csv',
        '--output', 'results/figures/',
        '--format', 'png,pdf'
    ]
)
# Fail loudly instead of letting the notebook finish as if the figures
# had been generated.
if not figures_ok:
    raise RuntimeError("Figure generation failed - see the script output above")

## Summary

If every cell above ran without errors, the following steps are complete:
- Ablation study
- Multi-dataset benchmarking
- Regulatory compliance
- Figures generated

Results are ready for paper submission!