In [1]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# COMPAS Fairness Visualization Notebook\n",
    "## AI Ethics Assignment - Comprehensive Fairness Analysis with Results\n",
    "\n",
    "This notebook loads and visualizes the audit results JSON file with comprehensive fairness analysis."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from pathlib import Path\n",
    "\n",
    "sns.set_style('whitegrid')\n",
    "plt.rcParams['figure.figsize'] = (14, 8)\n",
    "\n",
    "print('✓ Libraries imported successfully')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Audit Results JSON"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the audit results JSON\n",
    "json_path = Path('results/audit_results.json')\n",
    "\n",
    "# Pass the encoding explicitly so the load does not depend on the platform's\n",
    "# default text encoding (JSON interchange is UTF-8).\n",
    "with open(json_path, 'r', encoding='utf-8') as f:\n",
    "    results = json.load(f)\n",
    "\n",
    "# Fail fast, with a clear message, if a section the later cells index into is absent\n",
    "required_sections = ('summary', 'metrics', 'disparate_impact')\n",
    "missing_sections = [name for name in required_sections if name not in results]\n",
    "if missing_sections:\n",
    "    raise KeyError(f'{json_path} is missing required section(s): {missing_sections}')\n",
    "\n",
    "print(f'✓ Results loaded from: {json_path}')\n",
    "print(f'\\nJSON Structure:')\n",
    "print(f'  - Summary section')\n",
    "print(f'  - Metrics for each race')\n",
    "print(f'  - Disparate impact analysis')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Extract and Display Summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull the headline counts out of the summary section (reused by later cells)\n",
    "summary = results['summary']\n",
    "total_records = summary['total_records']\n",
    "aa_count = summary['african_american']\n",
    "ca_count = summary['caucasian']\n",
    "\n",
    "banner = '=' * 70\n",
    "print(banner)\n",
    "print('AUDIT SUMMARY')\n",
    "print(banner)\n",
    "print(f'\\nTotal Records Analyzed: {total_records:,}')\n",
    "print(f'\\nDemographic Breakdown:')\n",
    "# One line per group: count plus its share of all analyzed records\n",
    "for group_label, group_count in (('African-American', aa_count), ('Caucasian', ca_count)):\n",
    "    group_share = group_count / total_records * 100\n",
    "    print(f'  {group_label}: {group_count:,} ({group_share:.1f}%)')\n",
    "print(f'\\nTotal: {aa_count + ca_count:,} records')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Extract Metrics from JSON"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-race metric dictionaries (aa_metrics / ca_metrics are reused by later cells)\n",
    "metrics = results['metrics']\n",
    "aa_metrics = metrics['African-American']\n",
    "ca_metrics = metrics['Caucasian']\n",
    "\n",
    "# (label, key) pairs, listed in the order they are reported\n",
    "count_fields = [\n",
    "    ('Sample Size', 'n'),\n",
    "    ('True Positives', 'tp'),\n",
    "    ('False Positives', 'fp'),\n",
    "    ('True Negatives', 'tn'),\n",
    "    ('False Negatives', 'fn'),\n",
    "]\n",
    "rate_fields = [\n",
    "    ('FPR (False Positive Rate)', 'fpr'),\n",
    "    ('FNR (False Negative Rate)', 'fnr'),\n",
    "    ('TPR (True Positive Rate)', 'tpr'),\n",
    "    ('Accuracy', 'accuracy'),\n",
    "    ('Selection Rate', 'selection_rate'),\n",
    "]\n",
    "\n",
    "print('\\n' + '='*70)\n",
    "print('FAIRNESS METRICS BY RACE')\n",
    "print('='*70)\n",
    "\n",
    "for race, metric_data in metrics.items():\n",
    "    print(f'\\n{race}:')\n",
    "    # Raw confusion-matrix counts, thousands-separated\n",
    "    for label, key in count_fields:\n",
    "        print(f'  {label}: {metric_data[key]:,}')\n",
    "    print(f'\\n  RATES:')\n",
    "    # Each rate is shown both as a fraction and as a percentage\n",
    "    for label, key in rate_fields:\n",
    "        rate = metric_data[key]\n",
    "        print(f'    {label}: {rate:.4f} ({rate*100:.2f}%)')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Disparate Impact Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extract disparate impact\n",
    "di = results['disparate_impact']\n",
    "fpr_ratio = di['fpr_ratio']\n",
    "\n",
    "# Four-fifths rule, applied symmetrically: the lower rate must be at least 80%\n",
    "# of the higher one, so an acceptable ratio lies in [0.80, 1.25]. Testing only\n",
    "# `ratio >= 0.8` would report PASS for every ratio above 1, however large.\n",
    "# NOTE(review): the EEOC rule is defined on selection rates; applying it to an\n",
    "# FPR ratio is an adaptation - confirm this is the intended standard.\n",
    "four_fifths_lower = 0.8\n",
    "four_fifths_upper = 1.25\n",
    "passes_four_fifths = four_fifths_lower <= fpr_ratio <= four_fifths_upper\n",
    "\n",
    "print('\\n' + '='*70)\n",
    "print('DISPARATE IMPACT ANALYSIS')\n",
    "print('='*70)\n",
    "\n",
    "print(f'\\nFalse Positive Rate (FPR) by Race:')\n",
    "print(f'  African-American: {aa_metrics[\"fpr\"]:.4f} ({aa_metrics[\"fpr\"]*100:.2f}%)')\n",
    "print(f'  Caucasian: {ca_metrics[\"fpr\"]:.4f} ({ca_metrics[\"fpr\"]*100:.2f}%)')\n",
    "\n",
    "print(f'\\nDisparate Impact Ratio:')\n",
    "print(f'  {fpr_ratio:.4f}')\n",
    "\n",
    "# The ratio is a ratio of false positive rates, so it describes defendants who\n",
    "# did not reoffend, not all defendants.\n",
    "print(f'\\nInterpretation:')\n",
    "print(f'  {di[\"interpretation\"]}')\n",
    "print(f'  African-American defendants who did not reoffend are flagged as high-risk at {fpr_ratio:.2f}x')\n",
    "print(f'  the rate of Caucasian defendants who did not reoffend.')\n",
    "\n",
    "print(f'\\nEEOC 80% Rule Assessment:')\n",
    "print(f'  Standard: Ratio should be between {four_fifths_lower:.2f} and {four_fifths_upper:.2f}')\n",
    "print(f'  Current Ratio: {fpr_ratio:.4f}')\n",
    "print(f'  Status: {\"✓ PASSES\" if passes_four_fifths else \"✗ FAILS\"}')\n",
    "\n",
    "print('\\n' + '='*70)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Visualization 1: Fairness Metrics Comparison by Race"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Six-panel overview: five per-race rate comparisons plus the disparate impact ratio\n",
    "fig, axes = plt.subplots(2, 3, figsize=(18, 10))\n",
    "fig.suptitle('COMPAS Fairness Metrics Analysis by Race', fontsize=18, fontweight='bold')\n",
    "\n",
    "races = ['African-American', 'Caucasian']\n",
    "colors = ['#FF6B6B', '#4ECDC4']\n",
    "\n",
    "\n",
    "def plot_rate_panel(ax, metric_key, ylabel, title, fixed_ylim=True):\n",
    "    \"\"\"Draw one bar chart comparing a rate metric between the two races.\n",
    "\n",
    "    ax          -- matplotlib Axes to draw on\n",
    "    metric_key  -- key looked up in aa_metrics / ca_metrics (e.g. 'fpr')\n",
    "    ylabel      -- y-axis label\n",
    "    title       -- panel title\n",
    "    fixed_ylim  -- True: y-axis spans [0, 1]; False: scale to 1.3x the larger bar\n",
    "    \"\"\"\n",
    "    values = [aa_metrics[metric_key], ca_metrics[metric_key]]\n",
    "    ax.bar(races, values, color=colors, alpha=0.7, edgecolor='black', linewidth=2)\n",
    "    ax.set_ylabel(ylabel, fontsize=12, fontweight='bold')\n",
    "    ax.set_title(title, fontsize=13, fontweight='bold')\n",
    "    if fixed_ylim or max(values) <= 0:\n",
    "        # Also used when both rates are 0, so the axis never collapses to zero height\n",
    "        top = 1\n",
    "    else:\n",
    "        top = max(values) * 1.3\n",
    "    ax.set_ylim([0, top])\n",
    "    # Offset scales with the axis so value labels stay inside auto-scaled panels\n",
    "    label_offset = 0.02 * top\n",
    "    for i, v in enumerate(values):\n",
    "        ax.text(i, v + label_offset, f'{v:.1%}', ha='center', fontweight='bold', fontsize=11)\n",
    "\n",
    "\n",
    "# (axes, metric key, y label, title, fixed [0, 1] y-axis?) for panels 1-5\n",
    "rate_panels = [\n",
    "    (axes[0, 0], 'fpr', 'FPR', 'False Positive Rate (FPR)', False),\n",
    "    (axes[0, 1], 'fnr', 'FNR', 'False Negative Rate (FNR)', False),\n",
    "    (axes[0, 2], 'tpr', 'TPR', 'True Positive Rate (TPR)', True),\n",
    "    (axes[1, 0], 'accuracy', 'Accuracy', 'Overall Accuracy', True),\n",
    "    (axes[1, 1], 'selection_rate', 'Selection Rate', 'Proportion Predicted High-Risk', True),\n",
    "]\n",
    "for panel_ax, metric_key, ylabel, title, fixed_ylim in rate_panels:\n",
    "    plot_rate_panel(panel_ax, metric_key, ylabel, title, fixed_ylim)\n",
    "\n",
    "# Reference line on the FPR panel; its label only appears once legend() is called\n",
    "axes[0, 0].axhline(y=0.3, color='orange', linestyle='--', alpha=0.5, label='High concern')\n",
    "axes[0, 0].legend()\n",
    "\n",
    "# Plot 6: Disparate Impact Ratio\n",
    "di_ax = axes[1, 2]\n",
    "di_ax.barh(['DI Ratio'], [fpr_ratio], color='#95E1D3', alpha=0.7, edgecolor='black', linewidth=2, height=0.4)\n",
    "di_ax.axvline(x=0.8, color='red', linestyle='--', linewidth=2.5, label='EEOC threshold')\n",
    "di_ax.set_xlabel('Ratio', fontsize=12, fontweight='bold')\n",
    "di_ax.set_title('Disparate Impact Ratio', fontsize=13, fontweight='bold')\n",
    "di_ax.text(fpr_ratio + 0.05, 0, f'{fpr_ratio:.2f}x', va='center', fontweight='bold', fontsize=12)\n",
    "# Keep the default 0-2.5 span, but widen it when the bar and its label would be clipped\n",
    "di_ax.set_xlim([0, max(2.5, fpr_ratio * 1.2)])\n",
    "di_ax.legend()\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "print('✓ Visualization complete')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Visualization 2: Confusion Matrices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One heatmap per race, laid out side by side\n",
    "fig, axes = plt.subplots(1, 2, figsize=(14, 6))\n",
    "fig.suptitle('Confusion Matrices by Race', fontsize=16, fontweight='bold')\n",
    "\n",
    "predicted_ticks = ['Predicted\\nNegative', 'Predicted\\nPositive']\n",
    "actual_ticks = ['Actual\\nNegative', 'Actual\\nPositive']\n",
    "panel_specs = zip(axes, ['African-American', 'Caucasian'], ['Reds', 'Blues'])\n",
    "\n",
    "for panel_ax, race, cmap in panel_specs:\n",
    "    counts = metrics[race]\n",
    "    # Rows are actual labels, columns are predicted labels: [[TN, FP], [FN, TP]]\n",
    "    cm = np.array([\n",
    "        [counts['tn'], counts['fp']],\n",
    "        [counts['fn'], counts['tp']],\n",
    "    ])\n",
    "    sns.heatmap(cm, annot=True, fmt='d', cmap=cmap, ax=panel_ax,\n",
    "                cbar_kws={'label': 'Count'}, linewidths=2, linecolor='black',\n",
    "                xticklabels=predicted_ticks,\n",
    "                yticklabels=actual_ticks)\n",
    "    panel_ax.set_title(f'{race}', fontweight='bold', fontsize=13)\n",
    "    panel_ax.set_ylabel('True Label', fontweight='bold')\n",
    "    panel_ax.set_xlabel('Predicted Label', fontweight='bold')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "print('✓ Confusion matrices displayed')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Visualization 3: Detailed Metrics Comparison Table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the side-by-side comparison table one column at a time\n",
    "count_rows = [\n",
    "    ('Sample Size', 'n'),\n",
    "    ('True Positives', 'tp'),\n",
    "    ('False Positives', 'fp'),\n",
    "    ('True Negatives', 'tn'),\n",
    "    ('False Negatives', 'fn'),\n",
    "]\n",
    "rate_rows = [\n",
    "    ('FPR (%)', 'fpr'),\n",
    "    ('FNR (%)', 'fnr'),\n",
    "    ('TPR (%)', 'tpr'),\n",
    "    ('Accuracy (%)', 'accuracy'),\n",
    "    ('Selection Rate (%)', 'selection_rate'),\n",
    "]\n",
    "\n",
    "\n",
    "def format_column(group_metrics):\n",
    "    \"\"\"Return one race's table cells: thousands-separated counts, then percentages.\"\"\"\n",
    "    count_cells = [f'{group_metrics[key]:,}' for _, key in count_rows]\n",
    "    rate_cells = [f'{group_metrics[key]*100:.2f}%' for _, key in rate_rows]\n",
    "    return count_cells + rate_cells\n",
    "\n",
    "\n",
    "comparison_data = {\n",
    "    'Metric': [label for label, _ in count_rows + rate_rows],\n",
    "    'African-American': format_column(aa_metrics),\n",
    "    'Caucasian': format_column(ca_metrics),\n",
    "}\n",
    "\n",
    "df_comparison = pd.DataFrame(comparison_data)\n",
    "print('\\n' + '='*80)\n",
    "print('COMPREHENSIVE METRICS COMPARISON')\n",
    "print('='*80 + '\\n')\n",
    "print(df_comparison.to_string(index=False))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Key Findings & Interpretation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print('\\n' + '='*80)\n",
    "print('KEY FINDINGS - RACIAL BIAS IN COMPAS')\n",
    "print('='*80)\n",
    "\n",
    "# fpr_ratio is a ratio of false positive rates, so the statement is about\n",
    "# defendants who did not reoffend rather than all defendants.\n",
    "print(f'\\n1. PRIMARY FINDING - Disparate Impact:')\n",
    "print(f'   African-American defendants who did not reoffend are flagged as high-risk at {fpr_ratio:.2f}x')\n",
    "print(f'   the rate of Caucasian defendants who did not reoffend.')\n",
    "\n",
    "fpr_gap = aa_metrics['fpr'] - ca_metrics['fpr']\n",
    "# False positives are exactly the non-reoffenders flagged as high-risk, so report\n",
    "# the count directly. (TPR + FNR = 1, so n * (1 - TPR - FNR) is always ~0.)\n",
    "aa_false_positives = aa_metrics['fp']\n",
    "print(f'\\n2. FALSE POSITIVE RATE GAP (Most Critical):')\n",
    "print(f'   African-American FPR: {aa_metrics[\"fpr\"]*100:.2f}%')\n",
    "print(f'   Caucasian FPR: {ca_metrics[\"fpr\"]*100:.2f}%')\n",
    "print(f'   Gap: {fpr_gap*100:.2f} percentage points')\n",
    "print(f'\\n   Interpretation: Among {summary[\"african_american\"]:,} African-American defendants,')\n",
    "print(f'   {aa_false_positives:,} who did NOT reoffend')\n",
    "print(f'   were incorrectly flagged as high-risk.')\n",
    "\n",
    "print(f'\\n3. FALSE NEGATIVE RATE (Opposite Pattern):')\n",
    "print(f'   African-American FNR: {aa_metrics[\"fnr\"]*100:.2f}%')\n",
    "print(f'   Caucasian FNR: {ca_metrics[\"fnr\"]*100:.2f}%')\n",
    "print(f'\\n   Interpretation: Caucasian defendants are more likely to be')\n",
    "print(f'   classified as low-risk despite actual reoffending.')\n",
    "\n",
    "print(f'\\n4. SELECTION RATE DISPARITY:')\n",
    "print(f'   African-American predicted high-risk: {aa_metrics[\"selection_rate\"]*100:.2f}%')\n",
    "print(f'   Caucasian predicted high-risk: {ca_metrics[\"selection_rate\"]*100:.2f}%')\n",
    "print(f'   Difference: {(aa_metrics[\"selection_rate\"] - ca_metrics[\"selection_rate\"])*100:.2f} percentage points')\n",
    "\n",
    "# Four-fifths rule, applied symmetrically: the lower rate must be at least 80%\n",
    "# of the higher one, i.e. the ratio must lie in [0.80, 1.25]. Testing only\n",
    "# `ratio >= 0.8` would report PASS for every ratio above 1.\n",
    "passes_four_fifths = 0.8 <= fpr_ratio <= 1.25\n",
    "print(f'\\n5. LEGAL STANDARD - EEOC 80% Rule:')\n",
    "print(f'   Standard: Disparate Impact Ratio should be between 0.80 and 1.25')\n",
    "print(f'   Current Ratio: {fpr_ratio:.4f}')\n",
    "print(f'   Status: {\"✓ PASSES\" if passes_four_fifths else \"✗ FAILS (Evidence of Discrimination)\"}')\n",
    "\n",
    "print(f'\\n6. SYSTEMIC IMPACT:')\n",
    "print(f'   - African-American defendants receive disproportionately harsh treatment')\n",
    "print(f'   - More likely to receive longer sentences/higher bail')\n",
    "print(f'   - Creates compounding disadvantage through criminal justice system')\n",
    "print(f'   - Perpetuates systemic racial inequality')\n",
    "\n",
    "print(f'\\n' + '='*80)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Ethical Assessment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print('\\nETHICAL PRINCIPLES ASSESSMENT')\n",
    "print('='*80)\n",
    "\n",
    "print('\\nThis COMPAS system VIOLATES the following ethical principles:')\n",
    "\n",
    "print('\\n1. JUSTICE - Fair Distribution:')\n",
    "print('   ✗ VIOLATED - Unequal outcomes for similar behavior')\n",
    "# Report the ratio loaded from the audit JSON instead of a hard-coded figure, and\n",
    "# state the symmetric four-fifths band (a ratio above 1 trivially meets '>= 0.8').\n",
    "print(f'   ✗ Disparate impact ratio: {fpr_ratio:.2f}x (should be between 0.80 and 1.25)')\n",
    "\n",
    "print('\\n2. NON-MALEFICENCE - Do No Harm:')\n",
    "print('   ✗ VIOLATED - Causes demonstrable harm to African-American defendants')\n",
    "# The false positive count comes from the loaded metrics so it cannot go stale\n",
    "print(f'   ✗ {aa_metrics[\"fp\"]:,} African-Americans incorrectly flagged as high-risk')\n",
    "\n",
    "print('\\n3. AUTONOMY - Respect Individual Control:')\n",
    "print('   ✗ VIOLATED - Biased predictions limit opportunities and freedom')\n",
    "print('   ✗ Affects sentencing, bail decisions, parole eligibility')\n",
    "\n",
    "print('\\n4. TRANSPARENCY - Open Decision-Making:')\n",
    "print('   ✓ PARTIALLY ADDRESSED - Algorithm is documented')\n",
    "print('   ✗ But bias not disclosed to stakeholders')\n",
    "\n",
    "print('\\n' + '='*80)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Remediation Recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rule = '=' * 80\n",
    "\n",
    "# Remediation plan grouped by time horizon; steps are numbered in list order\n",
    "remediation_plan = [\n",
    "    ('IMMEDIATE ACTIONS (0-3 months):', [\n",
    "        'STOP using COMPAS as sole decision-making tool',\n",
    "        'IMPLEMENT mandatory human review for ALL high-risk classifications',\n",
    "        'AUDIT all prior sentences influenced by biased COMPAS scores',\n",
    "        'DISCLOSE bias findings to all stakeholders',\n",
    "    ]),\n",
    "    ('SHORT-TERM FIXES (3-6 months):', [\n",
    "        'RETRAIN model with explicit fairness constraints',\n",
    "        'REMOVE proxy variables correlated with race',\n",
    "        'TARGET: Achieve disparate impact ratio >= 0.85',\n",
    "        'CONDUCT independent fairness audit',\n",
    "    ]),\n",
    "    ('LONG-TERM SOLUTIONS (6+ months):', [\n",
    "        'DEVELOP alternative risk assessment methods',\n",
    "        'INVOLVE affected communities in system redesign',\n",
    "        'ESTABLISH independent algorithmic audit board',\n",
    "        'IMPLEMENT quarterly fairness monitoring',\n",
    "    ]),\n",
    "]\n",
    "\n",
    "# Bullet points printed under the conclusion\n",
    "conclusion_requirements = [\n",
    "    'Legal reform to prohibit discriminatory algorithms',\n",
    "    'Community involvement in system governance',\n",
    "    'Genuine commitment to equitable criminal justice outcomes',\n",
    "    'Transparency and accountability to those affected',\n",
    "]\n",
    "\n",
    "print('\\nRECOMMENDED REMEDIATION STEPS')\n",
    "print(rule)\n",
    "\n",
    "for heading, steps in remediation_plan:\n",
    "    print('\\n' + heading)\n",
    "    for number, step in enumerate(steps, start=1):\n",
    "        print(f'  {number}. {step}')\n",
    "\n",
    "print('\\n' + rule)\n",
    "print('CONCLUSION')\n",
    "print(rule)\n",
    "\n",
    "print('\\nCOMPAS demonstrates how AI systems amplify systemic biases at scale.')\n",
    "print('Technical fixes alone are insufficient. Remediation requires:')\n",
    "for requirement in conclusion_requirements:\n",
    "    print('  • ' + requirement)\n",
    "\n",
    "print('\\n' + rule)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}

NameError: name 'null' is not defined