In [None]:
{
 "nbformat": 4,
 "nbformat_minor": 5,
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fairness Audit — COMPAS Dataset (Synthetic Version)\n",
    "### Student: **Remmy Kipruto Tumo**\n",
    "### Toolkit: AIF360, Pandas, Matplotlib"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Install the notebook's dependencies.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel,\n",
    "# so the packages are importable from the cells below.\n",
    "# NOTE(review): versions are unpinned; pin them once a known-good set is confirmed.\n",
    "%pip install aif360 pandas numpy scikit-learn matplotlib seaborn"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from aif360.datasets import StandardDataset\n",
    "from aif360.metrics import ClassificationMetric\n",
    "from aif360.algorithms.preprocessing import Reweighing\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import accuracy_score"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Read the COMPAS records from the CSV file in the working directory.\n",
    "df = pd.read_csv(filepath_or_buffer='compas.csv')\n",
    "\n",
    "# Preview the first five rows as the cell's output.\n",
    "df.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Wrap the DataFrame in an AIF360 StandardDataset for the fairness metrics below.\n",
    "#\n",
    "# StandardDataset expects `favorable_classes` (a list of label values, or a callable);\n",
    "# `favorable_label` is a BinaryLabelDataset argument and is rejected here as an\n",
    "# unexpected keyword. Label 0 is treated as the favorable outcome\n",
    "# (presumably 0 = no recidivism within two years; confirm against the data dictionary).\n",
    "#\n",
    "# StandardDataset re-encodes the protected attribute: rows whose `race` is listed in\n",
    "# privileged_classes become 1.0 and all other rows become 0.0.\n",
    "#\n",
    "# NOTE(review): any other non-numeric columns in compas.csv must be listed in\n",
    "# `categorical_features` or removed via `features_to_drop`; verify against the CSV schema.\n",
    "dataset = StandardDataset(\n",
    "    df,\n",
    "    label_name='two_year_recid',\n",
    "    favorable_classes=[0],\n",
    "    protected_attribute_names=['race'],\n",
    "    privileged_classes=[['Caucasian']]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 70/30 train/test split. The seed is fixed so the shuffle (and therefore every\n",
    "# metric reported below) is identical on each Restart & Run All.\n",
    "train, test = dataset.split([0.7], shuffle=True, seed=42)\n",
    "X_train, y_train = train.features, train.labels.ravel()\n",
    "X_test, y_test = test.features, test.labels.ravel()\n",
    "\n",
    "# Baseline classifier trained on the unweighted training set.\n",
    "clf = LogisticRegression(max_iter=1000)\n",
    "clf.fit(X_train, y_train)\n",
    "y_pred = clf.predict(X_test)\n",
    "\n",
    "# Copy the test set and replace its labels with the predictions so it can be\n",
    "# compared against the ground-truth `test` by ClassificationMetric.\n",
    "# A deep copy keeps the ground-truth arrays fully independent of the prediction set.\n",
    "test_pred = test.copy(deepcopy=True)\n",
    "test_pred.labels = y_pred.reshape(-1, 1)"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Groups must use the numeric codes stored in the dataset, not the original strings:\n",
    "# StandardDataset encodes privileged rows ('Caucasian') as 1 and all others as 0,\n",
    "# so string values would match no rows and the metrics would be undefined.\n",
    "# NOTE: the unprivileged group (race == 0) is every non-Caucasian row. For a strict\n",
    "# African-American vs. Caucasian comparison, filter df to those two races before\n",
    "# building the dataset.\n",
    "privileged_groups = [{'race': 1}]\n",
    "unprivileged_groups = [{'race': 0}]\n",
    "\n",
    "# Compare baseline predictions against the ground truth, split by group.\n",
    "metric = ClassificationMetric(\n",
    "    test, test_pred,\n",
    "    privileged_groups=privileged_groups,\n",
    "    unprivileged_groups=unprivileged_groups\n",
    ")\n",
    "print('Accuracy:', accuracy_score(y_test, y_pred))\n",
    "print('Statistical Parity Difference:', metric.statistical_parity_difference())\n",
    "print('Disparate Impact:', metric.disparate_impact())\n",
    "print('Equal Opportunity Difference:', metric.equal_opportunity_difference())"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Groups must use the numeric codes stored in the dataset: StandardDataset encodes\n",
    "# privileged rows ('Caucasian') as 1 and all other rows as 0. String values would\n",
    "# match no rows, leaving Reweighing and the metrics with empty groups.\n",
    "privileged_groups = [{'race': 1}]\n",
    "unprivileged_groups = [{'race': 0}]\n",
    "\n",
    "# Pre-processing mitigation: compute per-instance weights on the training set.\n",
    "rw = Reweighing(\n",
    "    privileged_groups=privileged_groups,\n",
    "    unprivileged_groups=unprivileged_groups\n",
    ")\n",
    "train_rw = rw.fit_transform(train)\n",
    "\n",
    "# Retrain the same model, this time using the reweighing instance weights.\n",
    "clf2 = LogisticRegression(max_iter=1000)\n",
    "clf2.fit(train_rw.features, train_rw.labels.ravel(), sample_weight=train_rw.instance_weights)\n",
    "y_pred_rw = clf2.predict(X_test)\n",
    "\n",
    "# Copy the test set and replace its labels with the mitigated predictions.\n",
    "test_pred_rw = test.copy(deepcopy=True)\n",
    "test_pred_rw.labels = y_pred_rw.reshape(-1, 1)\n",
    "\n",
    "metric_rw = ClassificationMetric(\n",
    "    test, test_pred_rw,\n",
    "    privileged_groups=privileged_groups,\n",
    "    unprivileged_groups=unprivileged_groups\n",
    ")\n",
    "print('Post-Mitigation Statistical Parity:', metric_rw.statistical_parity_difference())\n",
    "print('Post-Mitigation Disparate Impact:', metric_rw.disparate_impact())"
   ]
  }
 ]
}
