# RL Bias Mitigation Project - Phase 1
## Loan Approval System with Fairness Constraints

**Group 3**: Aniketh (AM.EN.UA41E22009), Jatin (AM.EN.UA41E22024), Kaushik (AM.EN.UA41E22026)

---

### Problem Statement

Loan approval systems often embed historical human biases. This project uses **Reinforcement Learning (DQN)** to learn fair loan approval decisions while reducing gender-based disparity.

**Baseline Data Statistics:**
- Men approval rate: **42.93%**
- Women approval rate: **18.92%**
- Goal: Reduce this disparity through fairness-aware RL

---

### MDP Formulation

- **State Space (S)**: `(salary, years_exp, sex)`
- **Action Space (A)**: `{0: Reject, 1: Approve}`
- **Reward Function (R)**: `R_classification + R_fairness`
  - Classification: +1 (correct approve), -1 (wrong approve), 0 (reject)
  - Fairness penalty: `-λ * |approval_rate(women) - approval_rate(men)|`
- **Discount Factor (γ)**: 0.99
- **Episode Length**: 100 applicants

## 1. Import Required Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple
from collections import deque, namedtuple
import random
import warnings
warnings.filterwarnings('ignore')

# PyTorch imports
import torch
import torch.nn as nn
import torch.optim as optim

# Seed every random number generator the notebook draws from (NumPy, the
# stdlib `random` module, PyTorch on CPU and, when present, on CUDA) so that
# repeated runs are reproducible.
_SEED = 42
_has_cuda = torch.cuda.is_available()
for _seed_fn in (np.random.seed, random.seed, torch.manual_seed):
    _seed_fn(_SEED)
if _has_cuda:
    torch.cuda.manual_seed(_SEED)

# Choose the compute device: the GPU when CUDA is usable, the CPU otherwise.
device = torch.device('cuda') if _has_cuda else torch.device('cpu')
print(f"Using device: {device}")
if device.type == 'cuda':
    # Extra diagnostics that only make sense when a GPU was selected.
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Version: {torch.version.cuda}")

# Matplotlib settings
# The style sheet is applied before the seaborn palette; the two calls are kept
# in this order. NOTE(review): the 'seaborn-v0_8-*' style names are presumably
# only available on recent matplotlib releases - confirm the installed version.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

## 2. Load and Analyze Dataset

In [None]:
# Load the historical loan decisions. The path is relative, so the CSV must be
# in the working directory the notebook is run from.
data = pd.read_csv('biased_gender_loans.csv')

print("Dataset Shape:", data.shape)
print("\nFirst 5 rows:")
print(data.head())
print("\nDataset Info:")
# DataFrame.info() writes its report directly to stdout and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
data.info()
print("\nBasic Statistics:")
print(data.describe())

In [None]:
# Measure the bias already present in the historical decisions: split the
# applicants by sex and compare the share of each group that was approved.
men_data = data.loc[data['sex'] == 'Man']
women_data = data.loc[data['sex'] == 'Woman']

men_approved = (men_data['bank_loan'] == 'Yes').sum()
women_approved = (women_data['bank_loan'] == 'Yes').sum()
approval_rate_men = men_approved / len(men_data)
approval_rate_women = women_approved / len(women_data)

# Statistical parity difference, women minus men: negative values mean women
# were approved less often than men.
spd_baseline = approval_rate_women - approval_rate_men

banner = "=" * 60
report_lines = [
    banner,
    "BASELINE DATA ANALYSIS (Biased Historical Data)",
    banner,
    f"Total Applicants: {len(data)}",
    f"  Men: {len(men_data)}",
    f"  Women: {len(women_data)}",
    f"\nApproval Rates:",
    f"  Men: {approval_rate_men:.2%}",
    f"  Women: {approval_rate_women:.2%}",
    f"\nFairness Metrics:",
    f"  Statistical Parity Difference: {spd_baseline:.4f}",
    f"  Absolute Bias Gap: {abs(spd_baseline):.2%}",
    banner,
]
print("\n".join(report_lines))

## 3. Visualize Data Distribution

In [None]:
# 2x2 overview of the raw data: feature histograms on the top row, approval
# counts and a salary/experience scatter on the bottom row.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Dataset Distribution Analysis', fontsize=16, fontweight='bold')

# Plots 1 and 2: overlaid men/women histograms, one panel per feature.
histogram_panels = [
    (axes[0, 0], 'salary', 30, 'Salary', 'Salary Distribution by Gender'),
    (axes[0, 1], 'years_exp', 20, 'Years of Experience', 'Experience Distribution by Gender'),
]
for ax, column, n_bins, x_label, panel_title in histogram_panels:
    ax.hist(men_data[column], bins=n_bins, alpha=0.6, label='Men', color='blue')
    ax.hist(women_data[column], bins=n_bins, alpha=0.6, label='Women', color='orange')
    ax.set_xlabel(x_label)
    ax.set_ylabel('Frequency')
    ax.set_title(panel_title)
    ax.legend()
    ax.grid(True, alpha=0.3)

# Plot 3: grouped bars with the number of each loan outcome per gender.
counts_ax = axes[1, 0]
approval_counts = data.groupby(['sex', 'bank_loan']).size().unstack(fill_value=0)
approval_counts.plot(kind='bar', ax=counts_ax, color=['indianred', 'steelblue'])
counts_ax.set_xlabel('Gender')
counts_ax.set_ylabel('Count')
counts_ax.set_title('Loan Approval Counts by Gender')
counts_ax.set_xticklabels(counts_ax.get_xticklabels(), rotation=0)
counts_ax.legend(title='Loan Approved')
counts_ax.grid(True, alpha=0.3, axis='y')

# Plot 4: salary against experience; rejected points are drawn first so the
# approved ones are layered on top.
scatter_ax = axes[1, 1]
approved = data[data['bank_loan'] == 'Yes']
rejected = data[data['bank_loan'] == 'No']
for subset, point_color, legend_label in (
    (rejected, 'red', 'Rejected'),
    (approved, 'green', 'Approved'),
):
    scatter_ax.scatter(subset['years_exp'], subset['salary'],
                       alpha=0.4, s=20, c=point_color, label=legend_label)
scatter_ax.set_xlabel('Years of Experience')
scatter_ax.set_ylabel('Salary')
scatter_ax.set_title('Salary vs Experience (Colored by Approval)')
scatter_ax.legend()
scatter_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 4. Define Loan Approval Environment (MDP)

In [None]:
class LoanApprovalEnv:
    """Episodic loan-approval environment with a gender-parity reward term.

    Every step presents one applicant drawn uniformly at random (with
    replacement) from ``data``. The observation is the float32 vector
    ``[salary_norm, exp_norm, sex_encoded]``: the first two entries are
    min-max scaled over ``data`` and ``sex_encoded`` is 1.0 for ``'Man'`` and
    0.0 otherwise. Actions are 0 (reject) and 1 (approve).

    Args:
        data: Frame with the columns ``salary``, ``years_exp``, ``sex``
            (``'Man'`` or ``'Woman'``; any other value raises ``KeyError``
            when that row is sampled) and ``bank_loan`` (``'Yes'`` marks an
            approved loan, anything else a rejected one).
        episode_length: Number of applicants decided per episode.
        lambda_fairness: Weight of the approval-rate-gap penalty.

    Raises:
        ValueError: If ``data`` has no rows.
    """

    def __init__(self, data: pd.DataFrame, episode_length: int = 100, lambda_fairness: float = 0.5):
        if len(data) == 0:
            raise ValueError("data must contain at least one applicant")

        self.data_original = data.copy()
        self.episode_length = episode_length
        self.lambda_fairness = lambda_fairness

        # Normalize continuous features
        self.salary_min = data['salary'].min()
        self.salary_max = data['salary'].max()
        self.exp_min = data['years_exp'].min()
        self.exp_max = data['years_exp'].max()

        # A constant column has zero range; dividing by it would turn the
        # whole feature into NaN. Use a unit range instead (feature becomes 0).
        salary_range = self.salary_max - self.salary_min
        if salary_range == 0:
            salary_range = 1.0
        exp_range = self.exp_max - self.exp_min
        if exp_range == 0:
            exp_range = 1.0

        self.data = data.copy()
        self.data['salary_norm'] = (data['salary'] - self.salary_min) / salary_range
        self.data['exp_norm'] = (data['years_exp'] - self.exp_min) / exp_range
        self.data['sex_encoded'] = (data['sex'] == 'Man').astype(float)
        self.data['label'] = (data['bank_loan'] == 'Yes').astype(int)

        # Plain-array snapshots of the columns read on every step, so sampling
        # an applicant does not have to build a pandas row each time.
        self._features = self.data[['salary_norm', 'exp_norm', 'sex_encoded']].to_numpy(dtype=np.float32)
        self._labels = self.data['label'].to_numpy()
        self._sexes = self.data['sex'].to_numpy()

        # Episode tracking
        self.current_step = 0
        self.current_state = None
        self.current_label = None
        self.current_sex = None

        # Fairness tracking: the episode_* fields are cleared by reset(), the
        # total_* fields accumulate across episodes until overwritten.
        self.episode_approvals = {'Man': [], 'Woman': []}
        self.episode_total = {'Man': 0, 'Woman': 0}
        self.total_approvals = {'Man': 0, 'Woman': 0}
        self.total_counts = {'Man': 0, 'Woman': 0}

    def reset(self):
        """Start a new episode and return the first applicant's state.

        Clears the per-episode bookkeeping only; the running totals used by
        ``get_fairness_metrics`` are left untouched.
        """
        self.current_step = 0
        self.episode_approvals = {'Man': [], 'Woman': []}
        self.episode_total = {'Man': 0, 'Woman': 0}
        return self._get_next_applicant()

    def _get_next_applicant(self):
        """Draw a random applicant, make it current and return its state."""
        idx = np.random.randint(0, len(self._labels))

        # Copy so that a caller mutating the returned state cannot corrupt
        # the cached feature matrix.
        self.current_state = self._features[idx].copy()
        self.current_label = int(self._labels[idx])
        self.current_sex = self._sexes[idx]
        self.episode_total[self.current_sex] += 1
        self.total_counts[self.current_sex] += 1

        return self.current_state

    def _compute_classification_reward(self, action: int) -> float:
        """Return +1/-1 for a correct/incorrect approval and 0 for a rejection."""
        if action == 1:  # Approve
            return 1.0 if self.current_label == 1 else -1.0
        else:  # Reject
            return 0.0

    def _compute_fairness_penalty(self) -> float:
        """Return ``-lambda_fairness * |rate(women) - rate(men)|`` for this episode.

        The rates are the mean of the decisions recorded so far in the current
        episode. While either group has no decision yet the gap is undefined,
        so no penalty is applied; counting the missing group as a 0% approval
        rate would punish the first approvals of every episode.
        """
        men_decisions = self.episode_approvals['Man']
        women_decisions = self.episode_approvals['Woman']
        if not men_decisions or not women_decisions:
            return 0.0

        gap = abs(np.mean(women_decisions) - np.mean(men_decisions))
        penalty = -self.lambda_fairness * gap
        return float(penalty)

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, Dict]:
        """Apply ``action`` to the current applicant and advance the episode.

        Args:
            action: 0 to reject, 1 to approve.

        Returns:
            ``(next_state, total_reward, done, info)``. ``total_reward`` is the
            classification reward plus the fairness penalty. ``info`` describes
            the applicant the action was applied to. When the episode ends no
            new applicant is drawn and ``next_state`` is the last state again.
        """
        # Remember who was decided on: drawing the next applicant below
        # overwrites current_sex / current_label.
        decided_sex = self.current_sex
        decided_label = self.current_label

        # Record approval decision
        self.episode_approvals[decided_sex].append(float(action))
        if action == 1:
            self.total_approvals[decided_sex] += 1

        # Compute rewards
        classification_reward = self._compute_classification_reward(action)
        fairness_penalty = self._compute_fairness_penalty()
        total_reward = classification_reward + fairness_penalty

        # Increment step
        self.current_step += 1
        done = self.current_step >= self.episode_length

        # Get next applicant
        if not done:
            next_state = self._get_next_applicant()
        else:
            next_state = self.current_state

        info = {
            'classification_reward': classification_reward,
            'fairness_penalty': fairness_penalty,
            'current_sex': decided_sex,
            'action': action,
            'label': decided_label
        }

        return next_state, total_reward, done, info

    def get_fairness_metrics(self) -> Dict:
        """Return approval rates and parity metrics from the running totals.

        Rates are ``total_approvals / total_counts`` per group (0.0 for a group
        that has not been seen). ``statistical_parity_difference`` is the
        women's rate minus the men's rate; ``disparity_ratio`` is their ratio,
        reported as 0.0 when the men's rate is 0.
        """
        approval_rate_man = (
            self.total_approvals['Man'] / self.total_counts['Man']
            if self.total_counts['Man'] > 0 else 0.0
        )
        approval_rate_woman = (
            self.total_approvals['Woman'] / self.total_counts['Woman']
            if self.total_counts['Woman'] > 0 else 0.0
        )

        spd = approval_rate_woman - approval_rate_man

        return {
            'approval_rate_men': approval_rate_man,
            'approval_rate_women': approval_rate_woman,
            'statistical_parity_difference': spd,
            'disparity_ratio': approval_rate_woman / approval_rate_man if approval_rate_man > 0 else 0.0,
            'total_approvals_men': self.total_approvals['Man'],
            'total_approvals_women': self.total_approvals['Woman'],
            'total_men': self.total_counts['Man'],
            'total_women': self.total_counts['Woman']
        }

# Confirmation message; the check mark had been corrupted by a wrong text
# encoding and is restored here.
print("✓ LoanApprovalEnv class defined")

## 5. Define DQN Components

In [None]:
# Experience tuple for replay buffer: one environment transition with the
# fields (state, action, reward, next_state, done), in the order in which
# ReplayBuffer.push passes them.
Experience = namedtuple('Experience', ['state', 'action', 'reward', 'next_state', 'done'])

class QNetwork(nn.Module):
    """Fully connected network mapping a state to one Q-value per action.

    Args:
        state_dim: Size of the input state vector.
        action_dim: Number of actions, i.e. size of the output.
        hidden_dims: Widths of the hidden layers, in order. Any number of
            layers is accepted; an empty sequence gives a single linear map.
            Each hidden layer is followed by a ReLU.
    """

    def __init__(self, state_dim=3, action_dim=2, hidden_dims=(64, 64)):
        super().__init__()

        # Build Linear/ReLU pairs for every requested hidden width. For the
        # default two hidden layers this yields the same modules, in the same
        # order, as a hand-written Sequential, so state-dict keys are unchanged.
        layers = []
        in_features = state_dim
        for width in hidden_dims:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        layers.append(nn.Linear(in_features, action_dim))

        self.network = nn.Sequential(*layers)

    def forward(self, state):
        """Return the Q-values for ``state`` (last dimension ``action_dim``)."""
        return self.network(state)


class ReplayBuffer:
    """Bounded store of past transitions, sampled uniformly for training."""

    def __init__(self, capacity=10000):
        # A deque with maxlen drops its oldest entry once `capacity` is reached.
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        """Append one transition to the buffer."""
        transition = Experience(state, action, reward, next_state, done)
        self.buffer.append(transition)

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct transitions and return them as tensors.

        Returns:
            ``(states, actions, rewards, next_states, dones)``; actions as a
            LongTensor, everything else as a FloatTensor.
        """
        batch = random.sample(self.buffer, batch_size)

        # Turn the list of transitions into one tuple per field.
        state_col, action_col, reward_col, next_state_col, done_col = zip(*batch)

        return (
            torch.FloatTensor(np.array(state_col)),
            torch.LongTensor(np.array(action_col)),
            torch.FloatTensor(np.array(reward_col)),
            torch.FloatTensor(np.array(next_state_col)),
            torch.FloatTensor(np.array(done_col)),
        )

    def __len__(self):
        """Return the number of stored transitions."""
        return len(self.buffer)


class DQNAgent:
    """DQN agent with epsilon-greedy exploration, experience replay and a
    periodically synchronised target network."""

    def __init__(
        self,
        state_dim=3,
        action_dim=2,
        learning_rate=0.001,
        gamma=0.99,
        epsilon_start=1.0,
        epsilon_end=0.01,
        epsilon_decay=0.995,
        buffer_capacity=10000,
        batch_size=64,
        target_update_freq=10,
        device=None
    ):
        """Build the online/target networks, optimizer and replay buffer.

        ``device=None`` selects CUDA when available and the CPU otherwise.
        ``target_update_freq`` is counted in calls to ``train_step`` that
        actually performed an update.
        """
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.gamma = gamma
        self.batch_size = batch_size
        self.target_update_freq = target_update_freq

        # Exploration schedule: epsilon starts high and is multiplied by
        # epsilon_decay on each decay_epsilon() call, never below epsilon_end.
        self.epsilon = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = epsilon_decay

        # Resolve the compute device.
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device)

        # Online network, plus a target network that starts as an exact copy
        # and is only ever used for inference.
        self.q_network = QNetwork(state_dim, action_dim).to(self.device)
        self.target_network = QNetwork(state_dim, action_dim).to(self.device)
        self.target_network.load_state_dict(self.q_network.state_dict())
        self.target_network.eval()

        # Only the online network's parameters are optimised.
        self.optimizer = optim.Adam(self.q_network.parameters(), lr=learning_rate)
        self.criterion = nn.MSELoss()

        self.replay_buffer = ReplayBuffer(buffer_capacity)

        # Number of gradient updates performed so far.
        self.training_steps = 0

    def select_action(self, state, training=True):
        """Return an action index for ``state``.

        With ``training=True`` a uniformly random action is taken with
        probability ``epsilon``; otherwise the action with the highest
        predicted Q-value is returned.
        """
        explore = training and random.random() < self.epsilon
        if explore:
            return random.randint(0, self.action_dim - 1)

        with torch.no_grad():
            # Add a batch dimension of 1 before the forward pass.
            batch_of_one = torch.FloatTensor(state).unsqueeze(0).to(self.device)
            return self.q_network(batch_of_one).argmax().item()

    def store_experience(self, state, action, reward, next_state, done):
        """Add one transition to the replay buffer."""
        self.replay_buffer.push(state, action, reward, next_state, done)

    def train_step(self):
        """Run one gradient update on a sampled mini-batch.

        Returns:
            The loss as a float, or ``None`` when the buffer holds fewer than
            ``batch_size`` transitions and no update was made.
        """
        if len(self.replay_buffer) < self.batch_size:
            return None

        # Sample a mini-batch and move every tensor to the training device.
        sampled = self.replay_buffer.sample(self.batch_size)
        states, actions, rewards, next_states, dones = (
            tensor.to(self.device) for tensor in sampled
        )

        # Q(s, a) of the online network for the actions that were taken.
        all_q = self.q_network(states)
        current_q_values = all_q.gather(1, actions.unsqueeze(1)).squeeze(1)

        # Bootstrapped target from the target network; (1 - dones) removes the
        # future term for terminal transitions.
        with torch.no_grad():
            next_q_values = self.target_network(next_states).max(1)[0]
            target_q_values = rewards + (1 - dones) * self.gamma * next_q_values

        loss = self.criterion(current_q_values, target_q_values)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Copy the online weights into the target network every
        # target_update_freq updates.
        self.training_steps += 1
        if self.training_steps % self.target_update_freq == 0:
            self.target_network.load_state_dict(self.q_network.state_dict())

        return loss.item()

    def decay_epsilon(self):
        """Multiply epsilon by ``epsilon_decay``, clamped at ``epsilon_end``."""
        decayed = self.epsilon * self.epsilon_decay
        self.epsilon = max(self.epsilon_end, decayed)

# Confirmation message; the check mark had been corrupted by a wrong text
# encoding and is restored here.
print("✓ DQN components defined (QNetwork, ReplayBuffer, DQNAgent)")

## 6. Training Configuration

In [None]:
# Training hyperparameters
NUM_EPISODES = 1000        # number of training episodes
EPISODE_LENGTH = 100       # applicants decided per episode
LAMBDA_FAIRNESS = 0.5      # weight of the approval-rate-gap penalty
LEARNING_RATE = 0.001      # Adam step size
GAMMA = 0.99               # discount factor
EPSILON_START = 1.0        # initial exploration probability
EPSILON_END = 0.01         # exploration floor
EPSILON_DECAY = 0.995      # multiplicative decay applied once per episode
BATCH_SIZE = 64            # replay mini-batch size
TARGET_UPDATE_FREQ = 10    # gradient updates between target-network syncs
PRINT_FREQ = 50            # episodes between progress reports

print("Training Configuration:")
print(f"  Episodes: {NUM_EPISODES}")
print(f"  Episode Length: {EPISODE_LENGTH}")
print(f"  Lambda (Fairness): {LAMBDA_FAIRNESS}")
print(f"  Learning Rate: {LEARNING_RATE}")
print(f"  Gamma (Discount): {GAMMA}")
# The arrow had been corrupted by a wrong text encoding and is restored here.
print(f"  Epsilon: {EPSILON_START} → {EPSILON_END} (decay: {EPSILON_DECAY})")
print(f"  Batch Size: {BATCH_SIZE}")
print(f"  Target Update Freq: {TARGET_UPDATE_FREQ}")
print(f"  Device: {device}")

## 7. Initialize Environment and Agent

In [None]:
# Initialize environment
env = LoanApprovalEnv(
    data=data,
    episode_length=EPISODE_LENGTH,
    lambda_fairness=LAMBDA_FAIRNESS
)

# Initialize agent. state_dim=3 matches the three-element state produced by
# the environment; action_dim=2 covers reject/approve. The replay buffer keeps
# its default capacity because buffer_capacity is not passed.
agent = DQNAgent(
    state_dim=3,
    action_dim=2,
    learning_rate=LEARNING_RATE,
    gamma=GAMMA,
    epsilon_start=EPSILON_START,
    epsilon_end=EPSILON_END,
    epsilon_decay=EPSILON_DECAY,
    batch_size=BATCH_SIZE,
    target_update_freq=TARGET_UPDATE_FREQ,
    device=device
)

# The check mark had been corrupted by a wrong text encoding; restored here.
print("✓ Environment and Agent initialized")

## 8. Training Loop

In [None]:
# Metrics tracking: one entry per episode, consumed by the plotting cells.
episode_rewards = []
episode_losses = []            # mean loss of the episode; 0 when no update ran
fairness_metrics_history = []
epsilon_history = []

print("Starting training...")
print("-" * 80)

for episode in range(NUM_EPISODES):
    state = env.reset()
    episode_reward = 0
    episode_losses_temp = []

    for step in range(EPISODE_LENGTH):
        # Select action
        action = agent.select_action(state, training=True)

        # Take action
        next_state, reward, done, info = env.step(action)

        # Store experience
        agent.store_experience(state, action, reward, next_state, done)

        # Train agent (returns None until the replay buffer holds a batch)
        loss = agent.train_step()
        if loss is not None:
            episode_losses_temp.append(loss)

        # Update
        state = next_state
        episode_reward += reward

        if done:
            break

    # Decay epsilon once per episode
    agent.decay_epsilon()

    # Record metrics
    episode_rewards.append(episode_reward)
    avg_loss = np.mean(episode_losses_temp) if episode_losses_temp else 0
    episode_losses.append(avg_loss)
    # NOTE: get_fairness_metrics() reads the environment's running totals,
    # which env.reset() does not clear, so every entry is cumulative over all
    # episodes so far rather than a per-episode measurement.
    fairness_metrics = env.get_fairness_metrics()
    fairness_metrics_history.append(fairness_metrics)
    epsilon_history.append(agent.epsilon)

    # Print progress
    if (episode + 1) % PRINT_FREQ == 0:
        recent_rewards = episode_rewards[-PRINT_FREQ:]
        recent_losses = [l for l in episode_losses[-PRINT_FREQ:] if l > 0]
        recent_spd = [m['statistical_parity_difference'] for m in fairness_metrics_history[-PRINT_FREQ:]]
        # No update may have run yet in this window; report NaN explicitly
        # instead of taking the mean of an empty list.
        recent_loss_mean = np.mean(recent_losses) if recent_losses else float('nan')

        print(f"Episode {episode + 1}/{NUM_EPISODES} | "
              f"Avg Reward: {np.mean(recent_rewards):.2f} | "
              f"Avg Loss: {recent_loss_mean:.4f} | "
              f"SPD: {np.mean(recent_spd):.4f} | "
              f"ε: {agent.epsilon:.3f}")

print("\n" + "=" * 80)
print("Training completed!")
print("=" * 80)

## 9. Evaluate Trained Agent

In [None]:
# Reset environment statistics for evaluation: env.reset() does not clear the
# running totals, so they are zeroed here to keep training decisions out of
# the evaluation metrics.
env.total_approvals = {'Man': 0, 'Woman': 0}
env.total_counts = {'Man': 0, 'Woman': 0}

# Evaluate for 100 episodes
eval_episodes = 100
eval_rewards = []

print(f"Evaluating agent over {eval_episodes} episodes...")

for episode in range(eval_episodes):
    state = env.reset()
    episode_reward = 0

    for step in range(EPISODE_LENGTH):
        # training=False switches off epsilon-greedy exploration.
        action = agent.select_action(state, training=False)
        next_state, reward, done, info = env.step(action)
        episode_reward += reward
        state = next_state

        if done:
            break

    eval_rewards.append(episode_reward)

# Get evaluation metrics
eval_metrics = env.get_fairness_metrics()

# Reduction of the absolute parity gap relative to the historical data. The
# percentage is undefined when the baseline gap is zero, so report NaN then.
baseline_gap = abs(spd_baseline)
improvement = baseline_gap - abs(eval_metrics['statistical_parity_difference'])
eval_improvement_pct = improvement / baseline_gap * 100 if baseline_gap > 0 else float('nan')

print("\n" + "=" * 60)
print("EVALUATION RESULTS")
print("=" * 60)
print(f"Average Reward: {np.mean(eval_rewards):.2f} ± {np.std(eval_rewards):.2f}")
print(f"\nApproval Rates:")
print(f"  Men: {eval_metrics['approval_rate_men']:.2%}")
print(f"  Women: {eval_metrics['approval_rate_women']:.2%}")
print(f"\nFairness Metrics:")
print(f"  Statistical Parity Difference: {eval_metrics['statistical_parity_difference']:.4f}")
print(f"  Disparity Ratio: {eval_metrics['disparity_ratio']:.3f}")
print(f"\nComparison with Baseline:")
print(f"  Baseline SPD: {spd_baseline:.4f}")
print(f"  Trained SPD: {eval_metrics['statistical_parity_difference']:.4f}")
print(f"  SPD Improvement: {improvement:.4f} ({eval_improvement_pct:.1f}%)")
print("=" * 60)

## 10. Visualize Training Metrics

In [None]:
# 2x2 dashboard of the training run: reward, loss, parity gap and approval
# rates per episode, each with a moving average once enough episodes exist.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('DQN Training Metrics for Bias Mitigation', fontsize=16, fontweight='bold')

episodes = range(1, len(episode_rewards) + 1)
window = 50


def _rolling_mean(values):
    """Return the `window`-episode moving average of *values* as a Series."""
    return pd.Series(values).rolling(window=window).mean()


def _finish_panel(ax, y_label, panel_title):
    """Apply the axis labels, title, legend and grid shared by every panel."""
    ax.set_xlabel('Episode')
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)
    ax.legend()
    ax.grid(True, alpha=0.3)


# Plot 1: Episode Rewards
reward_ax = axes[0, 0]
reward_ax.plot(episodes, episode_rewards, alpha=0.6, label='Episode Reward')
if len(episode_rewards) >= window:
    moving_avg = _rolling_mean(episode_rewards)
    reward_ax.plot(episodes, moving_avg, color='red', linewidth=2,
                   label=f'{window}-Episode Moving Avg')
_finish_panel(reward_ax, 'Total Reward', 'Episode Rewards')

# Plot 2: Training Loss. Only episodes with a positive recorded loss are
# drawn; their 1-based episode numbers are kept alongside the values.
loss_ax = axes[0, 1]
loss_points = [(number, value)
               for number, value in enumerate(episode_losses, start=1)
               if value > 0]
valid_episodes = [number for number, _ in loss_points]
valid_losses = [value for _, value in loss_points]
loss_ax.plot(valid_episodes, valid_losses, alpha=0.6, label='Loss')
if len(valid_losses) >= window:
    moving_avg_loss = _rolling_mean(valid_losses)
    loss_ax.plot(valid_episodes, moving_avg_loss, color='red', linewidth=2,
                 label=f'{window}-Episode Moving Avg')
_finish_panel(loss_ax, 'Average Loss', 'Training Loss')

# Plot 3: Statistical Parity Difference, with the zero line as reference.
spd_ax = axes[1, 0]
spd_values = [entry['statistical_parity_difference'] for entry in fairness_metrics_history]
spd_ax.plot(episodes, spd_values, alpha=0.6, label='SPD')
spd_ax.axhline(y=0, color='green', linestyle='--', linewidth=2, label='Perfect Parity')
if len(spd_values) >= window:
    moving_avg_spd = _rolling_mean(spd_values)
    spd_ax.plot(episodes, moving_avg_spd, color='red', linewidth=2,
                label=f'{window}-Episode Moving Avg')
_finish_panel(spd_ax, 'SPD', 'Statistical Parity Difference (Women - Men)')

# Plot 4: Approval Rates by Gender
rate_ax = axes[1, 1]
approval_men = [entry['approval_rate_men'] for entry in fairness_metrics_history]
approval_women = [entry['approval_rate_women'] for entry in fairness_metrics_history]
rate_ax.plot(episodes, approval_men, alpha=0.6, label='Men', color='blue')
rate_ax.plot(episodes, approval_women, alpha=0.6, label='Women', color='orange')
if len(approval_men) >= window:
    moving_avg_men = _rolling_mean(approval_men)
    moving_avg_women = _rolling_mean(approval_women)
    rate_ax.plot(episodes, moving_avg_men, color='darkblue', linewidth=2,
                 label=f'Men ({window}-ep avg)')
    rate_ax.plot(episodes, moving_avg_women, color='darkorange', linewidth=2,
                 label=f'Women ({window}-ep avg)')
_finish_panel(rate_ax, 'Approval Rate', 'Approval Rates by Gender')

plt.tight_layout()
plt.show()

## 11. Fairness Comparison: Baseline vs RL Agent

In [None]:
# Side-by-side comparison of the historical data and the trained agent:
# approval rates per gender on the left, absolute parity gap on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
fig.suptitle('Fairness Improvement: Baseline vs RL Agent', fontsize=16, fontweight='bold')
rates_ax, gap_ax = axes

# Plot 1: Approval Rates Comparison (two bars per gender)
categories = ['Men', 'Women']
baseline_rates = [approval_rate_men, approval_rate_women]
trained_rates = [eval_metrics[key] for key in ('approval_rate_men', 'approval_rate_women')]

x = np.arange(len(categories))
width = 0.35
half_width = width / 2

rates_ax.bar(x - half_width, baseline_rates, width,
             label='Baseline (Biased)', color='indianred', alpha=0.8)
rates_ax.bar(x + half_width, trained_rates, width,
             label='RL Agent', color='steelblue', alpha=0.8)
rates_ax.set_ylabel('Approval Rate')
rates_ax.set_title('Approval Rates by Gender')
rates_ax.set_xticks(x)
rates_ax.set_xticklabels(categories)
rates_ax.legend()
rates_ax.grid(True, alpha=0.3, axis='y')

# Write each rate just above its bar.
for i, (b, t) in enumerate(zip(baseline_rates, trained_rates)):
    for bar_x, rate in ((i - half_width, b), (i + half_width, t)):
        rates_ax.text(bar_x, rate + 0.01, f'{rate:.2%}',
                      ha='center', va='bottom', fontsize=10)

# Plot 2: Statistical Parity Difference (absolute values)
spd_baseline_abs = abs(spd_baseline)
spd_trained_abs = abs(eval_metrics['statistical_parity_difference'])
gap_heights = [spd_baseline_abs, spd_trained_abs]

gap_ax.bar(['Baseline', 'RL Agent'], gap_heights,
           color=['indianred', 'steelblue'], alpha=0.8)
gap_ax.set_ylabel('Absolute SPD')
gap_ax.set_title('Statistical Parity Difference (Absolute)')
gap_ax.grid(True, alpha=0.3, axis='y')

# Write each gap just above its bar.
for bar_index, height in enumerate(gap_heights):
    gap_ax.text(bar_index, height + 0.005, f'{height:.4f}',
                ha='center', va='bottom', fontsize=11)

# Relative reduction of the gap, shown in a box between the two bars.
improvement_pct = ((spd_baseline_abs - spd_trained_abs) / spd_baseline_abs) * 100
gap_ax.text(0.5, max(gap_heights) * 0.5,
            f'Improvement:\n{improvement_pct:.1f}%',
            ha='center', va='center', fontsize=12, fontweight='bold',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

plt.tight_layout()
plt.show()

## 12. Summary and Conclusions

In [None]:
# Final report comparing the historical data with the trained agent. The
# symbols in the section headings had been corrupted by a wrong text encoding
# and are restored here.
print("=" * 80)
print(" " * 20 + "PROJECT SUMMARY")
print("=" * 80)
print("\n📊 BASELINE (Historical Biased Data)")
print("-" * 80)
print(f"Approval Rate (Men):    {approval_rate_men:.2%}")
print(f"Approval Rate (Women):  {approval_rate_women:.2%}")
print(f"SPD:                    {spd_baseline:.4f}")
print(f"Absolute Bias Gap:      {abs(spd_baseline):.2%}")

print("\n🤖 TRAINED RL AGENT (DQN)")
print("-" * 80)
print(f"Approval Rate (Men):    {eval_metrics['approval_rate_men']:.2%}")
print(f"Approval Rate (Women):  {eval_metrics['approval_rate_women']:.2%}")
print(f"SPD:                    {eval_metrics['statistical_parity_difference']:.4f}")
print(f"Absolute Bias Gap:      {abs(eval_metrics['statistical_parity_difference']):.2%}")

print("\n📈 IMPROVEMENT")
print("-" * 80)
# Positive values mean the agent's absolute parity gap is smaller than the
# gap in the historical data.
spd_reduction = abs(spd_baseline) - abs(eval_metrics['statistical_parity_difference'])
spd_reduction_pct = (spd_reduction / abs(spd_baseline)) * 100
print(f"SPD Reduction:          {spd_reduction:.4f} ({spd_reduction_pct:.1f}% improvement)")
print(f"Disparity Ratio Change: {approval_rate_women/approval_rate_men:.3f} → {eval_metrics['disparity_ratio']:.3f}")

# The takeaways are chosen from the measured result, so the report never
# claims a bias reduction that the evaluation did not show.
if spd_reduction > 0:
    print(f"\n✅ SUCCESS: The RL agent successfully reduced gender bias by {spd_reduction_pct:.1f}%!")
    key_takeaways = [
        "1. DQN successfully learned to balance accuracy and fairness",
        "2. Fairness penalty in reward function guided equitable decisions",
        "3. Statistical Parity Difference significantly reduced",
        "4. Both genders now have more balanced approval rates",
    ]
else:
    print(f"\n⚠️  Note: Further tuning may be needed.")
    key_takeaways = [
        "1. The absolute Statistical Parity Difference was not reduced in this run",
        "2. Consider a larger fairness weight (lambda) or a longer training run",
    ]

print("\n🎯 KEY TAKEAWAYS")
print("-" * 80)
for takeaway in key_takeaways:
    print(takeaway)

print("\n" + "=" * 80)
print(" " * 15 + "RL Bias Mitigation - Phase 1 Complete")
print("=" * 80)