In [1]:
import pandas as pd
import numpy as np
import random
import datetime

def load_integrated_alerts(filepath='final_aml_integrated_alerts.csv'):
    """
    Read the integrated AML alerts CSV into a DataFrame.

    A status message is printed for every outcome. An empty DataFrame
    (no rows, no columns) is returned when the file is missing, cannot be
    read, or contains no data rows; otherwise the parsed frame is returned.
    """
    # Single exit point: start from the "nothing usable" result and only
    # replace it once a non-empty frame has been read.
    result = pd.DataFrame()
    try:
        frame = pd.read_csv(filepath)
        print(f"Successfully loaded integrated alerts from '{filepath}'. Shape: {frame.shape}")
        if frame.empty:
            print("The alerts DataFrame is empty. Cannot proceed with simulation.")
        else:
            result = frame
    except FileNotFoundError:
        print(f"Error: Alert file '{filepath}' not found. Please ensure Step 5 was completed and the file exists.")
    except Exception as exc:
        # Best-effort loader: any other read/parse failure is reported,
        # not raised.
        print(f"An error occurred while loading alerts: {exc}")
    return result

def prioritize_alerts(alerts_df):
    """
    Return the alerts ordered from highest to lowest Integrated_Risk_Score.

    The score column is coerced to numeric before sorting; values that
    cannot be parsed become NaN and are placed after all scored alerts.
    The coercion and sort are applied to a new frame, so the DataFrame
    passed in is not modified. A stable sort is used so that alerts with
    equal scores keep their incoming relative order.

    Args:
        alerts_df: DataFrame containing an 'Integrated_Risk_Score' column.

    Returns:
        A new DataFrame sorted by score (descending) with a fresh 0..n-1
        index, or ``alerts_df`` itself when it is empty.

    Raises:
        KeyError: if 'Integrated_Risk_Score' is not a column of a
            non-empty ``alerts_df``.
    """
    if alerts_df.empty:
        print("No alerts to prioritize.")
        return alerts_df

    # Ensure Integrated_Risk_Score is numeric for proper sorting.
    numeric_scores = pd.to_numeric(alerts_df['Integrated_Risk_Score'], errors='coerce')

    # assign() builds a new frame instead of overwriting the caller's column.
    prioritized_df = (
        alerts_df.assign(Integrated_Risk_Score=numeric_scores)
        .sort_values(by='Integrated_Risk_Score', ascending=False, kind='mergesort')
        .reset_index(drop=True)
    )

    # The preview is informational only, so show whichever of the usual
    # display columns exist rather than failing on a missing one.
    preview_columns = [
        'Customer_ID',
        'Customer_Name',
        'Integrated_Risk_Score',
        'Overall_Risk_Level',
        'Alert_Reason',
    ]
    available_columns = [col for col in preview_columns if col in prioritized_df.columns]

    print("\nAlerts prioritized by Integrated Risk Score (Top 5):")
    print(prioritized_df[available_columns].head())
    return prioritized_df

def simulate_analyst_review(alerts_df, num_alerts_to_review=5, seed=None):
    """
    Simulate an analyst reviewing the first alerts and assigning dispositions.

    The first ``num_alerts_to_review`` rows (in the frame's current order)
    are marked 'Completed' and given a randomly chosen disposition, a note,
    a review timestamp and the reviewer name 'Analyst_Sim'. All remaining
    rows stay 'Pending' with placeholder values, including 'N/A' as the
    reviewer, so unreviewed alerts are not attributed to an analyst.

    The input frame is not modified; a copy carrying the five review
    columns (Review_Status, Disposition, Review_Notes, Review_Date,
    Reviewed_By) is returned.

    Args:
        alerts_df: DataFrame of alerts. Rows that get reviewed must provide
            the columns printed below (Customer_ID, Customer_Name,
            Integrated_Risk_Score, Overall_Risk_Level, Alert_Reason,
            Sanction_Alert_Flag, Num_AML_Alerts).
        num_alerts_to_review: How many leading rows to review. Values
            larger than the frame are capped at its length; negative
            values are treated as 0.
        seed: Optional seed for reproducible dispositions. When None the
            module-level ``random`` generator is used, as before.

    Returns:
        A new DataFrame with the review columns added, or ``alerts_df``
        itself when it is empty.
    """
    if alerts_df.empty:
        print("No alerts to review.")
        return alerts_df

    # Clamp so a negative request cannot turn into head()'s
    # "all but the last n rows" behaviour.
    review_count = max(0, min(num_alerts_to_review, len(alerts_df)))
    print(f"\n--- Simulating Analyst Review for {review_count} Alerts ---")

    # Work on a copy so the caller's frame does not silently gain columns.
    reviewed_df = alerts_df.copy()

    # Initialize review columns; every alert starts out unreviewed.
    reviewed_df['Review_Status'] = 'Pending'
    reviewed_df['Disposition'] = 'N/A'
    reviewed_df['Review_Notes'] = ''
    reviewed_df['Review_Date'] = pd.NaT  # Not a Time
    reviewed_df['Reviewed_By'] = 'N/A'

    disposition_options = ['False Positive', 'Not Suspicious', 'Suspicious - Escalated', 'Suspicious - SAR Filed']

    # A private generator keeps seeded runs reproducible without touching
    # the global random state.
    chooser = random if seed is None else random.Random(seed)

    # Rows are addressed by position so duplicate index labels cannot make
    # a lookup return several rows at once.
    column_position = {
        name: reviewed_df.columns.get_loc(name)
        for name in ('Review_Status', 'Disposition', 'Review_Notes', 'Review_Date', 'Reviewed_By')
    }

    for row in range(review_count):
        alert = reviewed_df.iloc[row]
        print(f"\nReviewing Alert for Customer ID: {alert['Customer_ID']} ({alert['Customer_Name']})")
        print(f"  Integrated Risk Score: {alert['Integrated_Risk_Score']:.2f}")
        print(f"  Overall Risk Level: {alert['Overall_Risk_Level']}")
        print(f"  Alert Reason: {alert['Alert_Reason']}")
        print(f"  Sanction Alert Flag: {alert['Sanction_Alert_Flag']}")
        print(f"  Number of AML Alerts: {alert['Num_AML_Alerts']}")

        # Simulate disposition choice
        disposition = chooser.choice(disposition_options)
        notes = f"Simulated review: Disposition set to '{disposition}' based on initial assessment."

        reviewed_df.iloc[row, column_position['Review_Status']] = 'Completed'
        reviewed_df.iloc[row, column_position['Disposition']] = disposition
        reviewed_df.iloc[row, column_position['Review_Notes']] = notes
        reviewed_df.iloc[row, column_position['Review_Date']] = datetime.datetime.now()
        reviewed_df.iloc[row, column_position['Reviewed_By']] = 'Analyst_Sim'

        print(f"  -> Disposition: '{disposition}'")
        print(f"  -> Notes: '{notes}'")

    print("\nSimulated review complete.")
    return reviewed_df

def generate_disposition_summary(alerts_df):
    """
    Print a summary of alert dispositions.

    Prints, in order: the count of each disposition value, a crosstab of
    Overall_Risk_Level against Disposition (when that column exists), and
    the share of reviewed alerts that were closed as 'False Positive'.

    The false-positive rate is computed over rows whose Review_Status is
    'Completed' for both numerator and denominator, so it cannot exceed
    100% even if an unreviewed row carries a 'False Positive' disposition.
    A frame without a Review_Status column is treated as having no
    reviewed alerts.

    Args:
        alerts_df: DataFrame of alerts; expected to contain 'Disposition',
            and optionally 'Overall_Risk_Level' and 'Review_Status'.

    Returns:
        None. All results are printed.
    """
    if alerts_df.empty or 'Disposition' not in alerts_df.columns:
        print("No disposition data available for summary.")
        return

    print("\n--- Alert Disposition Summary ---")
    disposition_counts = alerts_df['Disposition'].value_counts(dropna=False)
    print("Disposition Counts:")
    print(disposition_counts)

    if 'Overall_Risk_Level' in alerts_df.columns:
        print("\nBreakdown by Overall Risk Level and Disposition:")
        risk_disposition_crosstab = pd.crosstab(alerts_df['Overall_Risk_Level'], alerts_df['Disposition'], dropna=False)
        print(risk_disposition_crosstab)
    else:
        print("\nNo 'Overall_Risk_Level' column available; skipping risk level breakdown.")

    # Calculate False Positive Rate (example), restricted to reviewed alerts.
    if 'Review_Status' in alerts_df.columns:
        is_completed = alerts_df['Review_Status'] == 'Completed'
    else:
        is_completed = pd.Series(False, index=alerts_df.index)

    total_reviewed = int(is_completed.sum())
    false_positives = int((is_completed & (alerts_df['Disposition'] == 'False Positive')).sum())

    if total_reviewed > 0:
        fpr = (false_positives / total_reviewed) * 100
        print(f"\nSimulated False Positive Rate (out of reviewed alerts): {fpr:.2f}%")
    else:
        print("\nNo alerts have been reviewed yet to calculate False Positive Rate.")


# --- Main Execution Flow for Alert Triage Simulation ---
# Runs the pipeline end to end: load -> prioritize -> simulated review ->
# summary -> save. The DataFrames bound here are module-level names, so they
# remain available after the block has run.
if __name__ == "__main__":
    print("--- Starting Alert Triage and Workflow Simulation ---")

    # 1. Load Integrated Alerts
    # Called with no argument, so the loader's default CSV path is used.
    integrated_alerts_df = load_integrated_alerts()

    # The loader returns an empty DataFrame when the file is missing,
    # unreadable, or has no rows, so this one check covers every failure
    # case and the remaining steps are skipped.
    if not integrated_alerts_df.empty:
        # 2. Prioritize Alerts
        prioritized_alerts_df = prioritize_alerts(integrated_alerts_df.copy()) # Work on a copy

        # 3. Simulate Analyst Review
        # Passing copies keeps each stage's DataFrame independent of the
        # previous stage's.
        reviewed_alerts_df = simulate_analyst_review(prioritized_alerts_df.copy(), num_alerts_to_review=10) # Review top 10 alerts

        # 4. Generate Disposition Summary
        generate_disposition_summary(reviewed_alerts_df)
        
        # Save the reviewed alerts with dispositions
        # index=False omits the DataFrame index from the written file.
        reviewed_alerts_df.to_csv('simulated_reviewed_alerts.csv', index=False)
        print("\nSimulated reviewed alerts saved to 'simulated_reviewed_alerts.csv'")

    # Printed whether or not any alerts were processed.
    print("\n--- Alert Triage and Workflow Simulation Complete ---")


--- Starting Alert Triage and Workflow Simulation ---
Successfully loaded integrated alerts from 'final_aml_integrated_alerts.csv'. Shape: (0, 7)
The alerts DataFrame is empty. Cannot proceed with simulation.

--- Alert Triage and Workflow Simulation Complete ---
