# Credit Card Anomaly Detection Analysis

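This notebook applies rule-based and statistical anomaly detection to identify suspicious credit card transactions. It flags high-amount transactions (above the 99th percentile), rapid-succession transactions (within 5 minutes of the same user's previous transaction), off-hour activity (2 AM–4 AM), and amounts with a high Z-score (|z| > 2.5), then saves the results as a CSV file for Tableau.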

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

# Input workbook and output CSV both live under the shared data directory
DATA_DIR = Path('../data')
INPUT_FILE, OUTPUT_FILE = (
    DATA_DIR / 'Cleaned_Transaction_Data.xlsx',
    DATA_DIR / 'Flagged_Transactions.csv',
)

# Read the cleaned transactions into a DataFrame and preview them
df = pd.read_excel(io=INPUT_FILE)
print('Data Shape:', df.shape)
df.head(n=5)

In [None]:
# Rule-Based Detection
# Each rule appends ',<Label>' to Rule_Flag. The leading comma is stripped and
# rows that matched no rule are labelled 'None' only after ALL rules (including
# the Z-score rule) have run, so a flagged row never carries a 'None,' prefix.

# Detection thresholds
AMOUNT_QUANTILE = 0.99      # Rule 1: amounts above this quantile count as high
RAPID_WINDOW_MINUTES = 5    # Rule 2: max gap to the same user's previous transaction
OFF_HOURS = [2, 3]          # Rule 3: hours 02:00-03:59
Z_SCORE_THRESHOLD = 2.5     # Statistical rule: |z| above this is an outlier

# Order each user's transactions chronologically so diff() below measures the
# gap to that user's previous transaction.
df = df.sort_values(['User_ID', 'Transaction_Date'])

# Start from flags already present in the input, if any. A freshly loaded frame
# may not have the column, and reading it unconditionally raises KeyError.
# NOTE: re-running this cell without reloading df appends the labels again.
if 'Rule_Flag' in df.columns:
    df['Rule_Flag'] = df['Rule_Flag'].fillna('')
else:
    df['Rule_Flag'] = ''

# Rule 1: High Amount (above 99th percentile)
amount_threshold = df['Transaction_Amount'].quantile(AMOUNT_QUANTILE)
df['Rule_Flag'] = np.where(
    df['Transaction_Amount'] > amount_threshold,
    'High_Amount',
    df['Rule_Flag']
)

# Rule 2: Rapid Succession (multiple transactions within 5 minutes)
# Time_Diff is in minutes; the .dt accessor requires Transaction_Date to be a
# datetime column. Only the later transaction of a close pair is flagged.
df['Time_Diff'] = df.groupby('User_ID')['Transaction_Date'].diff().dt.total_seconds() / 60
df['Rule_Flag'] = np.where(
    df['Time_Diff'] <= RAPID_WINDOW_MINUTES,  # NaN (first row per user) compares False
    df['Rule_Flag'] + ',Rapid_Succession',
    df['Rule_Flag']
)

# Rule 3: Off-Hour Transactions (2 AM–4 AM)
df['Hour'] = df['Transaction_Date'].dt.hour
df['Rule_Flag'] = np.where(
    df['Hour'].isin(OFF_HOURS),
    df['Rule_Flag'] + ',Off_Hour',
    df['Rule_Flag']
)

# Statistical Profiling: Flag high Z-scores (>2.5)
# Z_Score_Amount is read from the loaded data; it is not computed in this cell.
df['Rule_Flag'] = np.where(
    df['Z_Score_Amount'].abs() > Z_SCORE_THRESHOLD,
    df['Rule_Flag'] + ',High_Z_Score',
    df['Rule_Flag']
)

# Clean up Rule_Flag: drop the leading comma left by appended labels, then mark
# rows that matched no rule.
df['Rule_Flag'] = df['Rule_Flag'].str.strip(',').replace('', 'None')

print('Flagged Transactions:\n', df[df['Rule_Flag'] != 'None'][['Transaction_ID', 'User_ID', 'Transaction_Amount', 'Rule_Flag']])

In [None]:
# Write the full frame (flagged and unflagged rows alike) to the output CSV,
# leaving out the DataFrame index.
df.to_csv(path_or_buf=OUTPUT_FILE, index=False)
print(f'Flagged transactions saved to {OUTPUT_FILE}')