In [2]:
# ============================================================================
# SIMPLE FINANCIAL FRAUD DETECTOR
# ============================================================================
# Detect accounting fraud using Benford's Law and financial red flags
# 1-week project, ~300 lines of code
#
# What it does:
# 1. Analyze company financial numbers
# 2. Check if they follow Benford's Law (fraud indicator)
# 3. Check for suspicious financial ratios
# 4. Score fraud probability (0-100%)
# ============================================================================

import pandas as pd
import numpy as np
from scipy import stats
import sqlite3
from datetime import datetime

# Startup banner: rule, title, rule, then one blank line.
print("=" * 80, "SIMPLE FINANCIAL FRAUD DETECTOR", "=" * 80 + "\n", sep="\n")

# ============================================================================
# SECTION 1: BENFORD'S LAW CHECKER
# ============================================================================
# Benford's Law: Real financial data has predictable first-digit patterns
# Fake data violates this pattern
#
# Example:
# - Real company: 30% of numbers start with 1, 18% with 2 (follows pattern)
# - Fraud company: 15% of numbers start with 1, 25% with 2 (violates pattern)
# ============================================================================

class BenfordsLawChecker:
    """Check whether a set of numbers follows Benford's first-digit law.

    The leading significant digits of the input are tallied and compared
    with the Benford distribution using a chi-square goodness-of-fit test.

    NOTE: the chi-square approximation is unreliable for small samples
    (SciPy's documentation suggests at least 5 observations per category),
    so scores computed from a handful of numbers are only a rough signal.
    """

    def __init__(self):
        # Expected share of each leading digit d: log10(1 + 1/d), rounded
        # to three decimals. The nine values sum to 1.
        self.expected = {
            1: 0.301, 2: 0.176, 3: 0.125, 4: 0.097, 5: 0.079,
            6: 0.067, 7: 0.058, 8: 0.051, 9: 0.046
        }

    def get_first_digit(self, number):
        """Return the leading significant digit of a positive number.

        Examples: 1234 -> 1, 0.042 -> 4, 1e22 -> 1.

        Args:
            number: Numeric value to inspect.

        Returns:
            An int in 1..9, or None when the value is zero, negative, or
            has no decimal digits to read (NaN, infinity).
        """
        if number <= 0:
            return None
        # Scan the decimal rendering ("0.042", "1234", "1e+22") for the
        # first non-zero digit. Truncating with int() would turn every
        # value in (0, 1) into a bogus leading digit of 0.
        for char in str(number):
            if char in "123456789":
                return int(char)
        return None

    def check_numbers(self, numbers):
        """Test a collection of numbers against Benford's Law.

        Values without a usable leading digit (zero, negative, NaN,
        infinite) are ignored.

        Args:
            numbers: Iterable of numeric values.

        Returns:
            fraud_score: 0-1 (higher = larger deviation from Benford's Law)
            p_value: p-value of the chi-square goodness-of-fit test

            (0.0, 1.0) is returned when no usable values remain.
        """
        # Extract leading digits, dropping values that have none
        first_digits = [
            digit
            for digit in (self.get_first_digit(n) for n in numbers)
            if digit is not None
        ]

        if not first_digits:
            return 0.0, 1.0

        sample_size = len(first_digits)

        # Tally digits 1..9 in one pass; bin 0 is always empty and dropped.
        observed = np.bincount(first_digits, minlength=10)[1:10]
        expected = np.array(
            [self.expected[d] * sample_size for d in range(1, 10)]
        )

        # Chi-square goodness-of-fit test (8 degrees of freedom)
        _, p_value = stats.chisquare(observed, expected)
        p_value = float(p_value)

        # Fraud score: lower p-value = higher fraud probability
        fraud_score = 1 - p_value if p_value < 1 else 0.0

        return fraud_score, p_value


# ============================================================================
# SECTION 2: RED FLAG DETECTOR
# ============================================================================
# Check for suspicious financial patterns
#
# Red flags:
# 1. Revenue growing too fast (>50% per year)
# 2. Accounts Receivable > 40% of revenue
# 3. Profit margin > 40% (suspiciously high)
# 4. Asset growth exceeding revenue growth by more than 15 percentage points
# ============================================================================

class RedFlagDetector:
    """Rule-based screen for suspicious financial ratios"""

    @staticmethod
    def check_financials(data):
        """
        Evaluate pre-computed ratios against fixed thresholds

        Args:
            data: dict of metrics; the keys read here are 'revenue_growth',
                  'ar_to_revenue', 'profit_margin' and 'asset_growth'.
                  A missing key is treated as 0.

        Returns:
            List of human-readable red flag descriptions (empty if none)
        """
        revenue_growth = data.get('revenue_growth', 0)
        ar_ratio = data.get('ar_to_revenue', 0)
        margin = data.get('profit_margin', 0)
        asset_growth = data.get('asset_growth', 0)

        found = []

        # Rule 1: revenue grew by more than 50%
        if revenue_growth > 0.50:
            found.append(f"Revenue growth too high ({revenue_growth:.0%})")

        # Rule 2: receivables exceed 40% of revenue
        if ar_ratio > 0.40:
            found.append(f"High AR/Revenue ({ar_ratio:.0%})")

        # Rule 3: profit margin above 40%
        if margin > 0.40:
            found.append(f"Suspiciously high margin ({margin:.0%})")

        # Rule 4: asset growth exceeds revenue growth by more than 0.15
        if asset_growth > revenue_growth + 0.15:
            found.append("Assets growing much faster than revenue")

        return found


# ============================================================================
# SECTION 3: FRAUD SCORER
# ============================================================================
# Combine signals into final fraud score
# Score: 0-100%
# - 0-30%: Low risk (normal company)
# - above 30% up to 60%: Medium risk (watch list)
# - above 60%: High risk (likely fraud)
# ============================================================================

class FraudScorer:
    """Turn the Benford score and red-flag count into one risk score"""

    @staticmethod
    def score(benford_score, red_flag_count, red_flags):
        """
        Combine both signals into a 0-100 score and a risk label

        Args:
            benford_score: 0-1 from Benford's Law test
            red_flag_count: Number of red flags
            red_flags: List of red flag descriptions (not used in the
                       calculation)

        Returns:
            fraud_probability: points out of 100
            risk_level: 'LOW', 'MEDIUM', or 'HIGH'
        """
        # Benford's Law contributes up to 40 points
        benford_points = benford_score * 40

        # Each red flag is worth 10 points, limited to 60 in total
        flag_points = min(red_flag_count * 10, 60)

        # Add both parts and cap the result at 100
        total = min(sum((benford_points, flag_points)), 100)

        # Strict thresholds: above 60 is HIGH, above 30 is MEDIUM
        if total > 60:
            return total, 'HIGH'
        if total > 30:
            return total, 'MEDIUM'
        return total, 'LOW'


# ============================================================================
# SECTION 4: DATABASE STORAGE
# ============================================================================

class FraudDatabase:
    """Store analysis results in a SQLite database.

    A new connection is opened for every operation and is always closed
    again, even when the operation raises.
    """

    def __init__(self, db_name='fraud_detector.db'):
        """Remember the database path and make sure the table exists.

        Args:
            db_name: Path of the SQLite database file.
        """
        self.db = db_name
        self.init_db()

    def init_db(self):
        """Create the `companies` table if it does not exist yet."""
        conn = sqlite3.connect(self.db)
        try:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS companies (
                    id INTEGER PRIMARY KEY,
                    name TEXT UNIQUE,
                    fraud_score REAL,
                    risk_level TEXT,
                    red_flags TEXT,
                    analysis_date TEXT
                )
            ''')
            conn.commit()
        finally:
            # Release the connection even if the statement failed
            conn.close()

    def save(self, company_name, fraud_score, risk_level, red_flags):
        """Insert or replace the analysis row for a company.

        `name` is UNIQUE, so saving the same company again replaces the
        earlier row.

        Args:
            company_name: Company name (unique key of the row).
            fraud_score: Numeric fraud score.
            risk_level: Risk label, e.g. 'LOW', 'MEDIUM' or 'HIGH'.
            red_flags: Iterable of flag descriptions, stored as one
                comma-separated string. None is stored as an empty string.
        """
        conn = sqlite3.connect(self.db)
        try:
            conn.execute('''
                INSERT OR REPLACE INTO companies 
                (name, fraud_score, risk_level, red_flags, analysis_date)
                VALUES (?, ?, ?, ?, ?)
            ''', (company_name, fraud_score, risk_level,
                  ','.join(red_flags or []),
                  datetime.now().isoformat()))
            conn.commit()
        finally:
            conn.close()

    def get_all(self):
        """Return every stored result, highest fraud score first.

        Returns:
            pandas DataFrame with columns name, fraud_score, risk_level.
        """
        conn = sqlite3.connect(self.db)
        try:
            return pd.read_sql(
                'SELECT name, fraud_score, risk_level FROM companies ORDER BY fraud_score DESC',
                conn,
            )
        finally:
            conn.close()


# ============================================================================
# SECTION 5: MAIN ANALYSIS
# ============================================================================

def analyze_company(company_name, revenue, net_income, assets, accounts_receivable, 
                   prev_revenue=None, prev_assets=None):
    """
    Run the full fraud screen for one company, store and print the result
    
    Args:
        company_name: Company name
        revenue: Annual revenue
        net_income: Annual net income (profit)
        assets: Total assets
        accounts_receivable: Accounts receivable (money owed)
        prev_revenue: Previous year revenue (optional; growth is 0 if missing)
        prev_assets: Previous year assets (optional; growth is 0 if missing)
    
    Returns:
        fraud_score: 0-100
        risk_level: LOW/MEDIUM/HIGH
        red_flags: List of suspicious items found
    """
    # Collaborators; creating the FraudDatabase also ensures its table exists
    checker = BenfordsLawChecker()
    detector = RedFlagDetector()
    scorer = FraudScorer()
    store = FraudDatabase()

    # Benford signal from the four headline figures (the p-value is unused)
    headline_figures = [revenue, net_income, assets, accounts_receivable]
    benford_score, _p_value = checker.check_numbers(headline_figures)

    # Ratios fall back to 0 when their denominator is missing or not positive
    if revenue > 0:
        profit_margin = net_income / revenue
        ar_to_revenue = accounts_receivable / revenue
    else:
        profit_margin = 0
        ar_to_revenue = 0

    if prev_revenue:
        revenue_growth = (revenue - prev_revenue) / prev_revenue
    else:
        revenue_growth = 0

    if prev_assets:
        asset_growth = (assets - prev_assets) / prev_assets
    else:
        asset_growth = 0

    metrics = {
        'revenue': revenue,
        'net_income': net_income,
        'assets': assets,
        'ar': accounts_receivable,
        'profit_margin': profit_margin,
        'ar_to_revenue': ar_to_revenue,
        'revenue_growth': revenue_growth,
        'asset_growth': asset_growth,
    }

    # Rule-based screen, then the combined score
    red_flags = detector.check_financials(metrics)
    fraud_score, risk_level = scorer.score(benford_score, len(red_flags), red_flags)

    # Persist before reporting
    store.save(company_name, fraud_score, risk_level, red_flags)

    # Console report
    report = [
        f"\n{company_name}",
        "-" * 60,
        f"Fraud Score: {fraud_score:.1f}%",
        f"Risk Level: {risk_level}",
        f"Benford's Law Score: {benford_score:.2f}",
    ]
    if not red_flags:
        report.append("✓ No red flags detected")
    else:
        report.append(f"Red Flags ({len(red_flags)}):")
        report.extend(f"  ⚠ {flag}" for flag in red_flags)
    for line in report:
        print(line)

    return fraud_score, risk_level, red_flags


# ============================================================================
# SECTION 6: EXAMPLE USAGE
# ============================================================================

if __name__ == "__main__":

    print("\nANALYZING SAMPLE COMPANIES\n")

    # Demo inputs, analysed in the order listed
    sample_companies = [
        # Normal company (low fraud risk)
        ("Normal Corp", {
            "revenue": 1000000,
            "net_income": 250000,
            "assets": 2000000,
            "accounts_receivable": 250000,
            "prev_revenue": 900000,
            "prev_assets": 1800000,
        }),
        # Suspicious company (high fraud risk)
        ("Suspicious Inc", {
            "revenue": 1000000,
            "net_income": 600000,  # Too high profit
            "assets": 3500000,  # Assets growing too fast
            "accounts_receivable": 600000,  # AR too high
            "prev_revenue": 700000,
            "prev_assets": 2000000,
        }),
        # Medium risk company
        ("Moderate Ltd", {
            "revenue": 1000000,
            "net_income": 350000,
            "assets": 2500000,
            "accounts_receivable": 350000,
            "prev_revenue": 850000,
            "prev_assets": 2200000,
        }),
    ]
    for sample_name, sample_figures in sample_companies:
        analyze_company(sample_name, **sample_figures)

    # Summary of everything stored in the database
    summary_rule = "=" * 60
    print("\n" + summary_rule)
    print("SUMMARY")
    print(summary_rule)

    db = FraudDatabase()
    results = db.get_all()
    print(results.to_string(index=False))

    print("\n✓ Analysis complete")
    print("  Database saved: fraud_detector.db")
    print("  Summary:")

    # Number of stored companies per risk level
    risk_column = results['risk_level']
    high_risk = len(results[risk_column == 'HIGH'])
    medium_risk = len(results[risk_column == 'MEDIUM'])
    low_risk = len(results[risk_column == 'LOW'])
    print(f"    High Risk: {high_risk}")
    print(f"    Medium Risk: {medium_risk}")
    print(f"    Low Risk: {low_risk}")

SIMPLE FINANCIAL FRAUD DETECTOR


ANALYZING SAMPLE COMPANIES


Normal Corp
------------------------------------------------------------
Fraud Score: 28.3%
Risk Level: LOW
Benford's Law Score: 0.71
✓ No red flags detected

Suspicious Inc
------------------------------------------------------------
Fraud Score: 66.5%
Risk Level: HIGH
Benford's Law Score: 0.91
Red Flags (3):
  ⚠ High AR/Revenue (60%)
  ⚠ Suspiciously high margin (60%)
  ⚠ Assets growing much faster than revenue

Moderate Ltd
------------------------------------------------------------
Fraud Score: 15.2%
Risk Level: LOW
Benford's Law Score: 0.38
✓ No red flags detected

SUMMARY
          name  fraud_score risk_level
Suspicious Inc    66.465887       HIGH
   Normal Corp    28.275205        LOW
  Moderate Ltd    15.234537        LOW

✓ Analysis complete
  Database saved: fraud_detector.db
  Summary:
    High Risk: 1
    Medium Risk: 0
    Low Risk: 2
