## 2. BANKING & FINANCE

### 2.1 Fraud Detection & Prevention

**The Challenge:**
- Global card fraud: $28 billion annually
- Traditional rule-based systems miss 30-40% of fraud
- False positives: Blocking legitimate customers hurts revenue

**ML Solution: Real-Time Anomaly Detection**


In [None]:
from sklearn.ensemble import IsolationForest, RandomForestClassifier

class BankingFraudDetector:
    """Score a card transaction for fraud by blending two model signals.

    An unsupervised ``IsolationForest`` answers "is this transaction unusual?"
    and a supervised ``RandomForestClassifier`` answers "does this look like
    known fraud?". The two outputs are combined into one risk score that is
    mapped to an action tier (BLOCK / CHALLENGE / MONITOR / APPROVE).

    Both models are created unfitted; they must be trained before
    ``assess_transaction_risk`` is called.
    """

    # (exclusive lower bound on risk_score, action, verification), highest first.
    _RISK_TIERS = (
        (0.9, 'BLOCK', 'DECLINE'),
        (0.7, 'CHALLENGE', 'OTP or 3D Secure'),
        (0.5, 'MONITOR', 'Approve but flag for review'),
    )

    def __init__(self):
        self.anomaly_detector = IsolationForest(contamination=0.01)
        self.classification_model = RandomForestClassifier()

    @staticmethod
    def _safe_ratio(numerator, denominator, default=1.0):
        """Return numerator / denominator, or ``default`` if denominator <= 0."""
        return numerator / denominator if denominator > 0 else default

    def assess_transaction_risk(self, transaction):
        """Return the recommended action and supporting scores for a transaction.

        Args:
            transaction: Mapping describing the transaction; see
                ``extract_transaction_features`` for the keys that are read.

        Returns:
            dict with keys ``action``, ``risk_score`` (clamped to [0, 1]),
            ``fraud_probability``, ``anomaly_score`` (raw detector output) and
            ``verification_required`` (``None`` when the action is APPROVE).
        """
        features = self.extract_transaction_features(transaction)

        # Signal 1: how unusual is this transaction versus historical patterns?
        # decision_function is lower for more abnormal samples, which is why
        # the blend below uses (1 - anomaly_score).
        anomaly_score = self.anomaly_detector.decision_function([features])[0]

        # Signal 2: probability of the positive (fraud) class.
        fraud_prob = self.classification_model.predict_proba([features])[0][1]

        # decision_function is not confined to [0, 1], so the weighted blend
        # can leave that range. Clamp it so the reported score honours the
        # 0-1 scale; clamping never changes which tier is selected.
        blended = 0.6 * fraud_prob + 0.4 * (1 - anomaly_score)
        risk_score = min(1.0, max(0.0, blended))

        for threshold, action, verification in self._RISK_TIERS:
            if risk_score > threshold:
                break
        else:
            action = 'APPROVE'
            verification = None

        return {
            'action': action,
            'risk_score': risk_score,
            'fraud_probability': fraud_prob,
            'anomaly_score': anomaly_score,
            'verification_required': verification
        }

    def extract_transaction_features(self, txn):
        """Build the numeric feature vector for one transaction.

        Args:
            txn: Mapping with keys ``customer_id``, ``amount``, ``location``,
                ``timestamp``, ``merchant_mcc``, ``country``, ``device_id``,
                ``ip_address``, ``user_agent``, ``card_issue_date``,
                ``card_expiry``, ``card_number`` and ``card_brand``; ``cvv``
                and ``avs_result`` are optional.

        Returns:
            1-D ``np.ndarray`` of feature values in the insertion order of the
            ``features`` dict below (booleans are coerced by NumPy).

        Raises:
            ValueError: If the customer has no transaction history (raised by
                ``get_customer_baseline``).
        """
        customer_id = txn['customer_id']

        # Behavioral baseline (what's normal for this customer?)
        baseline = self.get_customer_baseline(customer_id)

        amount = txn['amount']
        # Sample the clock once so both card-date features use the same instant.
        # NOTE(review): naive local time; assumes the card dates are naive too.
        now = datetime.now()

        # Computed up front because it is both a feature and an input to the
        # impossible-travel check.
        hours_since_previous = (
            txn['timestamp'] - baseline['last_transaction_time']
        ).total_seconds() / 3600

        features = {
            # Amount anomalies. A non-positive baseline falls back to a
            # neutral ratio of 1.0 instead of producing inf/NaN.
            'amount_vs_avg': self._safe_ratio(amount, baseline['avg_transaction']),
            'amount_vs_max': self._safe_ratio(amount, baseline['max_transaction']),
            'is_round_amount': amount % 100 == 0,  # Round amounts suspicious

            # Location anomalies
            'distance_from_home': calculate_distance(
                txn['location'],
                baseline['home_location']
            ),
            'time_to_previous': hours_since_previous,  # Hours
            'impossible_travel': is_impossible_travel(
                baseline['last_location'],
                txn['location'],
                hours_since_previous
            ),
            'new_merchant_mcc': txn['merchant_mcc'] not in baseline['merchant_mccs'],
            'new_country': txn['country'] != baseline['home_country'],
            'high_risk_country': is_high_risk(txn['country']),

            # Velocity patterns
            'transactions_today': count_today_transactions(customer_id),
            'transactions_last_hour': count_transactions_last_hour(customer_id),
            'failed_attempts_today': count_failed_attempts_today(customer_id),
            'unique_merchants_today': count_unique_merchants_today(customer_id),

            # Device & Network
            'new_device': txn['device_id'] not in baseline['known_devices'],
            'new_ip': txn['ip_address'] not in baseline['known_ips'],
            'vpn_detected': is_vpn_or_proxy(txn['ip_address']),
            'suspicious_user_agent': is_bot_user_agent(txn['user_agent']),

            # Card characteristics
            'card_recently_issued': (now - txn['card_issue_date']).days < 30,
            # Time remaining until expiry (expiry minus now); an already
            # expired card yields a negative value and is flagged as well.
            'card_about_to_expire': (txn['card_expiry'] - now).days < 30,
            'international_card': is_international_card(txn['card_number']),
            'card_brand_unusual': txn['card_brand'] not in baseline['used_brands'],

            # Transaction metadata
            'amount_matches_known_fraud': amount in known_fraud_amounts,
            'mcc_matches_known_fraud': txn['merchant_mcc'] in fraud_prone_mccs,
            'cvv_provided': 'cvv' in txn,
            'avs_mismatch': txn.get('avs_result') != 'Y',
        }

        return np.array(list(features.values()))

    def get_customer_baseline(self, customer_id):
        """Summarise the customer's last 180 days of transactions.

        Args:
            customer_id: Identifier passed to ``get_customer_transactions``.

        Returns:
            dict with amount statistics, most common location/country, the
            last transaction's time and location, and the sets of merchant
            codes, devices, IPs and card brands seen in the window.

        Raises:
            ValueError: If no transactions exist in the window, since no
                baseline can be derived from an empty history.
        """
        # Get last 6 months of transactions
        transactions = get_customer_transactions(customer_id, days=180)
        if len(transactions) == 0:
            raise ValueError(
                f"No transaction history for customer {customer_id!r}; "
                "cannot build a behavioral baseline"
            )

        amounts = [t['amount'] for t in transactions]
        # Assumes transactions are ordered oldest-first, so the final entry is
        # the most recent one -- TODO confirm against get_customer_transactions.
        latest = transactions[-1]

        return {
            'avg_transaction': np.mean(amounts),
            'max_transaction': np.max(amounts),
            'std_transaction': np.std(amounts),
            'home_location': get_most_common_location(transactions),
            'home_country': get_most_common_country(transactions),
            'last_transaction_time': latest['timestamp'],
            'last_location': latest['location'],
            'merchant_mccs': {t['merchant_mcc'] for t in transactions},
            # Drop None so a transaction lacking a device/IP is not treated
            # as coming from a "known" one.
            'known_devices': {t.get('device_id') for t in transactions} - {None},
            'known_ips': {t.get('ip_address') for t in transactions} - {None},
            'used_brands': {t['card_brand'] for t in transactions},
        }

# Real-world performance (Singapore banks):
# - 3.5x analyst productivity improvement
# - 72% reduction in false positives
# - Faster detection of new scam types
# - Processes millions of transactions daily

# Global impact:
# - Fraud detection accuracy: 95-99%
# - Response time: <100ms per transaction
# - False positive rate: <2% (important for customer satisfaction)


### 2.2 Credit Scoring & Loan Approval

**The Challenge:**
- Traditional scoring: Biased, inflexible, slow
- Misses creditworthy customers (especially new entrants)
- Too rigid for modern financial products

**ML Solution: Dynamic Credit Scoring**


In [None]:
class CreditScoringModel:
    """Turn a predicted default probability into a credit score and loan terms.

    The underlying ``GradientBoostingClassifier`` is created unfitted and must
    be trained before ``predict_default_probability`` is called.
    """

    # (minimum score, decision, interest rate, multiple of the requested
    # amount offered, conditions), checked from the best tier downwards.
    _APPROVAL_TIERS = (
        (750, 'APPROVED', 4.5, 1.5, None),  # Best rate
        (700, 'APPROVED', 6.5, 1, None),
        (650, 'APPROVED_WITH_CONDITIONS', 9.0, 0.8, 'Requires co-signer'),
    )

    def __init__(self):
        self.model = GradientBoostingClassifier()

    def predict_default_probability(self, applicant):
        """Score an applicant and decide approval, rate and loan ceiling.

        Args:
            applicant: Mapping of applicant data; see
                ``extract_credit_features`` for the keys that are read.

        Returns:
            dict with ``credit_score``, ``default_probability``, ``decision``,
            ``interest_rate`` (``None`` if rejected), ``max_loan_amount``
            (``0`` if rejected) and ``conditions`` (``None`` unless the
            approval is conditional).
        """
        features = self.extract_credit_features(applicant)

        # Probability of default (0 = safe, 1 = high risk)
        default_prob = self.model.predict_proba([features])[0][1]

        # Linear map onto the 300-850 range: probability 0 -> 850, 1 -> 300.
        credit_score = 850 - (default_prob * 550)

        for min_score, decision, interest_rate, loan_multiple, conditions in (
            self._APPROVAL_TIERS
        ):
            if credit_score >= min_score:
                max_loan = applicant['requested_amount'] * loan_multiple
                break
        else:
            decision = 'REJECTED'
            interest_rate = None
            max_loan = 0
            conditions = None

        return {
            'credit_score': credit_score,
            'default_probability': default_prob,
            'decision': decision,
            'interest_rate': interest_rate,
            'max_loan_amount': max_loan,
            'conditions': conditions
        }

    def extract_credit_features(self, applicant):
        """Build the model's feature vector from applicant data.

        Required keys: ``age``, ``employment_years``, ``annual_income``,
        ``requested_amount``, ``employment_history`` and ``industry``. All
        other keys are optional and fall back to the defaults shown below.

        Returns:
            1-D ``np.ndarray`` of 21 features in the order listed in the
            final ``np.array([...])`` call.
        """
        # Traditional features
        # NOTE(review): age is fed to the model although the demographics note
        # below lists it as not usable in most jurisdictions. It is kept so the
        # feature layout stays unchanged -- confirm with compliance.
        age = applicant['age']
        employment_length = applicant['employment_years']
        annual_income = applicant['annual_income']
        loan_amount = applicant['requested_amount']
        # Same zero-income fallback as debt_to_income below.
        loan_to_income = loan_amount / annual_income if annual_income > 0 else 1.0

        # Credit history
        credit_age = applicant.get('oldest_account_age_years', 0)
        total_accounts = applicant.get('total_accounts', 0)
        open_accounts = applicant.get('open_accounts', 0)
        closed_accounts = applicant.get('closed_accounts', 0)

        # Historical behavior
        past_defaults = applicant.get('default_count', 0)
        past_late_payments = applicant.get('late_payment_count', 0)
        past_inquiries = applicant.get('credit_inquiries_6m', 0)  # Too many = desperate

        # Debt metrics
        total_debt = applicant.get('total_debt_outstanding', 0)
        debt_to_income = total_debt / annual_income if annual_income > 0 else 1.0
        # Accounts with a non-positive limit have no defined utilization and
        # are skipped; with no usable account the feature is 0.
        utilizations = [
            account['balance'] / account['limit']
            for account in applicant.get('accounts') or []
            if account['limit'] > 0
        ]
        max_credit_utilization = max(utilizations, default=0)

        # Alternative data (for customers with no credit history)
        rent_payment_history = applicant.get('rent_paid_on_time', 1.0)  # %
        utility_payment_history = applicant.get('utilities_paid_on_time', 1.0)
        phone_payment_history = applicant.get('phone_paid_on_time', 1.0)

        # Employment stability
        job_stability_score = calculate_stability(
            applicant['employment_history']
        )
        industry_risk = get_industry_default_rate(applicant['industry'])

        # Demographics (careful: avoid discrimination)
        # Can use: education, occupation (not protected)
        # Cannot use: race, gender, age (in most jurisdictions)
        education_level = encode_education(applicant.get('education'))
        has_college_degree = 1 if applicant.get('education') == 'bachelor+' else 0

        return np.array([
            age,
            employment_length,
            annual_income,
            loan_to_income,
            credit_age,
            total_accounts,
            open_accounts,
            closed_accounts,
            past_defaults,
            past_late_payments,
            past_inquiries,
            total_debt,
            debt_to_income,
            max_credit_utilization,
            rent_payment_history,
            utility_payment_history,
            phone_payment_history,
            job_stability_score,
            industry_risk,
            education_level,
            has_college_degree
        ])

# Real-world impact:
# - Loan approval speed: 5 minutes (vs 5 days traditional)
# - Default rate: 2-4% (improved prediction)
# - Expanded customer base: +30-50% (reach underbanked)
# - Cost savings: Automation reduces processing cost 80%

# Alternative credit data:
# - Some banks now use mobile payment history
# - Rental payment history
# - Utility payment history
# - Gig economy income
# - Goal: serve customers without a traditional credit history


### 2.3 Stock Market Prediction & Portfolio Management


In [None]:
class PortfolioOptimizer:
    """Forecast per-stock returns and pick weights that maximize the Sharpe ratio.

    NOTE(review): ``create_timeseries_features`` is called on ``self`` but is
    not defined in this class as shown; it must be supplied elsewhere.
    """

    def __init__(self):
        self.model = LSTM()  # Time series deep learning

    def predict_returns(self, stock_symbol, horizon_days=30):
        """Forecast returns for one stock from its last 252 days of prices.

        Args:
            stock_symbol: Identifier passed to ``get_historical_prices``.
            horizon_days: Accepted for interface compatibility; not used by
                the current implementation.

        Returns:
            Whatever ``self.model.predict`` returns for the derived features.
        """
        # Get historical data
        prices = get_historical_prices(stock_symbol, days=252)  # 1 year

        # Predict future returns
        features = self.create_timeseries_features(prices)
        return self.model.predict(features)

    def optimize_portfolio(self, available_stocks, investment_amount):
        """Allocate across stocks so that the portfolio Sharpe ratio is maximal.

        Args:
            available_stocks: Iterable of stock identifiers to consider.
            investment_amount: Not used by the current implementation.

        Returns:
            dict with ``allocation``, ``expected_return``,
            ``expected_volatility`` and ``sharpe_ratio`` for that allocation.
        """
        # Get predictions for all stocks
        returns = {}
        volatilities = {}

        for stock in available_stocks:
            returns[stock] = self.predict_returns(stock)
            volatilities[stock] = calculate_volatility(stock)

        # Modern Portfolio Theory: Maximize Sharpe Ratio
        # Sharpe = (Expected Return - Risk Free Rate) / Volatility

        # Efficient frontier: Find optimal allocation
        correlations = calculate_correlation_matrix(available_stocks)

        # The objective is negated so that minimizing it maximizes Sharpe.
        optimal_allocation = minimize(
            objective=lambda w: -sharpe_ratio(w, returns, volatilities, correlations),
            weights_sum=1.0,
            bounds=(0, 1)  # No short selling
        )

        return {
            'allocation': optimal_allocation,
            'expected_return': calculate_portfolio_return(optimal_allocation, returns),
            'expected_volatility': calculate_portfolio_volatility(
                optimal_allocation, volatilities, correlations
            ),
            # Report the same quantity the optimizer maximized, evaluated at
            # the chosen weights (previously a literal `...` placeholder).
            'sharpe_ratio': sharpe_ratio(
                optimal_allocation, returns, volatilities, correlations
            )
        }

# Real-world: Robo-advisors use this to manage trillions
# - Vanguard Personal Advisor Services
# - Wealthfront, Betterment (automated portfolio management)
# - Goldman Sachs: Replaced 600 stock traders with ML system (2017)


---
