# In [None]:
import pandas as pd
import numpy as np
import pickle
import logging
import json
import sqlite3
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass, asdict
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Configure root logging: every record goes both to a log file and to the
# console stream, using one shared line format.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=[logging.FileHandler('risk_compliance.log'), logging.StreamHandler()],
)
# Module-level logger used by everything defined in this file.
logger = logging.getLogger(__name__)

@dataclass
class TransactionRecord:
    """Data structure for transaction records.

    Plain value object describing one transaction. The first six fields are
    required; ``location``, ``description`` and ``additional_features``
    default to ``None``.
    """
    # Unique key; stored as the primary key of the ``transactions`` table.
    transaction_id: str
    # Source of the hour / weekday / month features built during preprocessing.
    timestamp: datetime
    # Transaction value; also drives the log-amount and round-amount features.
    amount: float
    sender_id: str
    receiver_id: str
    transaction_type: str
    # Optional free-form context, persisted as-is.
    location: Optional[str] = None
    description: Optional[str] = None
    # Extra key/value pairs merged into the model feature dict during
    # preprocessing and stored as JSON text.
    additional_features: Optional[Dict] = None

@dataclass
class RiskAssessment:
    """Data structure for risk assessment results.

    One instance is produced per call to ``RiskComplianceSystem.assess_risk``
    and persisted by ``store_risk_assessment``.
    """
    transaction_id: str
    # Second column of the model's ``predict_proba`` output.
    risk_score: float
    # One of 'HIGH', 'MEDIUM', 'LOW' or 'MINIMAL' as assigned by assess_risk.
    risk_level: str
    # Absolute distance between risk_score and the metadata threshold.
    confidence: float
    # Human-readable labels returned by _identify_flagged_features.
    flagged_features: List[str]
    # Time the assessment object was created (not the transaction time).
    timestamp: datetime
    # Taken from metadata['model_name'].
    model_version: str
    # Taken from metadata['threshold'].
    threshold_used: float
    requires_review: bool
    # 'FLAGGED', 'REVIEW_REQUIRED', 'MONITOR' or 'CLEAR' at creation time; the
    # stored value may later be overwritten by update_compliance_status.
    compliance_status: str

class RiskComplianceSystem:
    """
    Automatic Risk Compliance System for Money Laundering Detection
    """
    
    def __init__(self, model_path: str = "best_model.pkl", 
                 metadata_path: str = "model_metadata.pkl",
                 db_path: str = "compliance_database.db"):
        """
        Set up file locations, score cut-offs, the model and the database

        Args:
            model_path: Location of the pickled classifier
            metadata_path: Location of the pickled metadata dictionary
            db_path: Location of the SQLite file that stores results
        """
        # Remember where every artefact lives
        self.model_path, self.metadata_path, self.db_path = (
            model_path, metadata_path, db_path
        )

        # Lower bounds of the HIGH / MEDIUM / LOW score bands
        self.risk_thresholds = dict(high=0.8, medium=0.5, low=0.2)

        # Placeholders until load_model() fills them in
        self.model = self.metadata = None
        self.load_model()

        # Make sure the tables exist before anything is written
        self.init_database()

        logger.info("Risk Compliance System initialized successfully")
    
    def load_model(self):
        """Load the trained ML model and its metadata from disk.

        Both pickles are read into local variables and the metadata keys used
        for logging (``model_name`` and ``threshold``) are checked before
        anything is assigned to ``self``. A failure therefore never leaves
        the instance holding a model without matching metadata.

        Raises:
            FileNotFoundError: If either pickle file does not exist.
            Exception: Any other error raised while unpickling or while
                reading ``model_name`` / ``threshold`` from the metadata.
                Every error is logged and re-raised.
        """
        try:
            # SECURITY: pickle.load can execute arbitrary code embedded in the
            # file. model_path / metadata_path must point at trusted artefacts.
            with open(self.model_path, 'rb') as f:
                model = pickle.load(f)

            with open(self.metadata_path, 'rb') as f:
                metadata = pickle.load(f)

            # Build the log lines first: this validates the required metadata
            # keys (and that the threshold is numeric) before publishing.
            loaded_msg = f"Model loaded: {metadata['model_name']}"
            threshold_msg = f"Model threshold: {metadata['threshold']:.4f}"

            self.model = model
            self.metadata = metadata

            logger.info(loaded_msg)
            logger.info(threshold_msg)

        except FileNotFoundError as e:
            logger.error(f"Model files not found: {e}")
            raise
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            raise
    
    def init_database(self):
        """Create the SQLite tables used by the system if they do not exist.

        Three tables are created: ``transactions``, ``risk_assessments`` and
        ``compliance_actions``. The connection is always closed, including
        when one of the statements fails.

        Raises:
            Exception: Any SQLite error is logged and re-raised.
        """
        conn = None
        try:
            conn = sqlite3.connect(self.db_path)
            cursor = conn.cursor()
            
            # Create tables
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS transactions (
                    transaction_id TEXT PRIMARY KEY,
                    timestamp TEXT,
                    amount REAL,
                    sender_id TEXT,
                    receiver_id TEXT,
                    transaction_type TEXT,
                    location TEXT,
                    description TEXT,
                    additional_features TEXT
                )
            ''')
            
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS risk_assessments (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    transaction_id TEXT,
                    risk_score REAL,
                    risk_level TEXT,
                    confidence REAL,
                    flagged_features TEXT,
                    assessment_timestamp TEXT,
                    model_version TEXT,
                    threshold_used REAL,
                    requires_review INTEGER,
                    compliance_status TEXT,
                    FOREIGN KEY (transaction_id) REFERENCES transactions (transaction_id)
                )
            ''')
            
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS compliance_actions (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    transaction_id TEXT,
                    action_type TEXT,
                    action_timestamp TEXT,
                    user_id TEXT,
                    notes TEXT,
                    FOREIGN KEY (transaction_id) REFERENCES transactions (transaction_id)
                )
            ''')
            
            conn.commit()
            logger.info("Database initialized successfully")
            
        except Exception as e:
            logger.error(f"Error initializing database: {e}")
            raise
        finally:
            # Release the handle on both the success and the failure path.
            if conn is not None:
                conn.close()
    
    def preprocess_transaction(self, transaction: TransactionRecord) -> pd.DataFrame:
        """
        Preprocess a single transaction for model prediction
        
        Args:
            transaction: TransactionRecord object
            
        Returns:
            Preprocessed DataFrame ready for model prediction
        """
        # Create initial feature dictionary
        features = {
            'Amount': transaction.amount,
            'transaction_type': transaction.transaction_type,
            'sender_id': transaction.sender_id,
            'receiver_id': transaction.receiver_id
        }
        
        # Add datetime features if timestamp is available
        if transaction.timestamp:
            features['Hour'] = transaction.timestamp.hour
            features['Day_of_week'] = transaction.timestamp.weekday()
            features['Month'] = transaction.timestamp.month
            features['Is_weekend'] = 1 if transaction.timestamp.weekday() >= 5 else 0
            features['Is_night'] = 1 if transaction.timestamp.hour >= 22 or transaction.timestamp.hour <= 6 else 0
            
            # Cyclic encoding
            features['Hour_sin'] = np.sin(2 * np.pi * features['Hour'] / 24)
            features['Hour_cos'] = np.cos(2 * np.pi * features['Hour'] / 24)
            features['Day_of_week_sin'] = np.sin(2 * np.pi * features['Day_of_week'] / 7)
            features['Day_of_week_cos'] = np.cos(2 * np.pi * features['Day_of_week'] / 7)
            features['Month_sin'] = np.sin(2 * np.pi * features['Month'] / 12)
            features['Month_cos'] = np.cos(2 * np.pi * features['Month'] / 12)
        
        # Amount-based features
        features['Log_amount'] = np.log1p(transaction.amount)
        features['Amount_rounded'] = 1 if transaction.amount % 1 == 0 else 0
        
        # Add location if available
        if transaction.location:
            features['location'] = transaction.location
        
        # Add additional features
        if transaction.additional_features:
            features.update(transaction.additional_features)
        
        # Create DataFrame
        df = pd.DataFrame([features])
        
        # Fill missing values with 0 for features expected by the model
        expected_features = self.metadata.get('features_used', [])
        for feature in expected_features:
            if feature not in df.columns:
                df[feature] = 0
        
        return df
    
    def assess_risk(self, transaction: TransactionRecord) -> RiskAssessment:
        """
        Assess risk for a single transaction

        The transaction is preprocessed, scored with the loaded model and
        mapped onto a risk band using ``self.risk_thresholds``. The score,
        threshold and confidence are converted to built-in ``float`` so the
        result does not carry NumPy scalar types into storage.

        Args:
            transaction: TransactionRecord object

        Returns:
            RiskAssessment object with detailed results

        Raises:
            Exception: Any error from preprocessing, prediction or metadata
                lookup is logged and re-raised.
        """
        try:
            df = self.preprocess_transaction(transaction)

            # Column 1 of predict_proba is treated as the probability of money
            # laundering. The model output is presumably a NumPy array whose
            # dtype may be float32, so normalise it to a plain Python float.
            risk_score = float(self.model.predict_proba(df)[0][1])

            # Bands are checked from most to least severe; the first lower
            # bound the score reaches decides the outcome.
            bands = (
                ('high', 'HIGH', True, 'FLAGGED'),
                ('medium', 'MEDIUM', True, 'REVIEW_REQUIRED'),
                ('low', 'LOW', False, 'MONITOR'),
            )
            risk_level, requires_review, compliance_status = 'MINIMAL', False, 'CLEAR'
            for key, level, review, status in bands:
                if risk_score >= self.risk_thresholds[key]:
                    risk_level, requires_review, compliance_status = level, review, status
                    break

            # Confidence is the distance of the score from the model's own
            # decision threshold (not from the band boundaries above).
            threshold = float(self.metadata['threshold'])
            confidence = abs(risk_score - threshold)

            # Identify flagged features (simplified approach)
            flagged_features = self._identify_flagged_features(df, risk_score)

            assessment = RiskAssessment(
                transaction_id=transaction.transaction_id,
                risk_score=risk_score,
                risk_level=risk_level,
                confidence=confidence,
                flagged_features=flagged_features,
                timestamp=datetime.now(),
                model_version=self.metadata['model_name'],
                threshold_used=threshold,
                requires_review=requires_review,
                compliance_status=compliance_status
            )

            logger.info(f"Risk assessed for transaction {transaction.transaction_id}: "
                       f"{risk_level} ({risk_score:.4f})")

            return assessment

        except Exception as e:
            logger.error(f"Error assessing risk for transaction {transaction.transaction_id}: {e}")
            raise
    
    def _identify_flagged_features(self, df: pd.DataFrame, risk_score: float) -> List[str]:
        """
        Identify features that contribute to high risk score
        This is a simplified approach - in production, you might use SHAP or LIME
        """
        flagged_features = []
        
        # Check amount-based flags
        if df['Amount'].iloc[0] > 10000:
            flagged_features.append('High Amount')
        
        if df['Amount_rounded'].iloc[0] == 1 and df['Amount'].iloc[0] > 1000:
            flagged_features.append('Round Amount')
        
        # Check time-based flags
        if 'Is_night' in df.columns and df['Is_night'].iloc[0] == 1:
            flagged_features.append('Night Transaction')
        
        if 'Is_weekend' in df.columns and df['Is_weekend'].iloc[0] == 1:
            flagged_features.append('Weekend Transaction')
        
        # If no specific flags but high risk, add general flag
        if not flagged_features and risk_score > 0.7:
            flagged_features.append('Pattern Anomaly')
        
        return flagged_features
    
    def store_transaction(self, transaction: TransactionRecord):
        """Store transaction in database"""
        try:
            conn = sqlite3.connect(self.db_path)
            cursor = conn.cursor()
            
            cursor.execute('''
                INSERT OR REPLACE INTO transactions 
                (transaction_id, timestamp, amount, sender_id, receiver_id, 
                 transaction_type, location, description, additional_features)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                transaction.transaction_id,
                transaction.timestamp.isoformat() if transaction.timestamp else None,
                transaction.amount,
                transaction.sender_id,
                transaction.receiver_id,
                transaction.transaction_type,
                transaction.location,
                transaction.description,
                json.dumps(transaction.additional_features) if transaction.additional_features else None
            ))
            
            conn.commit()
            conn.close()
            
        except Exception as e:
            logger.error(f"Error storing transaction: {e}")
            raise
    
    def store_risk_assessment(self, assessment: RiskAssessment):
        """Store risk assessment in database"""
        try:
            conn = sqlite3.connect(self.db_path)
            cursor = conn.cursor()
            
            cursor.execute('''
                INSERT INTO risk_assessments 
                (transaction_id, risk_score, risk_level, confidence, flagged_features,
                 assessment_timestamp, model_version, threshold_used, requires_review, compliance_status)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                assessment.transaction_id,
                assessment.risk_score,
                assessment.risk_level,
                assessment.confidence,
                json.dumps(assessment.flagged_features),
                assessment.timestamp.isoformat(),
                assessment.model_version,
                assessment.threshold_used,
                1 if assessment.requires_review else 0,
                assessment.compliance_status
            ))
            
            conn.commit()
            conn.close()
            
        except Exception as e:
            logger.error(f"Error storing risk assessment: {e}")
            raise
    
    def process_transaction(self, transaction: TransactionRecord) -> RiskAssessment:
        """
        Run one transaction through the persist -> score -> persist pipeline

        Args:
            transaction: TransactionRecord object

        Returns:
            RiskAssessment object
        """
        try:
            # Persist the raw record, score it, then persist the outcome
            self.store_transaction(transaction)
            result = self.assess_risk(transaction)
            self.store_risk_assessment(result)

            # HIGH and MEDIUM outcomes are surfaced at warning level
            elevated = result.risk_level == 'HIGH' or result.risk_level == 'MEDIUM'
            if elevated:
                logger.warning(
                    f"High-risk transaction detected: {transaction.transaction_id} "
                    f"- Risk Level: {result.risk_level} "
                    f"- Score: {result.risk_score:.4f}"
                )

            return result

        except Exception as e:
            logger.error(f"Error processing transaction {transaction.transaction_id}: {e}")
            raise
    
    def get_pending_reviews(self) -> List[Dict]:
        """Get all transactions requiring manual review"""
        try:
            conn = sqlite3.connect(self.db_path)
            cursor = conn.cursor()
            
            cursor.execute('''
                SELECT t.*, r.risk_score, r.risk_level, r.flagged_features, r.compliance_status
                FROM transactions t
                JOIN risk_assessments r ON t.transaction_id = r.transaction_id
                WHERE r.requires_review = 1 AND r.compliance_status IN ('FLAGGED', 'REVIEW_REQUIRED')
                ORDER BY r.risk_score DESC, r.assessment_timestamp DESC
            ''')
            
            columns = [desc[0] for desc in cursor.description]
            results = [dict(zip(columns, row)) for row in cursor.fetchall()]
            
            conn.close()
            return results
            
        except Exception as e:
            logger.error(f"Error getting pending reviews: {e}")
            return []
    
    def update_compliance_status(self, transaction_id: str, new_status: str, 
                                user_id: str, notes: str = ""):
        """Record the outcome of a manual review.

        Sets ``compliance_status`` on every ``risk_assessments`` row of the
        transaction and clears ``requires_review`` when the new status is
        ``CLEARED`` or ``APPROVED``. A ``compliance_actions`` row is appended
        in the same database transaction as an audit entry. The connection is
        always closed, including when a statement fails.

        Args:
            transaction_id: Transaction whose assessments are updated.
            new_status: Status value to store.
            user_id: Reviewer recorded in the audit entry.
            notes: Optional free-text comment for the audit entry.

        Raises:
            Exception: Any SQLite error is logged and re-raised; nothing is
                committed in that case.
        """
        # Only these statuses close a review; anything else keeps it queued.
        requires_review = 0 if new_status in ('CLEARED', 'APPROVED') else 1

        conn = None
        try:
            conn = sqlite3.connect(self.db_path)
            cursor = conn.cursor()
            
            # Update risk assessment status
            cursor.execute('''
                UPDATE risk_assessments 
                SET compliance_status = ?, requires_review = ?
                WHERE transaction_id = ?
            ''', (new_status, requires_review, transaction_id))

            # Surface updates that matched nothing instead of passing silently;
            # the audit entry below is still written.
            if cursor.rowcount == 0:
                logger.warning(f"No risk assessment found for {transaction_id}; "
                               f"only the compliance action was recorded")
            
            # Log compliance action
            cursor.execute('''
                INSERT INTO compliance_actions 
                (transaction_id, action_type, action_timestamp, user_id, notes)
                VALUES (?, ?, ?, ?, ?)
            ''', (transaction_id, f"STATUS_UPDATE_{new_status}", 
                  datetime.now().isoformat(), user_id, notes))
            
            conn.commit()
            
            logger.info(f"Compliance status updated for {transaction_id}: {new_status}")
            
        except Exception as e:
            logger.error(f"Error updating compliance status: {e}")
            raise
        finally:
            # Release the handle on both the success and the failure path.
            if conn is not None:
                conn.close()
    
    def generate_compliance_report(self, days: int = 30) -> Dict:
        """Generate compliance summary report"""
        try:
            conn = sqlite3.connect(self.db_path)
            cursor = conn.cursor()
            
            # Date range
            end_date = datetime.now()
            start_date = end_date - timedelta(days=days)
            
            # Get summary statistics
            cursor.execute('''
                SELECT 
                    COUNT(*) as total_transactions,
                    AVG(risk_score) as avg_risk_score,
                    COUNT(CASE WHEN risk_level = 'HIGH' THEN 1 END) as high_risk_count,
                    COUNT(CASE WHEN risk_level = 'MEDIUM' THEN 1 END) as medium_risk_count,
                    COUNT(CASE WHEN risk_level = 'LOW' THEN 1 END) as low_risk_count,
                    COUNT(CASE WHEN requires_review = 1 THEN 1 END) as requiring_review,
                    COUNT(CASE WHEN compliance_status = 'FLAGGED' THEN 1 END) as flagged_count
                FROM risk_assessments 
                WHERE assessment_timestamp >= ?
            ''', (start_date.isoformat(),))
            
            result = cursor.fetchone()
            
            report = {
                'report_period': f"{start_date.date()} to {end_date.date()}",
                'total_transactions': result[0] or 0,
                'average_risk_score': result[1] or 0.0,
                'high_risk_transactions': result[2] or 0,
                'medium_risk_transactions': result[3] or 0,
                'low_risk_transactions': result[4] or 0,
                'transactions_requiring_review': result[5] or 0,
                'flagged_transactions': result[6] or 0,
                'generated_at': datetime.now().isoformat()
            }
            
            conn.close()
            return report
            
        except Exception as e:
            logger.error(f"Error generating compliance report: {e}")
            return {}

# Example usage and testing functions
def create_sample_transaction() -> TransactionRecord:
    """Build a fixed-value demo wire transfer stamped with the current time"""
    # The id embeds the creation time down to the second
    fields = {
        'transaction_id': f"TXN_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
        'timestamp': datetime.now(),
        'amount': 15000.0,
        'sender_id': "SENDER_001",
        'receiver_id': "RECEIVER_001",
        'transaction_type': "wire_transfer",
        'location': "US",
        'description': "Business payment",
        'additional_features': {"currency": "USD", "channel": "online"},
    }
    return TransactionRecord(**fields)

def main():
    """Demo run: score one sample transaction, then print the review queue size and a 7-day report"""
    try:
        system = RiskComplianceSystem()

        # Push a single demo transaction through the full pipeline
        txn = create_sample_transaction()
        result = system.process_transaction(txn)

        print(f"Transaction {txn.transaction_id} processed:")
        print(f"Risk Level: {result.risk_level}")
        print(f"Risk Score: {result.risk_score:.4f}")
        print(f"Compliance Status: {result.compliance_status}")
        print(f"Requires Review: {result.requires_review}")

        # Size of the manual-review queue
        review_queue = system.get_pending_reviews()
        print(f"\nPending reviews: {len(review_queue)}")

        # Summary of the last week
        weekly = system.generate_compliance_report(days=7)
        print("\nCompliance Report (Last 7 days):")
        for field in weekly:
            print(f"  {field}: {weekly[field]}")

    except Exception as e:
        # Top-level boundary: report the failure instead of propagating it
        logger.error(f"Error in main: {e}")


if __name__ == "__main__":
    main()