In [11]:
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np
import pandas as pd

class FeatureEngineer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        if 'User_id' in X.columns:
            self.user_mean_amount = X.groupby('User_id')['amount'].mean()
        else:
            self.user_mean_amount = None
        return self

    def transform(self, X):
        X = X.copy()
        # transaction_hour
        if 'TimeStamp' in X.columns:
            X['transaction_hour'] = pd.to_datetime(X['TimeStamp']).dt.hour
        # amount_log
        X['amount_log'] = np.log1p(X['amount'])
        # amount_to_avg
        if self.user_mean_amount is not None and 'User_id' in X.columns:
            X['amount_to_avg'] = X.apply(
                lambda row: row['amount'] / self.user_mean_amount.get(row['User_id'], row['amount']),
                axis=1
            )
        else:
            X['amount_to_avg'] = 1.0
        # new_device_flag
        if 'User_id' in X.columns and 'device' in X.columns:
            X['new_device_flag'] = (
                X.groupby('User_id')['device'].apply(lambda s: s != s.shift(1)).astype(int)
            ).fillna(0).values
        else:
            X['new_device_flag'] = 0
        # hour_sin, hour_cos
        X['hour_sin'] = np.sin(2 * np.pi * X['transaction_hour'] / 24)
        X['hour_cos'] = np.cos(2 * np.pi * X['transaction_hour'] / 24)
        return X

In [12]:
import joblib
import shap
import pandas as pd

# 1. Define the FraudPredictor class
class FraudPredictor:
    def __init__(self, model_path):
        self.model_data = joblib.load(model_path)
        self.xgb_model = self.model_data['xgb_model']
        self.iso_model = self.model_data['iso_model']
        self.pipeline = self.model_data['pipeline']
        self.iso_thresh = self.model_data['iso_thresh']
        self.feature_names = self.model_data['feature_names']
        self.xgb_explainer = shap.TreeExplainer(self.xgb_model)
        
    def predict(self, transaction_data):
        processed = self.pipeline.transform(pd.DataFrame([transaction_data]))
        xgb_prob = self.xgb_model.predict_proba(processed)[0][1]
        iso_score = self.iso_model.decision_function(processed)[0]
        
        # Decision logic
        if xgb_prob > 0.8:
            decision = "FRAUD"
        elif (xgb_prob > 0.6 and iso_score < self.iso_thresh):
            decision = "NEED TO TAKE FEEDBACK"
        else:
            decision = "GENUINE"
        
        # SHAP explanation
        shap_values = self.xgb_explainer.shap_values(processed)[0]
        indicators = []
        total_impact = sum(np.abs(shap_values))
        for i, val in enumerate(shap_values):
            indicators.append({
                'feature': self.feature_names[i] if i < len(self.feature_names) else f'feature_{i}',
                'value': float(processed[0][i]),
                'impact_percent': round((abs(val)/total_impact)*100, 2) if total_impact else 0.0
            })
        indicators = sorted(indicators, key=lambda x: x['impact_percent'], reverse=True)[:5]
        
        fraud_pattern = indicators[0]['feature'] if indicators else "unknown"
        
        return {
            'decision': decision,
            'probability': round(float(xgb_prob), 4),
            'anomaly_score': round(float(iso_score), 4),
            'fraud_indicators': indicators,
            'fraud_pattern': fraud_pattern,
            'thresholds': {
                'xgb_high': 0.8,
                'xgb_feedback': 0.6,
                'iso_threshold': round(float(self.iso_thresh), 4)
            }
        }

# 2. Load the model
fraud_predictor = FraudPredictor('hybrid_model.pkl')

# 3. Prepare your transaction (raw parameters)
sample_transaction = {
    'account_age_days': 30,
    'payment_method': 'Credit Card',
    'device': 'Laptop',
    'category': 'Electronics',
    'amount': 35000.0,
    'quantity': 1,
    'total_value': 35000.0,
    'num_trans_24h': 1,
    'num_failed_24h': 0,
    'no_of_cards_from_ip': 1,
    'User_id': 123,
    'TimeStamp': '2024-06-24 14:00:00'
}

# 4. Run prediction
prediction = fraud_predictor.predict(sample_transaction)
print(prediction)

{'decision': 'GENUINE', 'probability': 0.0196, 'anomaly_score': -0.0135, 'fraud_indicators': [{'feature': 'amount_to_avg', 'value': 57.23007937010643, 'impact_percent': np.float32(19.59)}, {'feature': 'num_trans_24h', 'value': -1.0290281810780284, 'impact_percent': np.float32(12.92)}, {'feature': 'total_value', 'value': 1.0461800555450884, 'impact_percent': np.float32(12.07)}, {'feature': 'account_age_days', 'value': -1.1876666572786856, 'impact_percent': np.float32(11.43)}, {'feature': 'quantity', 'value': -0.7966284471819609, 'impact_percent': np.float32(7.63)}], 'fraud_pattern': 'amount_to_avg', 'thresholds': {'xgb_high': 0.8, 'xgb_feedback': 0.6, 'iso_threshold': 0.0}}
