In [18]:
import pandas as pd
import joblib
from sklearn.pipeline import Pipeline
import warnings
import sys
warnings.filterwarnings("ignore")

In [19]:
def load_model_with_metadata(model_path):
    """
    Load model that was saved with metadata
    Args:
        model_path: Path to the saved model (.pkl file)
    Returns:
        model: Loaded scikit-learn model
        feature_names: List of expected feature names
        categorical_features: List of categorical features
    """
    try:
        model_data = joblib.load(model_path)
        if isinstance(model_data, dict) and 'model' in model_data:
            return (
                model_data['model'],
                model_data.get('feature_names', []),
                model_data.get('categorical_features', []),
                model_data.get('training_columns', [])
            )
        return model_data, [], [], []  # Assume it's a raw model if no metadata
    except Exception as e:
        raise ValueError(f"Error loading model: {str(e)}")

In [20]:
def prepare_input_data(input_dict, expected_features, categorical_mapping=None):
    """
    Prepare input data matching the model's expected features with proper one-hot encoding
    Args:
        input_dict: Dictionary of input features and values
        expected_features: List of features the model expects
        categorical_mapping: Dictionary mapping original categories to one-hot columns
    Returns:
        DataFrame with properly formatted input data
    """
    if not expected_features:
        raise ValueError("No expected features provided")
    
    # Create a dictionary to hold all feature values, initialized to 0
    feature_values = {feature: 0 for feature in expected_features}
    
    # First handle numerical features
    numerical_features = [f for f in input_dict if f not in categorical_mapping]
    for feature in numerical_features:
        if feature in expected_features:
            feature_values[feature] = input_dict[feature]
        else:
            print(f"Warning: Numerical feature '{feature}' not used in model")
    
    # Then handle categorical features with one-hot encoding
    if categorical_mapping:
        for original_feature, encoded_prefix in categorical_mapping.items():
            if original_feature in input_dict:
                value = input_dict[original_feature]
                encoded_column = f"{encoded_prefix}_{value}"
                
                if encoded_column in expected_features:
                    feature_values[encoded_column] = 1
                else:
                    print(f"Warning: Category '{value}' not found in model for feature '{original_feature}'")
    
    # Create DataFrame from the prepared dictionary
    input_data = pd.DataFrame([feature_values], columns=expected_features)
    return input_data

In [21]:
def predict_fraud(model, input_data):
    """
    Make predictions using the loaded model
    Args:
        model: Loaded scikit-learn model
        input_data: DataFrame with properly formatted input data
    Returns:
        prediction: Fraud prediction (0 or 1)
        probability: Probability of fraud
    """
    try:
        prediction = model.predict(input_data)
        probability = model.predict_proba(input_data)[:, 1]
        return prediction[0], probability[0]
    except Exception as e:
        raise ValueError(f"Prediction error: {str(e)}")

In [22]:
CATEGORICAL_MAPPING = {
    'Transaction_Type': 'Transaction_Type',
    'Device_Type': 'Device_Type',
    'Location': 'Location',
    'Merchant_Category': 'Merchant_Category',
    'Authentication_Method': 'Authentication_Method'
}

def main():
    MODEL_PATH = '../assets/fraud_detection_model_with_metadata.pkl'
    
    # Example transaction data
    transaction_data = {
        'Transaction_Amount': 1500,
        'Transaction_Type': 'Online',
        'Account_Balance': 5000,
        'Device_Type': 'Mobile',
        'Location': 'New York',
        'Merchant_Category': 'Electronics',
        'IP_Address_Flag': 1,
        'Daily_Transaction_Count': 10,
        'Avg_Transaction_Amount_7d': 200,
        'Failed_Transaction_Count_7d': 3,
        'Transaction_Distance': 1500,
        'Authentication_Method': 'Password',
        'Risk_Score': 0.9,
        'Is_Weekend': 0,
        'Hour': 3,
        'DayOfWeek': 4,
        'Is_Night': 1,
        'Amount_to_Balance_Ratio': 1500/5000,
        'Amount_Deviation': 1500-200,
        'High_Risk_Category': 1,
        'Previous_Fraudulent_Activity': 0
    }

    try:
        # 1. Load model with metadata
        model, feature_names, categorical_features, training_columns = load_model_with_metadata(MODEL_PATH)
        print("Model loaded successfully")
        
        if not feature_names:
            print("Warning: No feature names found in model metadata")
            # Try to get feature names from model
            if hasattr(model, 'feature_names_in_'):
                feature_names = model.feature_names_in_
            elif isinstance(model, Pipeline):
                print("Model is a pipeline but no feature names available")
        
        if not feature_names:
            raise ValueError("Could not determine required features for model")
            
        print("Expected features:", feature_names)

        # 2. Prepare input data
        input_df = prepare_input_data(
            transaction_data, 
            feature_names,
            categorical_mapping=CATEGORICAL_MAPPING
        )
        
        # 3. Make prediction
        prediction, probability = predict_fraud(model, input_df)
        
        if prediction is not None:
            print("\nPrediction Results:")
            print(f"Fraud Prediction: {prediction} ({'Fraud' if prediction == 1 else 'Legitimate'})")
            print(f"Fraud Probability: {probability:.4f}")
            
            # Interpretation
            threshold = 0.5  # Adjustable threshold
            if probability > threshold:
                print("ALERT: This transaction is likely fraudulent!")
            else:
                print("This transaction appears legitimate.")
                
    except Exception as e:
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)

In [23]:
main()

Model loaded successfully
Expected features: ['Transaction_Amount', 'Account_Balance', 'IP_Address_Flag', 'Previous_Fraudulent_Activity', 'Daily_Transaction_Count', 'Avg_Transaction_Amount_7d', 'Failed_Transaction_Count_7d', 'Transaction_Distance', 'Risk_Score', 'Is_Weekend', 'Hour', 'DayOfWeek', 'Is_Night', 'Amount_to_Balance_Ratio', 'Amount_Deviation', 'High_Risk_Category', 'Transaction_Type_ATM Withdrawal', 'Transaction_Type_Bank Transfer', 'Transaction_Type_Online', 'Transaction_Type_POS', 'Device_Type_Laptop', 'Device_Type_Mobile', 'Device_Type_Tablet', 'Location_London', 'Location_Mumbai', 'Location_New York', 'Location_Sydney', 'Location_Tokyo', 'Merchant_Category_Clothing', 'Merchant_Category_Electronics', 'Merchant_Category_Groceries', 'Merchant_Category_Restaurants', 'Merchant_Category_Travel', 'Authentication_Method_Biometric', 'Authentication_Method_OTP', 'Authentication_Method_PIN', 'Authentication_Method_Password']


Error: Prediction error: columns are missing: {'Device_Type', 'Authentication_Method', 'Merchant_Category', 'Location', 'Transaction_Type'}


SystemExit: 1