# How to use target_merging.py, to_probabilities.py, and evaluation.py in a streamlined feature-evaluation workflow

In [2]:
import pandas as pd
import numpy as np

## A. Merging
1. Load the target dataframe

In [3]:
# Load the target table; the path is relative to the notebook's working directory.
target = pd.read_csv('target.csv')
# Bare expression on the last line so the notebook renders the frame.
target

Unnamed: 0,APPROVAL_ID_INFO,APPLICATION_ID_INFO,FIRST_APPROVAL_ID_INFO,DECISION_CHAIN_ID_INFO,target
0,138217860,119743699,138217860,27253019,0.0
1,129199921,114818219,129199921,18154634,0.0
2,148414823,123437264,148414823,37436298,
3,135256719,118197979,135256719,24269570,0.0
4,145333119,122483029,145333119,34372022,
...,...,...,...,...,...
314252,126061074,113021896,126061074,14999720,0.0
314253,132364258,116610529,132364258,21345488,1.0
314254,148080435,123327742,148080435,37102622,0.0
314255,133950290,117468580,133950290,22946379,0.0


2. Suppose you make feature X
3. Feature X must be aligned with the target. We do this by merging on ONE of the four ID columns 'APPROVAL_ID_INFO', 'APPLICATION_ID_INFO', 'FIRST_APPROVAL_ID_INFO', 'DECISION_CHAIN_ID_INFO' — whichever is available in your feature dataset.

In [6]:
def merge_on_column(result_df: pd.DataFrame, feature_df: pd.DataFrame, merge_col: str) -> pd.DataFrame:
    """
    Inner-join a feature table onto the target table and keep only labelled rows.

    Parameters
    ----------
    result_df : pd.DataFrame
        The main dataframe. It must contain ``merge_col`` and a 'target' column.
    feature_df : pd.DataFrame
        The dataframe holding the feature(s). It must contain ``merge_col``;
        its remaining columns are carried into the result by the merge.
    merge_col : str
        The ID column to join on. One of ['APPROVAL_ID_INFO', 'APPLICATION_ID_INFO',
        'FIRST_APPROVAL_ID_INFO', 'DECISION_CHAIN_ID_INFO'].

    Returns
    -------
    pd.DataFrame
        Rows whose ``merge_col`` value appears in both inputs, minus the rows
        where 'target' is NaN, with a fresh 0..n-1 index. NaN values in other
        columns are left in place.

    Raises
    ------
    ValueError
        If ``merge_col`` is not one of the four permitted ID columns.
    """
    allowed_keys = [
        'APPROVAL_ID_INFO',
        'APPLICATION_ID_INFO',
        'FIRST_APPROVAL_ID_INFO',
        'DECISION_CHAIN_ID_INFO',
    ]
    if merge_col not in allowed_keys:
        raise ValueError(f"merge_col must be one of {allowed_keys}")

    # The key has the same name on both sides, so a single `on=` is enough.
    joined = pd.merge(result_df, feature_df, on=merge_col, how='inner')

    # Unlabelled rows cannot be scored; only the target column is checked for NaN.
    labelled = joined.dropna(subset=['target'])

    return labelled.reset_index(drop=True)


* Suppose your feature looks like this, and we pick "APPLICATION_ID_INFO" as the merge key

In [7]:
# Toy feature table: an ID column to join on plus the feature values to evaluate.
feature_df = pd.DataFrame({
    "APPLICATION_ID_INFO": [119743699, 114818219, 123437264, 118197979, 122483029],
    "feature": [1, 2, 3, 4, 5]
})

feature_df

Unnamed: 0,APPLICATION_ID_INFO,feature
0,119743699,1
1,114818219,2
2,123437264,3
3,118197979,4
4,122483029,5


* Note that the `merge_on_column` function drops rows where the target is NaN

In [10]:
# Inner-join on APPLICATION_ID_INFO; merge_on_column then drops rows whose target is NaN.
merged_df = merge_on_column(target, feature_df, "APPLICATION_ID_INFO")
merged_df

Unnamed: 0,APPROVAL_ID_INFO,APPLICATION_ID_INFO,FIRST_APPROVAL_ID_INFO,DECISION_CHAIN_ID_INFO,target,feature
0,138217860,119743699,138217860,27253019,0.0,1
1,129199921,114818219,129199921,18154634,0.0,2
2,135256719,118197979,135256719,24269570,0.0,4


## B. Convert Feature to Probabilities

* We ALWAYS assume that the larger the feature value, the more likely fraud is (if your feature works the other way around, make sure you FLIP its sign)

* We need to convert **feature values into probabilities** so that our evaluation makes sense. We do this by using the softmax function

In [11]:
def to_probabilities(x):
    """
    Convert arbitrary feature values into normalized weights with a softmax.

    Parameters
    ----------
    x : array-like
        Feature values; converted to a float NumPy array.

    Returns
    -------
    np.ndarray
        Array of the same shape as ``x`` whose entries are non-negative and sum
        to 1 over ALL elements. An empty input yields an empty float array.

    Notes
    -----
    The normalization is taken across the whole input, not per element, so each
    value depends on every other value and shrinks as more inputs are added.
    Keep this in mind when comparing the result to a fixed threshold such as 0.5.
    """
    x = np.asarray(x, dtype=float)
    # np.max has no identity for a zero-size array and would raise an opaque
    # ValueError; there is nothing to normalize, so return an empty array.
    if x.size == 0:
        return x.copy()
    # Subtracting the maximum does not change the softmax result but keeps
    # np.exp from overflowing on large feature values.
    exp_x = np.exp(x - np.max(x))
    return exp_x / exp_x.sum()

When you convert the feature values [1, 2, 4] into probabilities, this is what happens

In [14]:
# The softmax is taken over the whole merged feature column at once.
feature = merged_df['feature']
prob = to_probabilities(feature)
prob

array([0.04201007, 0.1141952 , 0.84379473])

## C. Evaluation

* Now that we have our feature converted into probabilities, we can threshold them and calculate classification metrics (precision, recall, accuracy, F1) to see how well the feature behaves

In [17]:
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score

def evaluate_fraud_model(y_true, y_pred_prob, threshold=0.5, *,
                         precision_req=0.7, recall_req=0.65,
                         acc_req=0.75, f1_req=0.7):
    """
    Evaluate model performance for fraud detection.

    Scores are binarized at ``threshold``, then precision, recall, accuracy and
    F1 are computed and compared against the minimum acceptable values. The
    report is printed and also returned.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels.
    y_pred_prob : array-like
        Predicted scores; a score ``>= threshold`` is predicted as class 1.
    threshold : float, default 0.5
        Cut-off applied to ``y_pred_prob``.
    precision_req, recall_req, acc_req, f1_req : float, keyword-only
        Minimum precision / recall / accuracy / F1 required for a PASS.
        Defaults are 0.7, 0.65, 0.75 and 0.7 respectively.

    Returns
    -------
    dict
        Keys "Precision", "Recall", "Accuracy", "F1" (floats), "Decision"
        ("PASS ✅" or "REJECT ❌") and "Reason" (explanation string listing
        every unmet criterion).

    Raises
    ------
    ValueError
        If either input contains NaN values.
    """
    # Convert to NumPy arrays
    y_true = np.asarray(y_true)
    y_pred_prob = np.asarray(y_pred_prob)

    # Check for NaNs
    if np.isnan(y_true).any() or np.isnan(y_pred_prob).any():
        raise ValueError("❌ Terminated: Input arrays contain NaN values. Please clean your data before evaluation.")

    # Convert probabilities to binary predictions
    y_pred = (y_pred_prob >= threshold).astype(int)

    # Compute metrics; zero_division=0 reports 0 instead of warning when a
    # metric's denominator is empty (e.g. no positive predictions).
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, zero_division=0)

    # (label used in the report, achieved value, required minimum)
    checks = [
        ("Precision", precision, precision_req),
        ("Recall", recall, recall_req),
        ("Accuracy", acc, acc_req),
        ("F1 Score", f1, f1_req),
    ]

    # Collect unmet criteria, in the same order as the checks above
    failed = [
        f"{label} below threshold ({value:.3f} < {required})"
        for label, value, required in checks
        if value < required
    ]

    # Decision
    if not failed:
        decision = "PASS ✅"
        reason = "All metrics meet or exceed required thresholds."
    else:
        decision = "REJECT ❌"
        reason = " | ".join(failed)

    # Display results
    print(f"Precision : {precision:.4f}")
    print(f"Recall    : {recall:.4f}")
    print(f"Accuracy  : {acc:.4f}")
    print(f"F1 Score  : {f1:.4f}")
    print(f"Decision  : {decision}")
    print(f"Reason    : {reason}")

    return {
        "Precision": precision,
        "Recall": recall,
        "Accuracy": acc,
        "F1": f1,
        "Decision": decision,
        "Reason": reason
    }


- Feature Value in probability: [0.04201007, 0.1141952 , 0.84379473]
- Actual Value: [0, 0, 0]


* In this example, even though there is no fraud in the real data, our feature assigns the last instance a probability of 84.38% of being fraudulent
* The detailed report below shows why this is a WEAK feature

In [18]:
# threshold is left at its default of 0.5; the call prints a report and returns the metrics dict.
evaluate_fraud_model(merged_df['target'], prob)

Precision : 0.0000
Recall    : 0.0000
Accuracy  : 0.6667
F1 Score  : 0.0000
Decision  : REJECT ❌
Reason    : Precision below threshold (0.000 < 0.7) | Recall below threshold (0.000 < 0.65) | Accuracy below threshold (0.667 < 0.75) | F1 Score below threshold (0.000 < 0.7)


{'Precision': 0.0,
 'Recall': 0.0,
 'Accuracy': 0.6666666666666666,
 'F1': 0.0,
 'Decision': 'REJECT ❌',
 'Reason': 'Precision below threshold (0.000 < 0.7) | Recall below threshold (0.000 < 0.65) | Accuracy below threshold (0.667 < 0.75) | F1 Score below threshold (0.000 < 0.7)'}