In [3]:
import pandas as pd
import numpy as np

def calculate_iv(df: pd.DataFrame, target_col: str, bins=10) -> pd.DataFrame:
    """
    Calculates Information Value (IV) for each variable (excluding the target).
    Parameters:
        df: pandas DataFrame
        target_col: name of the binary target column
        bins: number of bins for numeric features (default = 10)
    Returns:
        DataFrame with columns: ['variable', 'information_value']
    """
    iv_list = []

    for col in df.columns:
        if col == target_col:
            continue

        if df[col].dtype == 'object':
            binned = df[col]
        else:
            binned = pd.qcut(df[col].rank(method='first'), q=bins, duplicates='drop')

        grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
        grouped['non_event'] = grouped['count'] - grouped['sum']
        grouped['event_rate'] = grouped['sum'] / grouped['sum'].sum()
        grouped['non_event_rate'] = grouped['non_event'] / grouped['non_event'].sum()
        grouped['woe'] = np.log((grouped['event_rate'] + 1e-6) / (grouped['non_event_rate'] + 1e-6))
        grouped['iv'] = (grouped['event_rate'] - grouped['non_event_rate']) * grouped['woe']

        
        iv = grouped['iv'].sum()
        iv_list.append({'variable': col, 'information_value': iv})

    return pd.DataFrame(iv_list).sort_values(by='information_value', ascending=False)

In [7]:
from sklearn.datasets import make_classification
import pandas as pd
import numpy as np
X, y = make_classification(
    n_samples=10000,
    n_features=100,
    n_informative=9,
    n_redundant=6,
    n_classes=2,
    random_state=42
)


syndata = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(100)])
syndata['target'] = y

In [8]:

iv_df = calculate_iv(syndata, target_col='target')
print(iv_df)

  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(bi

      variable  information_value
3    feature_3           1.227860
82  feature_82           0.856793
23  feature_23           0.719528
85  feature_85           0.502827
18  feature_18           0.351348
..         ...                ...
40  feature_40           0.001289
51  feature_51           0.001286
89  feature_89           0.000905
83  feature_83           0.000655
15  feature_15           0.000518

[100 rows x 2 columns]


  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(binned)[target_col].agg(['count', 'sum'])
  grouped = df.groupby(bi