# Import Libraries

In [7]:
from RiskLabAI.features.feature_importance.generate_synthetic_data import *
from RiskLabAI.features.feature_importance.feature_importance_mda import *

import sklearn.ensemble as Ensemble
import sklearn.tree as Tree
import pandas as pd
import numpy as np

# Generate Synthetic Test Data

In [8]:
# Settings for the synthetic dataset: 40 features in total, of which 5 are
# flagged informative and 30 redundant.
# NOTE(review): sigma_std presumably controls the noise added when building
# the redundant features -- confirm against get_test_dataset.
dataset_params = dict(
    n_samples=50000,
    n_features=40,
    n_informative=5,
    n_redundant=30,
    sigma_std=0.1,
)

# Feature matrix X and target y used by every cell below.
X, y = get_test_dataset(**dataset_params)

# Fit MDA

In [9]:
import warnings

# NOTE(review): this silences *every* warning for the rest of the kernel
# session, which can hide real problems (deprecations, convergence issues).
# Consider narrowing it to a specific category/module once the noisy source
# is identified.
warnings.filterwarnings('ignore')

# Fixed seed so the bagged ensemble (bootstrap draws and per-tree feature
# sampling) is reproducible across "Restart & Run All".
RANDOM_STATE = 42

# Weak learner: entropy-split tree that considers a single random feature at
# each split, with class weights balanced.
base_classifier = Tree.DecisionTreeClassifier(
    criterion="entropy",
    max_features=1,
    class_weight="balanced",
    min_weight_fraction_leaf=0,
)

# The base estimator is passed positionally on purpose: the keyword was
# renamed from `base_estimator` to `estimator` in scikit-learn 1.2 and the old
# name was removed in 1.4, so the positional form works on both old and new
# releases. All other bagging hyper-parameters are left at library defaults.
classifier = Ensemble.BaggingClassifier(
    base_classifier,
    random_state=RANDOM_STATE,
)

# Number of cross-validation folds handed to the MDA estimator.
n_splits = 10

mda = FeatureImportanceMDA(
    classifier,
    X, y,
    n_splits,
)

fold 0 start ...
fold 1 start ...
fold 2 start ...
fold 3 start ...
fold 4 start ...
fold 5 start ...
fold 6 start ...
fold 7 start ...
fold 8 start ...
fold 9 start ...


In [10]:
# Run the MDA computation, then rank the features from the highest to the
# lowest mean importance.
importance_table = mda.compute()
result = importance_table.sort_values(by="Mean", ascending=False)

# Bare last expression so the notebook renders the ranked table.
result

Unnamed: 0,Mean,StandardDeviation
N_3,0.000615,0.011507
N_1,-0.007891,0.009362
N_2,-0.008825,0.011165
N_0,-0.014151,0.004039
N_4,-0.018029,0.012453
R_20,-0.033958,0.016813
I_2,-0.038534,0.025367
R_22,-0.039828,0.012261
R_7,-0.040938,0.021156
R_28,-0.044137,0.032453


# Plot Results

In [11]:
import plotly.express as px

# Expose the index (feature names) as a regular column so it can be
# referenced by name in px.bar.
result["FeatureName"] = result.index

# Horizontal bars: mean importance on x (with the standard deviation as the
# error bar), one bar per feature on y.
fig = px.bar(
    result,
    x="Mean",
    y="FeatureName",
    error_x="StandardDeviation",
)

# Axis titles must match what is actually plotted: x carries the importance
# values, y carries the feature names (they were swapped before).
fig.update_layout(
    title="MDA Results",
    xaxis_title="Feature Importance",
    yaxis_title="Feature Name",
    template="plotly_dark",
    width=800, height=1200,
)

fig.show()