# Import Libraries

In [1]:
import sklearn.metrics as Metrics
import sklearn.ensemble as Ensemble
import sklearn.tree as Tree
import sklearn.datasets as Datasets
import sklearn.model_selection as ModelSelection

from GenerateSyntheticData import *
from SingleFeatureImportance import *

import pandas as pd
import numpy as np

# Generate Synthetic Test Data

In [2]:
# Build the synthetic feature matrix X and label vector y used by the rest of
# the notebook. get_test_dataset comes from the GenerateSyntheticData module.
# NOTE(review): 40 total - 5 informative - 30 redundant leaves 5 features that
# are presumably pure noise; confirm against get_test_dataset.
# NOTE(review): sigma_std presumably controls the noise added when deriving the
# redundant features; confirm against get_test_dataset.
X, y = get_test_dataset(
    n_samples=10000,
    n_features=40,
    n_informative=5,
    n_redundant=30,
    sigma_std=0.1,
)

# Fit Single Feature Importance (SFI)

In [3]:
import warnings

# Base estimator handed to the SFI routine.
# random_state is fixed so that re-running this cell on the same data uses the
# same seed inside the tree instead of a fresh one on every run.
classifier = Tree.DecisionTreeClassifier(
    criterion="entropy",
    max_features=1,
    class_weight="balanced",
    min_weight_fraction_leaf=0,
    random_state=0,
)

# Suppress warnings only while the SFI computation runs. A bare
# warnings.filterwarnings('ignore') would stay active for the rest of the
# kernel session and hide warnings raised by every later cell.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    result = feature_importance_SFI(
        classifier,
        X, y,
        10,  # NOTE(review): presumably the number of cross-validation splits; confirm against feature_importance_SFI
        scoring="accuracy"
    )

In [4]:
# Order the SFI table by its "Mean" column, largest value first, and display it
# as the cell output.
result = result.sort_values(by="Mean", ascending=False)
result

Unnamed: 0,FeatureName,Mean,StandardDeviation
37,R_27,0.6427,0.009355
15,R_5,0.638,0.010262
23,R_13,0.6368,0.00991
25,R_15,0.6363,0.009058
26,R_16,0.6356,0.008514
28,R_18,0.6326,0.009962
1,I_1,0.6304,0.008893
10,R_0,0.5237,0.031449
18,R_8,0.5203,0.031614
29,R_19,0.5196,0.02878


# Plot Results

In [5]:
import plotly.express as px

# Horizontal bar chart: one bar per row of `result`, with the bar length taken
# from the "Mean" column and the error bar from the "StandardDeviation" column.
fig = px.bar(
    result,
    x="Mean",
    y="FeatureName",
    error_x="StandardDeviation"
)

fig.update_layout(
    title="SFI Results",
    # Axis titles follow the mapping above: the x axis carries the "Mean"
    # values and the y axis carries the feature names.
    xaxis_title="Feature Importance",
    yaxis_title="Feature Name",
    template="plotly_dark",
    width=800, height=1200,
)

fig.show()

# Save the figure as JSON in the current working directory.
fig.write_json('SFI Results.json')

# Save Results

In [6]:
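# Optional static export, left disabled: uncomment the line below to also write
# the figure as a PNG via the kaleido engine (kaleido is a separate package).
# fig.write_image("./Figs/SFI_results.png",format='png',engine='kaleido')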