# Import Libraries

In [8]:
from GenerateSyntheticData import *
from Clustering import *
from ClusteredMeanDecreaseImpurity import clustered_feature_importance_MDI

import sklearn.ensemble as Ensemble
import sklearn.tree as Tree
import pandas as pd
import numpy as np


# Generate Synthetic Test Data

In [9]:
# Parameters of the synthetic dataset, gathered in one place so they are easy
# to tweak. 40 features in total, of which 5 are informative and 30 redundant;
# presumably the remaining 5 are pure noise (the clustering output further down
# lists N_0..N_4) -- confirm against GenerateSyntheticData.
# NOTE(review): sigma_std looks like the noise scale used when deriving the
# redundant features -- confirm against get_test_dataset.
dataset_config = {
    "n_features": 40,
    "n_informative": 5,
    "n_redundant": 30,
    "n_samples": 10000,
    "sigma_std": 0.1,
}

# X holds the feature columns, y the target passed to the classifier later on.
X, y = get_test_dataset(**dataset_config)

# Fit Clustered MDI

## Clustering

In [10]:
import warnings

# Blanket filter: suppresses every warning raised from here on in this kernel.
warnings.filterwarnings('ignore')

# Pairwise correlation matrix of the feature columns; this is what gets clustered.
feature_corr = X.corr()

# numberClusters=25 is presumably an upper bound on the cluster count (the
# recorded output contains only 6 clusters) -- confirm against Clustering.
# The call returns three values: corr0, the cluster -> feature-name mapping,
# and silh (presumably silhouette scores -- confirm).
corr0, clusters, silh = clusterKMeansBase(
    feature_corr, numberClusters=25, iterations=20
)

# Bare last expression so the cluster membership is displayed as the cell output.
clusters

{0: ['I_3',
  'R_2',
  'R_3',
  'R_4',
  'R_6',
  'R_20',
  'R_22',
  'R_25',
  'R_28',
  'R_29'],
 1: ['I_2', 'R_7', 'R_12', 'R_24'],
 2: ['I_0', 'R_1', 'R_9', 'R_10', 'R_14', 'R_17', 'R_21', 'R_23', 'R_26'],
 3: ['I_1', 'R_5', 'R_13', 'R_15', 'R_16', 'R_18', 'R_27'],
 4: ['I_4', 'R_0', 'R_8', 'R_11', 'R_19'],
 5: ['N_0', 'N_1', 'N_2', 'N_3', 'N_4']}

## Fit

In [11]:
import warnings
warnings.filterwarnings('ignore')

# Base learner: an entropy-criterion decision tree that considers a single
# candidate feature per split (max_features=1) with balanced class weights.
base_tree = Tree.DecisionTreeClassifier(
    criterion="entropy",
    max_features=1,
    class_weight="balanced",
    min_weight_fraction_leaf=0)

# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, but the first positional parameter is the base learner in
# every release, so this form works on both old and new versions.
classifier = Ensemble.BaggingClassifier(
    base_tree,
    n_estimators=1000,
    max_features=1.0,
    max_samples=1.0,
    oob_score=False)

# NOTE(review): no random_state is set on the tree or the ensemble, so the
# importances below will vary slightly between runs.
fit = classifier.fit(X, y)

# Aggregate the fitted ensemble's importances per feature cluster; `clusters`
# is the cluster -> feature-name mapping produced by the clustering cell.
result = clustered_feature_importance_MDI(
    fit,
    X.columns,
    clusters,
)

In [12]:
# Show the per-cluster importance table (one row per cluster, with the Mean and
# StandardDeviation columns seen in the recorded output).
result

Unnamed: 0,Mean,StandardDeviation
C_0,0.146239,0.000374
C_1,0.142568,0.000853
C_2,0.184566,0.000612
C_3,0.311672,0.000959
C_4,0.157316,0.000857
C_5,0.057639,0.00022


# Plot Results

In [16]:
import plotly.express as px

# Build a derived frame for plotting instead of adding a column to `result`
# in place: `result` was already displayed by an earlier cell, and mutating it
# here would make that output stale and this cell order-dependent.
plot_data = result.assign(ClusterIndex=result.index)

# Horizontal bars: one bar per cluster, mean importance on the x axis with the
# standard deviation drawn as the error bar.
fig = px.bar(
    plot_data,
    x="Mean",
    y="ClusterIndex",
    error_x="StandardDeviation",
    orientation='h'
)

fig.update_layout(
    title="Clustered MDI Results",
    xaxis_title="Feature Importance",
    yaxis_title="Cluster Index",
    template="plotly_dark",
    width=800, height=1200,
)

fig.show()

# Save Results

In [17]:
from pathlib import Path

# Create the output directory first: write_image does not create missing
# parent directories, so a fresh checkout without ./Figs would fail here.
figure_path = Path("./Figs/clustered_MDI_results.png")
figure_path.parent.mkdir(parents=True, exist_ok=True)

# Static PNG export of the figure built in the plotting cell (kaleido engine).
fig.write_image(str(figure_path), format='png', engine='kaleido')