In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

from sklearn.datasets import load_diabetes
from sklearn.preprocessing import OrdinalEncoder

from featimp import (
    get_corr_importances,
    get_chi2_crosstab_importances,
    get_chi2_importances,
    get_anova_importances,
    get_mutual_info_importances,
    get_ml_importances,
    get_permutation_importances,
    get_feature_importances,
    )

# Apply seaborn's default theme to every matplotlib figure in the notebook.
# `set_theme()` is the current name; `sns.set()` is only kept as an alias.
sns.set_theme()
# Silence all warnings for the rest of the session.
# NOTE(review): this is a blanket filter - narrow it (by category/module) if
# it turns out to hide deprecation or convergence warnings worth seeing.
warnings.simplefilter("ignore")
# Colormap object passed to `Styler.background_gradient` by the result cells.
cm = sns.color_palette("coolwarm", as_cmap=True)

In [2]:
# Build a synthetic frame for exercising the feature-importance helpers.
# All draws come from one seeded generator so that "Restart Kernel -> Run All"
# reproduces the same data (and therefore the same result tables).
SEED = 42
N_ROWS = 1000
rng = np.random.default_rng(SEED)

# `column_a` / `column_b` keep their original (1, N_ROWS) shape so any other
# code indexing them as `column_a[0]` keeps working.
column_a = rng.random((1, N_ROWS))              # uniform floats in [0, 1)
column_b = rng.random((1, N_ROWS))              # uniform floats in [0, 1)
column_c = rng.integers(2, size=N_ROWS)         # integers drawn from {0, 1}
column_d = rng.integers(10, size=N_ROWS)        # integers drawn from 0..9
column_e = rng.normal(0, 1, size=N_ROWS)        # normal(mean=0, std=1)
column_f = rng.choice([True, False], N_ROWS)    # booleans
column_g = rng.choice([1, 0], N_ROWS)           # 0/1 integers

# Assemble the frame in one shot instead of growing an empty DataFrame.
test_df = pd.DataFrame({
    'a': column_a[0],
    'b': column_b[0],
    'c': column_c,
    'd': column_d,
    'e': column_e,
    'f': column_f,
    'g': column_g,
    # Each target is an exact copy of one feature column (a, c and d), so
    # that feature is perfectly informative whenever it is in the feature set.
    'target_reg': column_a[0],
    'target_clf_binary': column_c,
    'target_clf_multi': column_d,
})

# Last expression of the cell -> rendered as the cell output.
test_df.head()

Unnamed: 0,a,b,c,d,e,f,g,target_reg,target_clf_binary,target_clf_multi
0,0.657793,0.376991,1,0,0.743753,False,1,0.657793,1,0
1,0.477385,0.64692,1,5,-1.181454,True,1,0.477385,1,5
2,0.060299,0.820574,1,1,-2.51102,True,0,0.060299,1,1
3,0.148096,0.822287,1,0,-0.68323,True,0,0.148096,1,0
4,0.08942,0.992849,1,1,0.227742,True,0,0.08942,1,1


In [3]:
# Numeric features (a, b, e) vs. the regression target, method='all'.
# `target_reg` is a copy of column `a`, so `a` is the informative feature here.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=['a', 'b', 'e'],
    cat_features=[],
    target='target_reg',
    task='reg',
    method='all',
)
# Colour the result table with the shared colormap; shown as the cell output.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,ANOVA,LGBM Imp.,PI mean,PI std,Rank
a,1.0,,1526.3,0.415271,0.016984,0.353452
b,0.045699,,345.5,-1e-05,1.8e-05,-0.174553
e,0.002807,,368.1,-1e-06,2e-05,-0.178899
TT (Sec),0.005998,0.002,2.203786,4.40472,0.0,0.0


In [4]:
# Numeric features (a, b, e) vs. the binary target, method='all'.
# `target_clf_binary` is a copy of column `c`, which is not among these
# features, and every column is drawn independently.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=['a', 'b', 'e'],
    cat_features=[],
    target='target_clf_binary',
    task='clf_binary',
    method='all',
)
# Colour the result table with the shared colormap; shown as the cell output.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
b,0.049603,,,2.461587,,43.2,-0.000112,0.039444,0.150566
a,0.002459,,,0.006032,,38.0,0.003759,0.043586,-0.050499
e,0.028557,,,0.81453,,32.8,-0.016408,0.028668,-0.100067
TT (Sec),0.004001,0.001,0.001,0.003998,0.0,2.160858,4.616494,0.0,0.0


In [5]:
# Numeric features (a, b, e) vs. the multi-class target, method='all'.
# `target_clf_multi` is a copy of column `d`, which is not among these features.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=['a', 'b', 'e'],
    cat_features=[],
    target='target_clf_multi',
    task='clf_multiable',
    method='all',
)
# Colour the result table with the shared colormap; shown as the cell output.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
e,,,1.480224,,129.4,0.003256,0.025672,0.126924
a,,,0.000205,,130.5,-0.000877,0.024623,-0.043695
b,,,1.167788,,126.8,-0.001461,0.018185,-0.083229
TT (Sec),0.001001,0.002,0.003001,0.0,9.627041,4.631765,0.0,0.0


In [6]:
# Categorical features (c, d) vs. the regression target, method='all'.
# `target_reg` is a copy of column `a`, which is not among these features.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=['c', 'd'],
    target='target_reg',
    task='reg',
    method='all',
)
# Colour the result table with the shared colormap; shown as the cell output.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,ANOVA,LGBM Imp.,PI mean,PI std,Rank
d,,0.000205,168.2,-0.000249,0.002737,0.083333
c,,0.006032,101.6,-0.000508,0.002223,-0.083333
TT (Sec),0.002004,0.002996,1.842107,2.612047,0.0,0.0


In [7]:
# Categorical features (c, d) vs. the binary target, method='all'.
# `target_clf_binary` is a copy of column `c`, so `c` is the informative
# feature here.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=['c', 'd'],
    target='target_clf_binary',
    task='clf_binary',
    method='all',
)
# Colour the result table with the shared colormap; shown as the cell output.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
c,,0.997998,493.0,,0.693549,100.0,0.509014,0.046263,0.085714
d,,0.0,0.000474,,0.00771,129.8,0.0,0.0,-0.085714
TT (Sec),0.001044,0.02599,0.003,0.001001,0.025998,1.434914,2.850217,0.0,0.0


In [8]:
# Categorical features (c, d) vs. the multi-class target, method='all'.
# `target_clf_multi` is a copy of column `d`, so `d` is the informative
# feature here.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=['c', 'd'],
    target='target_clf_multi',
    task='clf_multiable',
    method='all',
)
# Colour the result table with the shared colormap; shown as the cell output.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
d,1.0,1849.020175,,2.300451,4124.2,0.898634,0.032561,0.142857
c,0.0,3.144871,,0.0,1541.6,0.0,0.0,-0.142857
TT (Sec),0.019792,0.004,0.002003,0.034829,3.623749,3.1627,0.0,0.0


In [9]:
# Boolean-style features (f: True/False, g: 0/1) vs. the regression target,
# method='all'. Neither feature is related to `target_reg` (a copy of `a`).
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=[],
    bool_features=['f', 'g'],
    target='target_reg',
    task='reg',
    method='all',
)
# Colour the result table with the shared colormap; shown as the cell output.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,ANOVA,LGBM Imp.,PI mean,PI std,Rank
f,0.012096,0.146052,71.1,-0.000484,0.000695,-0.0
g,0.022627,0.511223,50.7,-0.00051,0.00103,-0.0
TT (Sec),0.004516,0.003999,1.322675,2.490495,0.0,0.0


In [10]:
# Boolean-style features (f: True/False, g: 0/1) vs. the binary target,
# method='all'. Neither feature is related to `target_clf_binary` (a copy
# of `c`).
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=[],
    bool_features=['f', 'g'],
    target='target_clf_binary',
    task='clf_binary',
    method='all',
)
# Colour the result table with the shared colormap; shown as the cell output.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
g,0.044089,0.027771,0.977757,1.943741,0.023072,61.7,0.027397,0.050976,0.102041
f,0.01517,0.0,0.105861,0.229725,0.019584,123.4,0.023961,0.030858,-0.102041
TT (Sec),0.001999,0.017,0.002,0.005004,0.024022,1.442639,2.820678,0.0,0.0


In [11]:
# Boolean-style features (f: True/False, g: 0/1) vs. the multi-class target,
# method='all'. Neither feature is related to `target_clf_multi` (a copy
# of `d`).
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=[],
    bool_features=['f', 'g'],
    target='target_clf_multi',
    task='clf_multiable',
    method='all',
)
# Colour the result table with the shared colormap; shown as the cell output.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
g,0.042983,5.459924,1.797239,0.013884,34.2,-0.003666,0.008516,0.047619
f,0.0,2.761712,0.903954,0.0,40.8,-0.002057,0.009113,-0.047619
TT (Sec),0.015634,0.004,0.002,0.041092,2.683158,2.963346,0.0,0.0
