In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

from sklearn.datasets import load_diabetes
from sklearn.preprocessing import OrdinalEncoder

from featimp import (
    get_corr_importances,
    get_chi2_crosstab_importances,
    get_chi2_importances,
    get_anova_importances,
    get_mutual_info_importances,
    get_ml_importances,
    get_permutation_importances,
    get_feature_importances,
    )

# Notebook-wide display configuration.
# set_theme() is the preferred spelling; sns.set() is only an alias for it.
sns.set_theme()
# Silence every warning category so library messages do not clutter cell outputs.
# NOTE(review): this also hides deprecation/convergence warnings — narrow the
# filter (category=/module=) if a result below ever looks suspicious.
warnings.simplefilter("ignore")
# Diverging colormap object reused by the styled importance tables in later cells.
cm = sns.color_palette("coolwarm", as_cmap=True)

In [2]:
# Build a 1000-row synthetic frame used to exercise get_feature_importances.
# Seed NumPy's global generator first so that Restart & Run All regenerates
# exactly the same data (the draws below were previously unseeded).
SEED = 42
np.random.seed(SEED)

# Raw draws. column_a / column_b keep their (1, 1000) shape; row 0 is used below.
column_a = np.random.rand(1, 1000)            # uniform on [0, 1)
column_b = np.random.rand(1, 1000)            # uniform on [0, 1)
column_c = np.random.randint(2, size=1000)    # integers in {0, 1}
column_d = np.random.randint(10, size=1000)   # integers in {0, ..., 9}
column_e = np.random.normal(0, 1, size=1000)  # standard normal

# Each target is an exact copy of one feature ('a', 'c', 'd' respectively),
# so that feature is perfectly predictive of its target by construction.
test_df = pd.DataFrame(
    {
        'a': column_a[0],
        'b': column_b[0],
        'c': column_c,
        'd': column_d,
        'e': column_e,
        'target_reg': column_a[0],
        'target_clf_binary': column_c,
        'target_clf_multi': column_d,
    }
)

# Last expression of the cell: rendered via the DataFrame's rich repr.
test_df.head()

Unnamed: 0,a,b,c,d,e,target_reg,target_clf_binary,target_clf_multi
0,0.300411,0.220093,1,5,0.085985,0.300411,1,5
1,0.040481,0.509388,0,0,0.013116,0.040481,0,0
2,0.209462,0.882535,1,7,-0.271098,0.209462,1,7
3,0.66087,0.435131,1,9,-0.42693,0.66087,1,9
4,0.823329,0.842754,1,2,-0.865616,0.823329,1,2


In [3]:
# Numeric features against the regression target.
# target_reg was filled with the same values as 'a' when test_df was built,
# while 'b' and 'e' are independent random draws.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=['a', 'b', 'e'],
    cat_features=[],
    target='target_reg',
    task='reg',
    method='all',
)
# Show the result table shaded with the shared coolwarm colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,ANOVA,LGBM Imp.,PI mean,PI std,Rank
a,1.0,,1492.4,0.393233,0.021121,1.060386
b,0.050035,,304.4,-4e-06,1.4e-05,-0.51823
e,0.005785,,304.8,9e-06,1.9e-05,-0.542156
TT (Sec),0.003989,0.000997,1.99018,4.596878,0.0,0.0


In [4]:
# Numeric features against the binary target.
# target_clf_binary was filled from column 'c', which is not among the
# features passed here; 'a', 'b' and 'e' are independent random draws.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=['a', 'b', 'e'],
    cat_features=[],
    target='target_clf_binary',
    task='clf_binary',
    method='all',
)
# Show the result table shaded with the shared coolwarm colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
a,0.022192,,,0.491721,,45.1,0.028281,0.032722,0.182797
e,0.046069,,,2.122585,,41.6,0.004398,0.037729,0.148323
b,0.009268,,,0.085735,,45.3,-0.007626,0.041183,-0.33112
TT (Sec),0.001994,0.001995,0.0,0.003991,0.000996,1.897536,4.301232,0.0,0.0


In [5]:
# Numeric features against the 10-class target.
# target_clf_multi was filled from column 'd', which is not among the
# features passed here; 'a', 'b' and 'e' are independent random draws.
# NOTE(review): 'clf_multiable' is kept exactly as written — confirm it is the
# task string featimp expects.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=['a', 'b', 'e'],
    cat_features=[],
    target='target_clf_multi',
    task='clf_multiable',
    method='all',
)
# Show the result table shaded with the shared coolwarm colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
b,,,,1.024484,,121.3,0.009054,0.021862,0.149339
e,,,,1.683552,,118.1,0.007548,0.025329,0.006013
a,,,,0.021205,,115.7,0.013726,0.018575,-0.155352
TT (Sec),0.0,0.000996,0.000997,0.001995,0.001994,8.457257,4.827351,0.0,0.0


In [6]:
# Categorical features against the regression target.
# target_reg was filled with the values of 'a'; 'c' and 'd' are independent
# random integer draws.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=['c', 'd'],
    target='target_reg',
    task='reg',
    method='all',
)
# Show the result table shaded with the shared coolwarm colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,ANOVA,LGBM Imp.,PI mean,PI std,Rank
c,,0.491721,5.4,-0.001515,0.001706,0.25
d,,0.021205,13.6,-0.002208,0.00275,-0.25
TT (Sec),0.000996,0.00399,1.570452,2.653058,0.0,0.0


In [7]:
# Categorical features against the binary target.
# target_clf_binary was filled with the same values as 'c' when test_df was
# built; 'd' is an independent random draw.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=['c', 'd'],
    target='target_clf_binary',
    task='clf_binary',
    method='all',
)
# Show the result table shaded with the shared coolwarm colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
c,,0.997998,502.0,,0.693639,100.0,0.498531,0.034853,0.428571
d,,0.0,1.91553,,0.0,135.4,0.0,0.0,-0.428571
TT (Sec),0.000997,0.016955,0.002992,0.0,0.018951,1.135264,2.804811,0.0,0.0


In [8]:
# Categorical features against the 10-class target.
# target_clf_multi was filled with the same values as 'd' when test_df was
# built; 'c' is an independent random draw.
# NOTE(review): 'clf_multiable' is kept exactly as written — confirm it is the
# task string featimp expects.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=['c', 'd'],
    target='target_clf_multi',
    task='clf_multiable',
    method='all',
)
# Show the result table shaded with the shared coolwarm colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
d,,1.0,1841.250055,,2.30358,4114.9,0.90402,0.025041,0.714286
c,,0.0,3.872335,,0.010254,1708.3,0.0,0.0,-0.714286
TT (Sec),0.0,0.017951,0.002994,0.000997,0.023948,3.126704,2.947004,0.0,0.0
