In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

from sklearn.datasets import load_diabetes
from sklearn.preprocessing import OrdinalEncoder

from featimp import (
    get_corr_importances,
    get_chi2_crosstab_importances,
    get_chi2_importances,
    get_anova_importances,
    get_mutual_info_importances,
    get_ml_importances,
    get_permutation_importances,
    get_feature_importances,
    )

# Apply seaborn's default theme. `sns.set()` is only an alias of
# `set_theme()`, which seaborn documents as the preferred interface.
sns.set_theme()
# NOTE(review): this silences every Python warning from here on; consider
# narrowing it to specific categories so real problems are not hidden.
warnings.simplefilter("ignore")
# Continuous "coolwarm" colormap, passed to the Styler calls that shade the
# importance tables in the cells below.
cm = sns.color_palette("coolwarm", as_cmap=True)

In [2]:
# Synthetic fixture for the feature-importance demos in the cells below.
#
# A fixed seed makes the frame identical on every Restart & Run All; the
# original draws came from the unseeded global NumPy RNG, so the data (and
# every table computed from it) changed on each run.
SEED = 42
N_ROWS = 1000
rng = np.random.default_rng(SEED)

# column_a / column_b keep their original (1, N_ROWS) shape so any code that
# indexes them with [0] keeps working.
column_a = rng.random((1, N_ROWS))            # uniform floats in [0, 1)
column_b = rng.random((1, N_ROWS))            # uniform floats in [0, 1)
column_c = rng.integers(2, size=N_ROWS)       # integers in {0, 1}
column_d = rng.integers(10, size=N_ROWS)      # integers in 0..9
column_e = rng.normal(0, 1, size=N_ROWS)      # normal, mean 0, std 1
column_f = rng.choice([True, False], N_ROWS)  # booleans
column_g = rng.choice([1, 0], N_ROWS)         # 0/1 integers

# Build the frame in one call instead of growing it column by column.
test_df = pd.DataFrame({
    'a': column_a[0],
    'b': column_b[0],
    'c': column_c,
    'd': column_d,
    'e': column_e,
    'f': column_f,
    'g': column_g,
    # Each target is an exact copy of one feature (a, c, d respectively),
    # so that feature is a perfect predictor of its target.
    'target_reg': column_a[0],
    'target_clf_binary': column_c,
    'target_clf_multi': column_d,
})

# Last expression of the cell: the notebook renders it as the cell output.
test_df.head()

Unnamed: 0,a,b,c,d,e,f,g,target_reg,target_clf_binary,target_clf_multi
0,0.427945,0.768178,0,3,0.774484,True,0,0.427945,0,3
1,0.882503,0.068383,0,4,-0.705253,True,0,0.882503,0,4
2,0.4302,0.153434,1,6,1.329119,True,1,0.4302,1,6
3,0.753384,0.95635,1,4,0.386564,False,0,0.753384,1,4
4,0.307143,0.707226,1,0,-0.920966,True,0,0.307143,1,0


In [3]:
# Numeric features a, b, e scored against the regression target.
# 'a' is an exact copy of target_reg in test_df.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=['a', 'b', 'e'],
    cat_features=[],
    target='target_reg',
    task='reg',
    method='all',
)
# Shade the resulting table with the shared colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,ANOVA,LGBM Imp.,PI mean,PI std,Rank
a,1.0,,1502.1,0.411747,0.018241,1.060601
e,0.035756,,317.9,7e-06,1.7e-05,-0.522712
b,0.017616,,306.0,1.4e-05,1.6e-05,-0.537889
TT (Sec),0.003004,0.001998,2.024012,3.767669,0.0,0.0


In [5]:
# Numeric features a, b, e scored against the binary classification target.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=['a', 'b', 'e'],
    cat_features=[],
    target='target_clf_binary',
    task='clf_binary',
    method='all',
)
# Shade the resulting table with the shared colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
a,0.056167,,,3.15834,,60.9,0.042919,0.041592,0.652115
b,0.043754,,,1.914274,,49.4,0.017558,0.04288,-0.277578
e,0.009576,,,0.091521,,54.7,0.031839,0.036136,-0.374536
TT (Sec),0.003004,0.001,0.000998,0.001998,0.001,1.899049,3.81392,0.0,0.0


In [6]:
# Numeric features a, b, e scored against the multi-class target.
# NOTE(review): the task string 'clf_multiable' is unusual — confirm it
# against the task names featimp accepts.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=['a', 'b', 'e'],
    cat_features=[],
    target='target_clf_multi',
    task='clf_multiable',
    method='all',
)
# Shade the resulting table with the shared colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
e,,,,0.803233,,102.3,-0.011315,0.024471,0.223783
a,,,,0.311322,,98.2,-0.011559,0.019661,-0.069209
b,,,,3.144636,,96.2,-0.024946,0.022128,-0.154574
TT (Sec),0.0,0.000999,0.001,0.004031,0.001026,8.675785,4.245188,0.0,0.0


In [6]:
# Categorical features c, d scored against the regression target.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=['c', 'd'],
    target='target_reg',
    task='reg',
    method='all',
)
# Shade the resulting table with the shared colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,ANOVA,LGBM Imp.,PI mean,PI std,Rank
d,,0.012179,41.5,0.000301,0.00309,0.75
c,,0.003391,17.4,-6.7e-05,0.001902,-0.75
TT (Sec),0.002004,0.002,1.72278,2.450246,0.0,0.0


In [7]:
# Categorical features c, d scored against the binary classification target.
# 'c' is an exact copy of target_clf_binary in test_df.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=['c', 'd'],
    target='target_clf_binary',
    task='clf_binary',
    method='all',
)
# Shade the resulting table with the shared colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
c,,0.997988,536.0,,0.691053,100.0,0.504763,0.039265,0.428571
d,,0.009495,2e-05,,0.019505,124.7,0.0,0.0,-0.428571
TT (Sec),0.001002,0.017998,0.004029,0.000969,0.016004,1.356204,2.584228,0.0,0.0


In [8]:
# Categorical features c, d scored against the multi-class target.
# 'd' is an exact copy of target_clf_multi in test_df.
# NOTE(review): the task string 'clf_multiable' is unusual — confirm it
# against the task names featimp accepts.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=['c', 'd'],
    target='target_clf_multi',
    task='clf_multiable',
    method='all',
)
# Shade the resulting table with the shared colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
d,,1.0,1727.705421,,2.303832,4175.8,0.90063,0.026206,0.714286
c,,0.009495,4.877103,,0.018941,1866.5,0.0,0.0,-0.714286
TT (Sec),0.0,0.019974,0.001998,0.001041,0.029002,3.285459,2.692919,0.0,0.0


In [7]:
# Boolean-like features f (True/False) and g (1/0) scored against the
# regression target.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=[],
    bool_features=['f', 'g'],
    target='target_reg',
    task='reg',
    method='all',
)
# Shade the resulting table with the shared colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,ANOVA,LGBM Imp.,PI mean,PI std,Rank
g,0.02602,0.67617,41.0,-0.000274,0.00101,0.5
f,0.017554,0.307615,21.4,-6.6e-05,0.000617,-0.5
TT (Sec),0.003,0.004002,1.388907,2.232713,0.0,0.0


In [8]:
# Boolean-like features f (True/False) and g (1/0) scored against the
# binary classification target.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=[],
    bool_features=['f', 'g'],
    target='target_clf_binary',
    task='clf_binary',
    method='all',
)
# Shade the resulting table with the shared colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
f,0.007973,0.0,0.031719,0.063442,0.006558,100.0,0.035248,0.039558,0.0
g,0.016663,0.0,0.132446,0.277186,0.0,65.9,0.02205,0.039286,0.0
TT (Sec),0.003001,0.014001,0.004001,0.002029,0.016969,1.374997,2.536754,0.0,0.0


In [9]:
# Boolean-like features f (True/False) and g (1/0) scored against the
# multi-class target.
# NOTE(review): the task string 'clf_multiable' is unusual — confirm it
# against the task names featimp accepts.
fi_temp_df = get_feature_importances(
    data=test_df,
    num_features=[],
    cat_features=[],
    bool_features=['f', 'g'],
    target='target_clf_multi',
    task='clf_multiable',
    method='all',
)
# Shade the resulting table with the shared colormap.
fi_temp_df.style.background_gradient(cmap=cm)

Unnamed: 0,Corr,Chi_Square_Crosstab,Chi_Square,ANOVA,MI Scores,LGBM Imp.,PI mean,PI std,Rank
f,,0.054819,5.993562,0.723501,0.018203,47.9,0.004621,0.009627,0.857143
g,,0.050709,5.522646,0.001963,0.0,45.1,0.001342,0.009824,-0.857143
TT (Sec),0.0,0.015032,0.002999,0.001971,0.023998,2.324852,2.672503,0.0,0.0
