In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from mlxtend.evaluate import feature_importance_permutation
import altair as alt

In [2]:
# Raw-GitHub location of the CSV loaded by this notebook
# (PacktWorkshops / The-Data-Science-Workshop, Chapter09/Dataset).
file_url = (
    'https://raw.githubusercontent.com/PacktWorkshops/The-Data-Science-Workshop/master/Chapter09/Dataset/phpYYZ4Qc.csv'
)

In [3]:
# Fetch the CSV at `file_url` and parse it into a DataFrame.
df = pd.read_csv(filepath_or_buffer=file_url)

In [4]:
# Separate the target column 'rej' from the features. `pop` removes the column
# from `df` in place, so after this cell `df` holds only the feature columns
# that the cells below rely on.
# Because of that in-place mutation, re-running this cell used to raise
# KeyError; the guard makes the cell idempotent while still failing loudly if
# the target was never extracted.
if 'rej' in df.columns:
    y = df.pop('rej')
elif 'y' not in globals():
    raise KeyError("target column 'rej' not found in df")

In [5]:
# Split features (`df`) and target (`y`) into train and test parts.
# test_size=0.3 reserves 30% of the rows for testing; random_state is pinned
# so the same split is produced on every run.
(X_train, X_test, y_train, y_test) = train_test_split(
    df,
    y,
    test_size=0.3,
    random_state=1,
)

In [6]:
# Random forest regressor with a fixed seed and explicit size limits
# (number of trees, tree depth, minimum samples per leaf).
rf_model = RandomForestRegressor(
    n_estimators=50,
    max_depth=6,
    min_samples_leaf=60,
    random_state=1,
)

In [7]:
# Train the forest on the training split. `fit` is the cell's last expression,
# so the fitted estimator's repr is displayed as the cell output.
rf_model.fit(X=X_train, y=y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=6, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=60,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=50, n_jobs=None, oob_score=False,
                      random_state=1, verbose=0, warm_start=False)

In [8]:
# Permutation importance of every feature, measured on the held-out test split
# with the r2 metric. The function returns two objects; only the first is kept
# (the second is bound to `_` and not used again in this notebook).
# NOTE(review): num_rounds=1 presumably means a single shuffle per feature, so
# the values may be noisy -- confirm against the mlxtend documentation.
imp_vals, _ = feature_importance_permutation(
    predict_method=rf_model.predict,
    X=X_test.values,
    y=y_test.values,
    metric='r2',
    num_rounds=1,
    seed=2,
)
# Display the importance array as the cell output.
imp_vals

array([ 0.00000000e+00, -3.34728428e-05, -2.83476215e-05,  1.03738033e-04,
        4.61246775e-06,  1.96879681e-01,  8.71635991e-05, -7.16980150e-05,
        3.28788126e-04,  1.05860288e-03,  0.00000000e+00,  5.56589408e-01,
       -4.31208212e-05,  1.13215046e-04,  2.22409533e-05,  5.96895938e-05,
        5.35704113e-05,  1.76990072e-01,  2.81084956e-03,  6.79193119e-05,
        0.00000000e+00,  0.00000000e+00,  1.16553234e-02,  2.77582324e-05,
        1.40812233e-04,  1.96362926e-06,  3.66606090e-04, -1.82522826e-04,
        1.14460108e-05,  3.72080724e-05,  0.00000000e+00,  5.54878998e-04])

In [9]:
# Pair every importance value with its feature name.
# The names are taken from `X_test`, the very frame whose `.values` were passed
# to the permutation routine, so label order is guaranteed to match the order
# of `imp_vals` regardless of what has happened to `df` in the meantime.
varimp_df = pd.DataFrame({'feature': X_test.columns, 'importance': imp_vals})

In [10]:
# Horizontal bar chart of permutation importance, one bar per feature.
# Bars are ordered from most to least important and the chart carries a title,
# so the figure can be read on its own when the notebook is skimmed.
alt.Chart(varimp_df).mark_bar().encode(
    x='importance',
    y=alt.Y(
        'feature',
        sort=alt.EncodingSortField(field='importance', order='descending'),
    ),
).properties(
    title='Permutation feature importance (test set, r2)'
)