# In [None]:
import os
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

# Load the feature-importance and AUC tables, export the top-ranked feature
# lists, and derive the columns that the plotting code reads afterwards.
# All inputs/outputs live under <working directory>/Data.
current_path = os.getcwd()
data_dir = os.path.join(current_path, 'Data')

# Feature importances, ranked from most to least important. The sort is done
# in place, so the original row labels are kept (only the row order changes).
feature_imp = pd.read_csv(os.path.join(data_dir, 'feature_imp.csv'),
                          encoding='utf-8')
feature_imp.sort_values(by='Importances', ascending=False, inplace=True)

# AUC table with 'AUC_mean' and 'AUC_std' columns.
# NOTE(review): 'Unnamed: 0' is presumably the row index saved by an earlier
# to_csv call (0-based), which would make 'f_idx' a 1-based position --
# confirm against the script that writes features_auc.csv.
feature_auc = pd.read_csv(os.path.join(data_dir, 'features_auc.csv'),
                          encoding='utf-8')
feature_auc['f_idx'] = feature_auc['Unnamed: 0'] + 1

# Names of the 13 and 5 highest-importance features, respectively.
SELECTED_FEATURES = feature_imp['Features'].head(13)
FINAL_FEATURES = feature_imp['Features'].head(5)

# index=False is the documented way to omit the row index from the CSV.
feature_df = pd.DataFrame({'selected Features': SELECTED_FEATURES})
feature_df.to_csv(os.path.join(data_dir, 'shap_selected_features.csv'),
                  encoding='utf-8', index=False)

final_feature_df = pd.DataFrame({'features': FINAL_FEATURES})
final_feature_df.to_csv(os.path.join(data_dir, 'final_features.csv'),
                        encoding='utf-8', index=False)

# One-standard-deviation band around the mean AUC (used for the shaded area).
feature_auc['AUC_lower'] = feature_auc['AUC_mean'] - feature_auc['AUC_std']
feature_auc['AUC_upper'] = feature_auc['AUC_mean'] + feature_auc['AUC_std']

# Draw the feature-importance bar chart (left axis) overlaid with the mean AUC
# curve and its +/- one-std band (right axis), then save it as PNG and PDF.

# The font must be configured BEFORE the figure is created: Matplotlib text
# objects (axis labels, tick labels) capture the font family when they are
# constructed, so setting it afterwards leaves the left axis in the default
# font while the twin axis created later would use Arial.
plt.rcParams['font.family'] = 'Arial'

fig, ax = plt.subplots(figsize=(6.75, 2.7), dpi=1200)
ax.grid(axis='y', alpha=0.5)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# One bar per feature, in the row order of feature_imp, shaded from dark to
# light blue (the "Blues" palette is reversed so the first bar is darkest).
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue`; revisit when the seaborn version is pinned.
palette = sns.color_palette("Blues", n_colors=len(feature_imp))
palette.reverse()
sns.barplot(ax=ax, x="Features", y="Importances", palette=palette,
            data=feature_imp)
ax.tick_params(axis='y', labelsize=8)
ax.set_xticklabels(feature_imp['Features'], rotation=30,
                   fontsize=8, fontweight='bold',
                   horizontalalignment='right')

# The first nb_f tick labels are drawn in red, the remaining ones in black.
# NOTE(review): nb_f is 10 here while SELECTED_FEATURES keeps 13 features --
# confirm which cut-off the figure is meant to highlight.
nb_f = 10
col = ['r'] * nb_f + ['k'] * (len(feature_imp) - nb_f)
for ticklabel, tickcolor in zip(ax.get_xticklabels(), col):
    ticklabel.set_color(tickcolor)
ax.set_ylabel('Predictor Importance', weight='bold', fontsize=10)
ax.set_xlabel('')
ax.set_axisbelow(True)

# Secondary y-axis sharing the same x positions as the bars.
ax2 = ax.twinx()
ax2.spines['top'].set_visible(False)

# The AUC curve is drawn in three pieces: x = 0..nb_f in red, x = nb_f+1..end
# in black, and a black bridging segment between nb_f and nb_f+1 (whose marker
# at x = nb_f is drawn on top of the red one).
# Assumes feature_auc has one row per row of feature_imp -- TODO confirm.
ax2.plot(np.arange(nb_f + 1), feature_auc['AUC_mean'].iloc[:nb_f + 1], 'red',
         alpha=0.8, marker='o', linewidth=1, markersize=5)
ax2.plot(np.arange(nb_f + 1, len(feature_imp)),
         feature_auc['AUC_mean'].iloc[nb_f + 1:], 'black',
         alpha=0.8, marker='o', linewidth=1, markersize=5)
ax2.plot([nb_f, nb_f + 1], feature_auc['AUC_mean'].iloc[nb_f:nb_f + 2],
         'black', alpha=0.8, marker='o', linewidth=1, markersize=5)

# Shaded mean +/- std band; f_idx - 1 puts it on the same x positions as the
# curve above.
ax2.fill_between(feature_auc['f_idx'] - 1, feature_auc['AUC_lower'],
                 feature_auc['AUC_upper'], color='tomato', alpha=0.2)
ax2.set_ylabel('Cumulative AUROC', weight='bold', fontsize=10)
ax2.tick_params(axis='y', labelsize=8)
fig.tight_layout()
# The x-axis is shared between ax and ax2, so this applies to both.
ax2.set_xlim([-.6, len(feature_imp) - .2])

# Make sure the output folders exist; savefig does not create them.
results_dir = os.path.join(current_path, 'Results')
pdf_dir = os.path.join(results_dir, 'pdf')
os.makedirs(pdf_dir, exist_ok=True)

fig.savefig(os.path.join(results_dir, 'feature_imp_roc.png'), dpi=1200,
            bbox_inches='tight', transparent=True)
fig.savefig(os.path.join(pdf_dir, 'feature_imp_roc.pdf'),
            format='pdf', bbox_inches='tight')
plt.show()