In [1]:
import pandas as pd
import numpy as np
import jinja2

In [2]:
# Load the two tables that are later passed to feature_drift_graph.
# The call further down expects each file to provide the columns
# 'feat_name' and 'imp'.
feat_imp1 = pd.read_csv(filepath_or_buffer='feature_imp1.csv')
feat_imp2 = pd.read_csv(filepath_or_buffer='feature_imp2.csv')

In [3]:
# Palette shared by the rank-colouring helpers and the chart legend.
# Each status has a light shade (bar colour, returned by _rank2color) and a
# dark shade (font colour, returned by _rank2fontcolor).
color_red, color_dark_red = '#d53e4f', '#67000d'      # legend: 'Drop'
color_green, color_dark_green = '#66c2a5', '#00441b'  # legend: 'Up & Stable'
color_gray, color_dark_gray = '#969696', '#525252'    # legend: 'Disappear'
color_blue, color_dark_blue = '#74a9cf', '#08306b'    # legend: 'Appear'

def _process_imp(imp_df, imp_name):
    imp_df = imp_df.sort_values(by=imp_name, ascending=False).reset_index(drop=True)
    imp_df['relative_imp'] = imp_df[imp_name] * 1.0 / imp_df[imp_name].max()
    imp_df['relative_imp'] = imp_df['relative_imp'].apply(lambda x : round(x, 3))
    imp_df['feat_rank'] = imp_df.index.values + 1
    return imp_df

def _rank2color(x):
    if x['feat_rank_x'] < x['feat_rank_y']:
        return color_red
    if x['feat_rank_x'] >= x['feat_rank_y']:
        return color_green
    if pd.isnull(x['feat_rank_y']):
        return color_gray
    if pd.isnull(x['feat_rank_x']):
        return color_blue
    
def _rank2fontcolor(x):
    if x['feat_rank_x'] < x['feat_rank_y']:
        return color_dark_red
    if x['feat_rank_x'] >= x['feat_rank_y']:
        return color_dark_green
    if pd.isnull(x['feat_rank_y']):
        return color_dark_gray
    if pd.isnull(x['feat_rank_x']):
        return color_dark_blue
    
def _get_mark(x):
    if pd.isnull(x['feat_rank_y']) or pd.isnull(x['feat_rank_x']):
        return "0"
    else:
        return "1"
    
def _merge_feat_imp(imp_df1, imp_df2, feature_name, top_n):
    imp_df1['pos'] = 'left'
    imp_df2['pos'] = 'right'
    if top_n:
        both_imp = imp_df1.head(top_n).merge(imp_df2.head(top_n), on=feature_name, how='outer')
    else:
        both_imp = imp_df1.merge(imp_df2, on=feature_name, how='outer')
        
    both_imp['bar_color'] = both_imp.apply(lambda x : _rank2color(x), axis=1)
    both_imp['font_color'] = both_imp.apply(lambda x : _rank2fontcolor(x), axis=1)
    both_imp['bar_mark'] = both_imp.apply(lambda x : _get_mark(x), axis=1)
    
    return both_imp

In [46]:
def feature_drift_graph(feat_imp1, feat_imp2, feature_name, imp_name, ds_name1, ds_name2,
                        top_n=None, max_bar_width=300, bar_height=30, middle_gap=300, fontsize=12,
                        template_path='feature_drift_template.html',
                        output_path='feature_drift_gan.html'):
    """Render an HTML comparison of two feature-importance tables.

    Both tables are ranked with ``_process_imp`` and merged with
    ``_merge_feat_imp``. The resulting rows are passed to a Jinja2 template
    and the rendered page is written to ``output_path``.

    Parameters
    ----------
    feat_imp1, feat_imp2 : pandas.DataFrame
        Importance tables containing the columns ``feature_name`` and
        ``imp_name``.
    feature_name : str
        Name of the feature-identifier column. It is exposed to the template
        under the fixed key ``'feat_name'`` whatever the source column is
        called.
    imp_name : str
        Name of the importance column.
    ds_name1, ds_name2 : str
        Dataset labels, passed to the template unchanged.
    top_n : int or None
        When truthy, only the ``top_n`` highest-ranked rows of each table
        are compared.
    max_bar_width, bar_height, middle_gap, fontsize : int
        Layout values, passed to the template unchanged.
    template_path : str
        Path of the Jinja2 template to read.
    output_path : str
        Path of the HTML file to write (UTF-8 encoded).

    Returns
    -------
    None
    """
    feat_imp1 = _process_imp(feat_imp1, imp_name)
    feat_imp2 = _process_imp(feat_imp2, imp_name)

    both_imp = _merge_feat_imp(feat_imp1, feat_imp2, feature_name, top_n)

    bar_left_data = _bar_records(both_imp, feature_name, '_x')
    bar_right_data = _bar_records(both_imp, feature_name, '_y')

    # Connecting lines only exist for features ranked on both sides.
    line_data = (both_imp[[feature_name, 'bar_color', 'feat_rank_x', 'feat_rank_y']]
                 .dropna()[[feature_name, 'bar_color']]
                 .rename(columns={feature_name: 'feat_name'}))

    legend_data = [
        {'name': 'Drop', 'color': color_red},
        {'name': 'Up & Stable', 'color': color_green},
        {'name': 'Disappear', 'color': color_gray},
        {'name': 'Appear', 'color': color_blue}
    ]

    # render the output
    # NOTE(review): jinja2.Template does not autoescape by default, so feature
    # and dataset names reach the page as-is -- confirm the inputs are trusted.
    with open(template_path) as template_file:
        template = jinja2.Template(template_file.read())

    html = template.render({'bar_left_data': bar_left_data.to_dict('records'),
                            'bar_right_data': bar_right_data.to_dict('records'),
                            'line_data': line_data.to_dict('records'),
                            'legend_data': legend_data,
                            'max_bar_width': max_bar_width, 'bar_height': bar_height,
                            'middle_gap': middle_gap, 'fontsize': fontsize,
                            'ds_name1': ds_name1, 'ds_name2': ds_name2})

    # The file is opened in binary mode, so the rendered text must be encoded
    # explicitly; writing the str directly raises TypeError on Python 3.
    with open(output_path, 'wb') as fh:
        fh.write(html.encode('utf-8'))


def _bar_records(both_imp, feature_name, suffix):
    """Select one side's bar rows ('_x' or '_y'), sorted by importance, with suffix-free column names."""
    imp_col = 'relative_imp' + suffix
    pos_col = 'pos' + suffix
    bars = both_imp[[feature_name, imp_col, pos_col, 'bar_color', 'font_color', 'bar_mark']].dropna()
    bars = bars.sort_values(imp_col, ascending=False)
    return bars.rename(columns={feature_name: 'feat_name',
                                imp_col: 'relative_imp',
                                pos_col: 'pos'})

In [48]:
# Compare the 25 highest-ranked features of the two loaded tables, labelled
# 'train' and 'test', and write the rendered HTML chart.
feature_drift_graph(
    feat_imp1, feat_imp2,
    feature_name='feat_name', imp_name='imp',
    ds_name1='train', ds_name2='test',
    top_n=25,
    max_bar_width=300, bar_height=30, middle_gap=300, fontsize=12
)