In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
# Load the two feature-importance tables that are compared below.
feat_imp1, feat_imp2 = (
    pd.read_csv(csv_path)
    for csv_path in ('feature_imp1.csv', 'feature_imp2.csv')
)

In [3]:
def process_imp(df, imp_colname):
    """Rank the rows of an importance table and add derived columns.

    The input frame is left untouched; a sorted copy is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one row per feature.
    imp_colname : str
        Name of the column holding the importance score.

    Returns
    -------
    pandas.DataFrame
        Rows ordered by descending importance with a fresh 0..n-1 index and
        two extra columns: ``relative_imp`` (score divided by the maximum
        score, rounded to 3 decimals) and ``feat_rank`` (1-based position
        in the sorted order).
    """
    ranked = df.sort_values(by=imp_colname, ascending=False)
    ranked = ranked.reset_index(drop=True)

    scores = ranked[imp_colname]
    # Multiplying by 1.0 forces float division even for integer scores.
    scaled = scores * 1.0 / scores.max()
    ranked['relative_imp'] = scaled.apply(lambda value: round(value, 3))

    # The index was just reset, so position + 1 is the rank.
    ranked['feat_rank'] = ranked.index.values + 1
    return ranked

In [4]:
# Rank both tables on their 'imp' column (adds relative_imp and feat_rank).
feat_imp1, feat_imp2 = (
    process_imp(table, 'imp') for table in (feat_imp1, feat_imp2)
)

In [24]:
# Chart palette, declared as (regular, dark) pairs per hue.
color_red, color_dark_red = '#d53e4f', '#67000d'
color_green, color_dark_green = '#66c2a5', '#00441b'
color_gray, color_dark_gray = '#969696', '#525252'
color_blue, color_dark_blue = '#74a9cf', '#08306b'

def rank2color(x):
    """Pick the bar color for one merged row from its two ranks.

    Parameters
    ----------
    x : mapping or pandas.Series
        Row exposing ``feat_rank_x`` and ``feat_rank_y``.

    Returns
    -------
    str
        ``color_gray`` when ``feat_rank_y`` is missing, ``color_blue`` when
        ``feat_rank_x`` is missing, ``color_red`` when ``feat_rank_x`` is
        strictly smaller than ``feat_rank_y``, and ``color_green`` otherwise.
    """
    left_rank = x['feat_rank_x']
    right_rank = x['feat_rank_y']

    # Missing ranks are tested first: comparing None or pd.NA with a number
    # raises (or yields an ambiguous truth value) instead of being False the
    # way NaN is, so the comparisons must only see real values.
    if pd.isnull(right_rank):
        return color_gray
    if pd.isnull(left_rank):
        return color_blue

    if left_rank < right_rank:
        return color_red
    return color_green
    
def rank2fontcolor(x):
    """Pick the label (font) color for one merged row from its two ranks.

    Parameters
    ----------
    x : mapping or pandas.Series
        Row exposing ``feat_rank_x`` and ``feat_rank_y``.

    Returns
    -------
    str
        ``color_dark_gray`` when ``feat_rank_y`` is missing,
        ``color_dark_blue`` when ``feat_rank_x`` is missing,
        ``color_dark_red`` when ``feat_rank_x`` is strictly smaller than
        ``feat_rank_y``, and ``color_dark_green`` otherwise.
    """
    left_rank = x['feat_rank_x']
    right_rank = x['feat_rank_y']

    # Missing ranks are tested first: comparing None or pd.NA with a number
    # raises (or yields an ambiguous truth value) instead of being False the
    # way NaN is, so the comparisons must only see real values.
    if pd.isnull(right_rank):
        return color_dark_gray
    if pd.isnull(left_rank):
        return color_dark_blue

    if left_rank < right_rank:
        return color_dark_red
    return color_dark_green
    
def get_mark(x):
    """Return "1" when the row has both ranks, "0" when either is missing.

    Parameters
    ----------
    x : mapping or pandas.Series
        Row exposing ``feat_rank_x`` and ``feat_rank_y``.

    Returns
    -------
    str
        The string "0" or "1" (not an integer).
    """
    rank_missing = any(
        pd.isnull(x[rank_col]) for rank_col in ('feat_rank_y', 'feat_rank_x')
    )
    if rank_missing:
        return "0"
    return "1"

def merge_feat_imp(df1, df2, feat_colname, top_n=None):
    """Outer-merge two importance tables and attach chart styling columns.

    Neither input frame is modified; the ``pos`` tag is added to copies.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Left and right tables. Both must contain ``feat_colname`` and a
        ``feat_rank`` column (read after the merge as ``feat_rank_x`` /
        ``feat_rank_y``).
    feat_colname : str
        Column to join on.
    top_n : int, optional
        When truthy, only the first ``top_n`` rows of each table take part
        in the merge. Rows are taken in their existing order, so the tables
        are presumably expected to be sorted by rank already -- confirm with
        the caller. ``None`` (or 0) merges the full tables.

    Returns
    -------
    pandas.DataFrame
        The outer merge (overlapping columns suffixed ``_x`` / ``_y``) plus
        ``bar_color``, ``font_color`` and ``bar_mark`` columns computed per
        row by ``rank2color``, ``rank2fontcolor`` and ``get_mark``.
    """
    # assign() returns new frames, so the caller's tables are not mutated.
    left = df1.assign(pos='left')
    right = df2.assign(pos='right')

    if top_n:
        # Honor the requested size instead of a hard-coded 20.
        left = left.head(top_n)
        right = right.head(top_n)

    both_imp = left.merge(right, on=feat_colname, how='outer')

    both_imp['bar_color'] = both_imp.apply(rank2color, axis=1)
    both_imp['font_color'] = both_imp.apply(rank2fontcolor, axis=1)
    both_imp['bar_mark'] = both_imp.apply(get_mark, axis=1)

    return both_imp

In [25]:
# Number of leading rows of each table to include in the comparison.
top_n = 20
both_imp = merge_feat_imp(
    df1=feat_imp1,
    df2=feat_imp2,
    feat_colname='feat_name',
    top_n=top_n,
)

## Prepare data for the graph

In [26]:
# Show the merged column names; columns present in both inputs carry the
# merge suffixes _x (left table) and _y (right table).
both_imp.columns.values

array(['feat_name', 'imp_x', 'relative_imp_x', 'feat_rank_x', 'pos_x',
       'imp_y', 'relative_imp_y', 'feat_rank_y', 'pos_y', 'bar_color',
       'font_color', 'bar_mark'], dtype=object)

In [28]:
# Left-hand bars: rows that have a left-side importance, largest first,
# with the '_x' merge suffix removed from the column names.
bar_left_data = (
    both_imp.loc[:, ['feat_name', 'relative_imp_x', 'pos_x',
                     'bar_color', 'font_color', 'bar_mark']]
    .dropna()
    .sort_values('relative_imp_x', ascending=False)
    .rename(columns=lambda col: col.replace('_x', ''))
)

# Right-hand bars: same recipe for the '_y' side.
bar_right_data = (
    both_imp.loc[:, ['feat_name', 'relative_imp_y', 'pos_y',
                     'bar_color', 'font_color', 'bar_mark']]
    .dropna()
    .sort_values('relative_imp_y', ascending=False)
    .rename(columns=lambda col: col.replace('_y', ''))
)

# Connector lines: only rows with no missing value on either side,
# reduced to the feature name and its color.
line_data = (
    both_imp.loc[:, ['feat_name', 'bar_color', 'feat_rank_x', 'feat_rank_y']]
    .dropna()
    .loc[:, ['feat_name', 'bar_color']]
)

In [29]:
import json
import jinja2

In [30]:
# render the output
# Read the template through a context manager so the handle is closed, and
# decode explicitly so the result does not depend on the platform default.
with open('bar_chart_new.html', 'rb') as template_fh:
    temp = template_fh.read().decode('utf-8')
template = jinja2.Template(temp)

# Render before opening the output file, so a template error does not leave
# a truncated bar_chart_gan.html behind.
rendered = template.render({'bar_left_data': bar_left_data.to_dict('records'),
                            'bar_right_data': bar_right_data.to_dict('records'),
                            'line_data': line_data.to_dict('records')})

# render() returns text, while a file opened with 'wb' accepts only bytes;
# encode explicitly (writing the text directly raises TypeError on Python 3).
with open('bar_chart_gan.html', 'wb') as fh:
    fh.write(rendered.encode('utf-8'))