In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
# Load the two feature-importance tables, in the same order as before.
feat_imp1, feat_imp2 = (
    pd.read_csv(csv_path)
    for csv_path in ('feature_imp1.csv', 'feature_imp2.csv')
)

In [3]:
def process_imp(df, imp_colname):
    """Rank a feature-importance table and add normalised importance.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding one importance value per row in ``imp_colname``.
    imp_colname : str
        Name of the column with the raw importance values.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) sorted by ``imp_colname`` in
        descending order with a fresh 0..n-1 index and two extra columns:
        ``relative_imp`` (importance divided by the column maximum, rounded
        to 3 decimals) and ``feat_rank`` (1-based position after sorting).
    """
    ranked = df.sort_values(by=imp_colname, ascending=False)
    ranked = ranked.reset_index(drop=True)

    # Scale against the largest importance so the top feature gets 1.0.
    peak = ranked[imp_colname].max()
    scaled = ranked[imp_colname] * 1.0 / peak
    ranked['relative_imp'] = scaled.apply(lambda value: round(value, 3))

    # The index is 0-based after the reset; ranks start at 1.
    ranked['feat_rank'] = ranked.index.values + 1
    return ranked

In [4]:
# Sort both tables by their 'imp' column and attach relative importance / rank.
feat_imp1, feat_imp2 = (
    process_imp(table, 'imp') for table in (feat_imp1, feat_imp2)
)

In [6]:
# Palette used by rank2color. Each colour encodes how a feature's rank in the
# first table (`feat_rank_x`) relates to its rank in the second (`feat_rank_y`).
color_red = '#d53e4f'    # feat_rank_x <  feat_rank_y
color_green = '#abdda4'  # feat_rank_x >= feat_rank_y
color_gray = '#969696'   # feature has no rank in the second table
color_blue = '#3288bd'   # feature has no rank in the first table


def rank2color(x):
    """Pick the chart colour for one row of the merged importance table.

    Parameters
    ----------
    x : mapping (e.g. a DataFrame row)
        Must provide ``'feat_rank_x'`` and ``'feat_rank_y'``; either may be
        missing (NaN, None or pd.NA) when the feature appears in only one
        of the two tables.

    Returns
    -------
    str
        ``color_gray`` if ``feat_rank_y`` is missing, ``color_blue`` if
        ``feat_rank_x`` is missing, ``color_red`` if ``feat_rank_x`` is
        strictly smaller than ``feat_rank_y``, otherwise ``color_green``.
    """
    rank_x = x['feat_rank_x']
    rank_y = x['feat_rank_y']

    # Handle missing ranks before comparing: ordering comparisons against
    # None / pd.NA raise (or give a meaningless answer), and only float NaN
    # happens to fall through them quietly.
    if pd.isnull(rank_y):
        return color_gray
    if pd.isnull(rank_x):
        return color_blue

    if rank_x < rank_y:
        return color_red
    return color_green

def merge_feat_imp(df1, df2, feat_colname, top_n=None):
    """Outer-join two feature-importance tables and colour each feature.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Importance tables sharing the key column ``feat_colname``. Both need
        a ``feat_rank`` column, because the merge suffixes it to
        ``feat_rank_x`` / ``feat_rank_y``, which ``rank2color`` reads.
    feat_colname : str
        Column to join on.
    top_n : int, optional
        If given (and non-zero), only the first ``top_n`` rows of each table,
        in the order supplied, take part in the merge. With the default
        ``None`` the full tables are merged.

    Returns
    -------
    pandas.DataFrame
        The outer merge of the two (possibly truncated) tables plus a
        ``bar_color`` column computed row by row with ``rank2color``.
    """
    if top_n:
        # Use the caller's cut-off; it was previously fixed at 20 rows
        # regardless of the value passed in.
        df1, df2 = df1.head(top_n), df2.head(top_n)

    both_imp = df1.merge(df2, on=feat_colname, how='outer')
    both_imp['bar_color'] = both_imp.apply(rank2color, axis=1)
    return both_imp

In [7]:
# Number of leading rows from each ranked table requested for the comparison.
top_n = 20
both_imp = merge_feat_imp(
    df1=feat_imp1,
    df2=feat_imp2,
    feat_colname='feat_name',
    top_n=top_n,
)

In [8]:
# Preview the merged table rather than dumping every row into the notebook.
both_imp.head(10)

Unnamed: 0,feat_name,imp_x,relative_imp_x,feat_rank_x,imp_y,relative_imp_y,feat_rank_y,bar_color
0,feat_70,430.755799,1.0,1.0,180.071612,0.852,2.0,#d53e4f
1,feat_27,175.000243,0.406,2.0,28.995664,0.137,18.0,#d53e4f
2,feat_67,162.190462,0.377,3.0,151.574453,0.717,3.0,#abdda4
3,feat_43,159.114082,0.369,4.0,90.617439,0.429,7.0,#d53e4f
4,feat_79,139.530246,0.324,5.0,128.742953,0.609,5.0,#abdda4
5,feat_4,113.18284,0.263,6.0,113.584959,0.537,6.0,#abdda4
6,feat_16,109.927573,0.255,7.0,146.735573,0.694,4.0,#abdda4
7,feat_72,96.304853,0.224,8.0,211.432107,1.0,1.0,#abdda4
8,feat_75,65.347058,0.152,9.0,,,,#969696
9,feat_61,65.300436,0.152,10.0,72.633466,0.344,8.0,#abdda4


## prepare data for the graph

In [9]:
# List the merged column names (note the `_x` / `_y` suffixes) that the
# chart slices below select from.
both_imp.columns.values

array(['feat_name', 'imp_x', 'relative_imp_x', 'feat_rank_x', 'imp_y',
       'relative_imp_y', 'feat_rank_y', 'bar_color'], dtype=object)

In [13]:
# Left-hand bars: features ranked in the first table, ordered by that rank.
# Rows with a missing value in any selected column are dropped.
bar_left_data = (
    both_imp.loc[:, ['feat_name', 'relative_imp_x', 'feat_rank_x', 'bar_color']]
    .dropna()
    .sort_values(by='feat_rank_x')
)

# Right-hand bars: the same for the second table.
bar_right_data = (
    both_imp.loc[:, ['feat_name', 'relative_imp_y', 'feat_rank_y', 'bar_color']]
    .dropna()
    .sort_values(by='feat_rank_y')
)

# Connecting lines: only features that have a rank in both tables.
line_data = (
    both_imp.loc[:, ['feat_name', 'feat_rank_x', 'feat_rank_y', 'bar_color']]
    .dropna()
)

In [14]:
import json
import jinja2

In [15]:
# render the output
# The template is read, and the page written, as explicit UTF-8 bytes.
# `template.render` returns text, which cannot be written to a file opened in
# binary mode without encoding it first. Both files are handled in `with`
# blocks so they are always closed.
with open('bar_chart_new.html', 'rb') as fh:
    temp = fh.read().decode('utf-8')
template = jinja2.Template(temp)

# Render before opening the output file, so a template error does not leave
# an empty or truncated 'bar_chart_gan.html' behind.
rendered = template.render({'bar_left_data': bar_left_data.to_dict('records'),
                            'bar_right_data': bar_right_data.to_dict('records'),
                            'line_data': line_data.to_dict('records')})

with open('bar_chart_gan.html', 'wb') as fh:
    fh.write(rendered.encode('utf-8'))