To compare rank-order changes in the weights of our machine learning fits, we want to make a slopegraph. This is an approach described by Tufte in which, e.g., two rank orders are listed side by side as columns of text labels (with their numerical values shown as well). Lines are then drawn connecting each category across the columns, showing how its rank order shifted.

The code below was adapted from this GitHub repo: https://github.com/mhlinder/tufte-slopegraph

In [393]:
# http://www.edwardtufte.com/bboard/q-and-a-fetch-msg?msg_id=0003nk
import numpy as np
import pandas as pd
import matplotlib
from IPython.display import display
matplotlib.use('SVG')
import matplotlib.pyplot as plt

def scale(val, src, dst):
    """
        Linearly map `val` from the range `src` onto the range `dst`.

        `src` and `dst` are indexable pairs (low, high); only their first
        two elements are read.  `val` is converted with float() first, so
        the result is always a float.  A zero-width `src` range divides by
        zero.
    """
    src_low, src_high = src[0], src[1]
    dst_low, dst_high = dst[0], dst[1]

    # Position of val inside the source range (0.0 at src low, 1.0 at high).
    fraction = (float(val) - src_low) / (src_high - src_low)
    return fraction * (dst_high - dst_low) + dst_low


def vertplace(j, col, fontsize, src=None, dst=None):
    """
        Return the vertical position for the j-th label of `col`, pushed
        down when it would sit too close to the label placed before it.

        Labels are placed in order from position 0.  Each label goes at its
        own (optionally scaled) value unless that is less than `fontsize`
        away from the previous label's final position, in which case it is
        placed exactly `fontsize` below the previous label.

        j        -- positional index into `col` of the label to place
        col      -- object supporting positional `.iloc[...]` lookup
                    (e.g. a pandas Series)
        fontsize -- minimum vertical gap between consecutive labels, in
                    the same units as the returned position
        src, dst -- optional (low, high) ranges; when both are given every
                    value is mapped through scale(val, src, dst), when both
                    are omitted the raw values are used as positions

        Raises ValueError if only one of `src` / `dst` is supplied.
    """
    if (src is None) != (dst is None):
        raise ValueError('src and dst must be given together')

    def place(val):
        # Without ranges the values are taken to be plot coordinates already.
        if src is None:
            return val
        return scale(val, src, dst)

    if j <= 0:
        return place(col.iloc[j])

    # Walk down from the first label, carrying the previous label's final
    # position forward (iterative form of the original recursion).
    y = place(col.iloc[0])
    for k in range(1, j + 1):
        candidate = place(col.iloc[k])
        if abs(candidate - y) < fontsize:
            y = y - fontsize
        else:
            y = candidate
    return y


def plot_slopegraph(data_dict, title, abs_vals=False, byval=True):
    """
        Draw a slopegraph and display it with plt.show().

        Takes a dictionary in the form
            {obs_name1: {class1: val1, class2: val2, class3: val3, ...}, ...}
        Allows for N obs_names and T classes.  The classes are read from the
        first observation, sorted, and laid out left to right; every
        observation must supply a value for each of them.

        data_dict -- mapping of observation name (a string, it is
                     concatenated into the labels) to {class: value}
        title     -- axes title
        abs_vals  -- if True, order the working tables by the absolute
                     value of the first class rather than its signed value
        byval     -- if True, each line is drawn at the absolute values;
                     otherwise at the within-class rank of the absolute
                     values (pandas' default ranking)

        Each line is labelled at its left and right ends with the
        observation name and the signed value rounded to two decimals.

        Returns None.  Raises ValueError if data_dict is empty.
    """

    # Materialise the keys as lists: dict views cannot be indexed or
    # concatenated on Python 3.
    obs_names = list(data_dict)
    if not obs_names:
        raise ValueError('data_dict must contain at least one observation')
    classes = sorted(data_dict[obs_names[0]])
    first_class = classes[0]

    df = pd.DataFrame({'observation': obs_names})
    for cls in classes:
        df[cls] = [data_dict[obs_name][cls] for obs_name in obs_names]

    # Sort by first column.  Rows are looked up by observation name below,
    # so this ordering does not change what is drawn; it keeps both tables
    # in the same, predictable order.
    if abs_vals:
        df_abs = df.copy()
        df_abs[classes] = df_abs[classes].abs()
        df_abs = df_abs.sort_values(first_class)
        # Put the signed table in the same order as the absolute one.
        df = df.loc[df_abs.index]
    else:
        df = df.sort_values(first_class)
        df_abs = df.copy()
        df_abs[classes] = df_abs[classes].abs()

    ## PLOTTING ##

    # General figure settings
    fig, ax = plt.subplots(facecolor='white', figsize=(6, 8),
                           dpi=80)

    # One empty tick on each side leaves room for the text labels.
    x_labs = ['\n' + str(lab) for lab in [''] + classes + ['']]
    x_vals = np.arange(len(x_labs))

    color = '-k'

    # Ranks do not depend on the observation being drawn, so compute them
    # once, and only over the numeric class columns.
    ranks = None if byval else df_abs[classes].rank()

    for obs_name in obs_names:
        # Plot slope lines
        row = df['observation'] == obs_name
        values = df.loc[row, classes].values[0]
        if byval:
            ys = np.abs(values)
        else:
            ys = ranks.loc[row].values[0]
        ax.plot(x_vals[1:-1], ys, color)

        # Add text labels at the outer ends of the line: right-aligned
        # against the first class, left-aligned against the last one.
        ax.text(x_vals[1], ys[0],
                obs_name + ' ' + str(round(values[0], 2)),
                horizontalalignment='right',
                verticalalignment='center')
        ax.text(x_vals[-2], ys[-1],
                str(round(values[-1], 2)) + ' ' + obs_name,
                horizontalalignment='left',
                verticalalignment='center')

    ax.set_title(title)
    ax.set_xticks(x_vals)
    ax.set_xticklabels(x_labs, fontsize=16)
    ax.set_frame_on(False)
    # Booleans rather than 'off' strings: the strings are not treated as
    # False by newer matplotlib releases.
    ax.tick_params(axis='both', which='both', left=False, right=False,
                   labelleft=False, bottom=False, top=False, pad=10)

    plt.show()

In [390]:
# Data from our fits
heuristic_params = {'intersects, near_angle': 2.553956,
                    'intersects^2': 1.961741,
                    'near_angle, intersects_street': -1.310491,
                    'near_angle, same_side': 0.728959,
                    'near_angle, same_block': 0.395454,
                    'intersects': 0.316879,
                    'near_angle': -0.082444,
                    'near_angle, sw_width_diff': -0.050279,
                    'near_distance': 0.003045}

crowdsource_params = {'intersects_street^2': -1.202853,
                      'intersects_street': -0.782623,
                      'intersects, near_angle': 0.633763,
                      'near_angle, same_block': 0.596207,
                      'near_angle, intersects_street': -0.530027,
                      'near_angle, near_distance': -0.024370,
                      'near_angle': -0.021510,
                      'near_distance': -0.005418}

# Drop a hand-picked set of near-zero weights (all below 0.1 in magnitude)
# so the plot focuses on the larger ones.  This is a fixed list, not a
# threshold: e.g. 'near_angle, near_distance' (-0.02437) is kept.
heuristic_params.pop('near_distance', None)
heuristic_params.pop('near_angle, sw_width_diff', None)
heuristic_params.pop('near_angle', None)
crowdsource_params.pop('near_distance', None)
crowdsource_params.pop('near_angle', None)


# Do some set operations to see where the params intersect
heuristic_set = set(heuristic_params)
crowdsource_set = set(crowdsource_params)
# Single-argument print(...) works as both a Python 2 statement and a
# Python 3 call; the two spaces match the original output spacing.
print('Intersection:  ' + str(heuristic_set.intersection(crowdsource_set)))
print('Difference:  ' + str(heuristic_set.symmetric_difference(crowdsource_set)))


# Put into format desired by plotting function: every parameter seen in
# either fit gets both entries, with 0.0 where a fit does not have it.
params = {
    key: {'heuristic': heuristic_params.get(key, 0.0),
          'crowdsourced': crowdsource_params.get(key, 0.0)}
    for key in heuristic_set | crowdsource_set
}

params

Intersection:  set(['near_angle, same_block', 'intersects, near_angle', 'near_angle, intersects_street'])
Difference:  set(['intersects_street', 'intersects^2', 'near_angle, near_distance', 'intersects', 'near_angle, same_side', 'intersects_street^2'])


{'intersects': {'crowdsourced': 0.0, 'heuristic': 0.316879},
 'intersects, near_angle': {'crowdsourced': 0.633763, 'heuristic': 2.553956},
 'intersects^2': {'crowdsourced': 0.0, 'heuristic': 1.961741},
 'intersects_street': {'crowdsourced': -0.782623, 'heuristic': 0.0},
 'intersects_street^2': {'crowdsourced': -1.202853, 'heuristic': 0.0},
 'near_angle, intersects_street': {'crowdsourced': -0.530027,
  'heuristic': -1.310491},
 'near_angle, near_distance': {'crowdsourced': -0.02437, 'heuristic': 0.0},
 'near_angle, same_block': {'crowdsourced': 0.596207, 'heuristic': 0.395454},
 'near_angle, same_side': {'crowdsourced': 0.0, 'heuristic': 0.728959}}

In [391]:
# By-value slopegraph: byval=True draws each line at the parameter's
# absolute weight, matching the "byvalue" file name it is saved under.
a = plot_slopegraph(params, 'title', abs_vals=True, byval=True)
plt.savefig('./slopegraph_byvalue_source.svg')

[ 8.  2.]
[ 2.  8.]
[ 2.  6.]
[ 2.  4.]
[ 9.  2.]
[ 6.  5.]
[ 7.  9.]
[ 5.  7.]
[ 4.  2.]




In [392]:
# By-rank slopegraph: byval=False draws each line at the rank of the
# parameter's absolute weight; the current figure is then written as SVG.
b = plot_slopegraph(params, title='title', byval=False, abs_vals=True)
plt.savefig('./slopegraph_byrank_source.svg')