In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score

from sklearn.metrics import precision_recall_curve
import itertools

import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output. `run_line_magic`
# is the supported API; the older `InteractiveShell.magic()` is deprecated.
get_ipython().run_line_magic('matplotlib', 'inline')

In [2]:
def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues,
                          labels=('failure', 'success')):
    """Draw a row-normalised confusion matrix on the current pyplot figure.

    Each row of ``cm`` is divided by its own total, so every cell shows the
    fraction of that row's samples. Rows are labelled 'True label' and
    columns 'Predicted label'.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Raw confusion counts.
    title : str
        Title placed above the heat map.
    cmap : matplotlib colormap
        Colour map passed to ``plt.imshow``.
    labels : sequence of str
        Tick labels, one per class, in matrix order. Defaults to the
        two-class ('failure', 'success') labelling.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    counts = np.asarray(cm, dtype=float)
    row_totals = counts.sum(axis=1)[:, np.newaxis]
    # A row with no samples would divide by zero and produce NaNs; divide by
    # one instead so that row is drawn as all zeros.
    row_totals[row_totals == 0] = 1.0
    cm_norm = counts / row_totals

    plt.imshow(cm_norm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_labels = list(labels)
    tick_marks = np.arange(len(tick_labels))
    plt.xticks(tick_marks, tick_labels, rotation=45)
    plt.yticks(tick_marks, tick_labels)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Lay out only after all labels exist so they are taken into account.
    plt.tight_layout()
    plt.show()

In [3]:
def plot_pr_curve(df, pred_col, gt_col, score_col, show=True):
    """Compute, and optionally plot, one precision/recall curve per predicted label.

    Rows are grouped by ``pred_col``. Within a group a row is a positive when
    its prediction equals its ground truth, and ``precision_recall_curve`` is
    run on those positives against ``score_col``. The recall values are then
    multiplied by (correct rows in the group) / (rows in ``df`` whose ground
    truth is that label), which re-expresses recall relative to all
    ground-truth rows of the label rather than only the rows predicted as it.

    Side effect: a helper column named ``'pr_curve_<pred_col>'`` (1 where the
    prediction matches the ground truth, else 0) is written onto ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the prediction, ground-truth and score columns.
    pred_col, gt_col, score_col : str
        Column names for predicted label, ground-truth label and score.
    show : bool
        When true, draw every curve on a new 10x8 figure.

    Returns
    -------
    dict
        Maps label -> (precision, recall, thresholds). A label is omitted when
        its curve raised ZeroDivisionError (no ground-truth rows carry that
        label) or IndexError; when plotting, such a label is still drawn as a
        degenerate all-zero curve.
    """
    pr_curve_col = 'pr_curve_{0}'.format(pred_col)
    df[pr_curve_col] = 1*(df[pred_col] == df[gt_col])

    p_r_thresh = {}
    _colors = itertools.cycle(['orangered', 'yellowgreen', 'royalblue',
                               'darkorchid', 'goldenrod', 'teal', 'seagreen', 'firebrick'])
    if show:
        plt.figure(figsize=(10,8))

    for label, grp in df.groupby(pred_col):
        try:
            _precision, _recall, _thresholds = precision_recall_curve(grp[pr_curve_col], grp[score_col])
            # Keep both counts as plain Python ints: dividing by a Python
            # zero raises ZeroDivisionError (handled below), whereas a numpy
            # zero would silently yield inf/nan.
            n_correct = int(grp[pr_curve_col].sum())
            n_truth = len(df[df[gt_col] == label])
            _recall = _recall*(n_correct/(1.0*n_truth))
            p_r_thresh[label] = _precision, _recall, _thresholds
        except (ZeroDivisionError, IndexError):
            # No usable curve for this label: fall back to a flat zero curve
            # for plotting and leave the label out of the returned dict.
            _precision, _recall, _thresholds = [0,0],[0,0],[0]

        if show:
            plt.plot(_recall, _precision, lw = 1, color = next(_colors), label = label)

    if show:
        # Use the axes the curves were drawn on. A bare plt.axes() creates a
        # fresh Axes on current matplotlib, which would hide the curves and
        # leave the legend empty.
        ax = plt.gca()
        ax.set_xlabel('recall')
        ax.set_ylabel('precision')
        ax.set_xlim([0.0, 1.0])
        ax.set_ylim([0.0, 1.05])
        ax.set_aspect('equal')
        ax.grid(axis='both',  # set both grid lines
                linestyle='--',  # use dashed lines
                which='major',  # only major ticks
                color='gray',  # line colour
                alpha=0.7, )  # make lines semi-translucent
        ax.legend(loc='lower right')
    return p_r_thresh

In [4]:
def confusion_heatmap_normed(before_conf_cnt, 
                      after_conf_cnt, 
                      plot_metric = 'recall',
                      plot_delta = False, 
                      feature = 'roof_condition',
                      title = ''):
    """Plot two side-by-side normalised confusion heat maps and save them as a PNG.

    Rows of the count tables are drawn on the 'True Label' axis and columns on
    the 'Predicted Label' axis.

    * ``plot_delta`` false: the left panel is the "before" table and the right
      panel the "after" table, each divided by the "before" totals for
      ``plot_metric`` (row totals for 'recall', column totals for 'precision').
    * ``plot_delta`` true: both panels show ``after - before``; the left one is
      divided by the "before" column totals (titled precision), the right one
      by the "before" row totals (titled recall). The colour scale is
      symmetric around zero and spans the larger of the two panels.

    NOTE(review): denominators always come from the "before" table. For recall
    this matches the "after" table whenever both tables cover the same
    ground-truth samples; for precision the "after" column totals may differ.
    Confirm that this is intended.

    Parameters
    ----------
    before_conf_cnt, after_conf_cnt : pandas.DataFrame
        Confusion count tables of identical shape. When a column key is a
        tuple (MultiIndex columns) its second element is used as tick label.
    plot_metric : {'recall', 'precision'}
        Normalisation for the non-delta plot; also part of the file name.
    plot_delta : bool
        Plot the change between the tables instead of the tables themselves.
    feature : str
        Name used in the panel titles and in the output file name.
    title : str
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    None, or the message string when ``plot_metric`` is not recognised.

    The figure is written to
    ``'../figures/metaclassifier_<feature>_<plot_metric>[_delta].png'``.
    """
    if plot_metric not in ('recall', 'precision'):
        return "plot metric not recognized. Choose 'precision' or 'recall'"

    plot_delta = bool(plot_delta)

    before_counts = np.asarray(before_conf_cnt.values, dtype=float)
    after_counts = np.asarray(after_conf_cnt.values, dtype=float)

    # Convert the totals to numpy before adding an axis: multi-dimensional
    # indexing on a pandas Series is not supported by current pandas.
    # Row totals are a column vector (divide every row by its own total);
    # column totals are a row vector (divide every column by its own total).
    true_totals = np.asarray(before_conf_cnt.sum(axis = 1), dtype=float)[:, np.newaxis]
    pred_totals = np.asarray(before_conf_cnt.sum(axis = 0), dtype=float)[np.newaxis, :]

    # Two panels are drawn in both modes.
    if plot_delta:
        diff = after_counts - before_counts
        panels = [diff / pred_totals, diff / true_totals]
        panel_stages = ['delta due to', 'delta due to']
        panel_metrics = ['precision', 'recall']

        heatmap_palette = plt.cm.coolwarm_r
        # Symmetric scale covering BOTH panels so neither is clipped.
        scale = max(np.amax(np.absolute(panel)) for panel in panels)
        vmin, vmax = -scale, scale
    else:
        totals = true_totals if plot_metric == 'recall' else pred_totals
        panels = [before_counts / totals, after_counts / totals]
        panel_stages = ['before', 'after']
        panel_metrics = [plot_metric, plot_metric]

        heatmap_palette = plt.cm.ocean_r
        vmin, vmax = 0, 1.0

    # Columns may be a MultiIndex (tuples) or a flat index.
    column_labels = [c[1] if isinstance(c, tuple) else c for c in after_conf_cnt.columns]
    row_labels = list(after_conf_cnt.index)

    filename = 'metaclassifier_{0}_{1}{2}.png'.format(feature, plot_metric, '_delta' if plot_delta else '')

    # set up the figure
    fig, axes = plt.subplots(nrows = 1, ncols = 2, squeeze = True)
    fig.set_size_inches(18, 7)

    for i, ax in enumerate(axes):
        heatmap = ax.pcolor(panels[i], cmap=heatmap_palette, vmin = vmin, vmax = vmax)

        ax.set_frame_on(False)
        ax.set_xticks(np.arange(len(column_labels))+0.5, minor=False)
        ax.set_yticks(np.arange(len(row_labels))+0.5, minor=False)

        ax.set_ylabel('True Label', fontsize = 18)
        ax.set_xlabel('Predicted Label', fontsize = 18)

        ax.invert_yaxis()

        # x axis runs over the columns (predicted), y axis over the rows (true).
        ax.set_xticklabels(column_labels, minor=False, fontsize=12, rotation=90)
        ax.set_yticklabels(row_labels, minor=False, fontsize=12)

        panel_title = '{0}: confusion matrix\n{1} metaclassifier: {2}'.format(
            feature, panel_stages[i], panel_metrics[i])
        ax.set_title(panel_title, fontsize=18, y = 1.1)

        fig.colorbar(heatmap,ax=ax)

    # Save once, after both panels have been drawn.
    plt.savefig('../figures/{0}'.format(filename))