# Compare and evaluate the performance of different models

In [None]:
# standard-library imports used throughout the notebook

import json
import os
import sys
import re

# load in the json files
def load_json(filename):
    """Read a JSON file and return the decoded object.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's contents.
    """
    # JSON text is UTF-8; without an explicit encoding ``open`` falls back to
    # the platform locale and can mis-decode non-ASCII names.
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)

# accuracy summaries from the costarica_resnet_v2.0 outputs, one file per metric
cr_macro, cr_micro, cr_taxon = map(load_json, (
    './outputs/costarica_resnet_v2.0_macro-accuracy.json',
    './outputs/costarica_resnet_v2.0_micro-accuracy.json',
    './outputs/costarica_resnet_v2.0_taxon-accuracy.json',
))

In [None]:
# echo the loaded taxon-accuracy JSON as the cell output for inspection
cr_taxon

In [None]:
# plot the results
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# apply seaborn's 'whitegrid' style and 'paper' context to the plots below
sns.set_style('whitegrid')
sns.set_context('paper')

In [None]:
# Side-by-side bar charts comparing the Pytorch and TFLite entries of the
# accuracy dictionaries: macro accuracy on the left, micro on the right.

pytorch_macro = cr_macro['Pytorch']
tflite_macro = cr_macro['TFLite']
pytorch_micro = cr_micro['Pytorch']
tflite_micro = cr_micro['TFLite']


def _plot_accuracy_panel(axis, pytorch_scores, tflite_scores, title):
    """Draw one grouped Pytorch-vs-TFLite bar panel on ``axis``.

    Args:
        axis: Matplotlib axes to draw on.
        pytorch_scores: Mapping of metric name -> value for the Pytorch model.
        tflite_scores: Mapping of metric name -> value for the TFLite model;
            values are plotted in iteration order next to the Pytorch bars.
        title: Title shown above the panel.
    """
    # shaded band behind each run of three bar pairs, with a caption above it:
    # (left edge, right edge, band colour, caption colour, caption, caption x)
    bands = (
        (-0.25, 2.62, 'palegoldenrod', 'goldenrod', 'Species', 1.25),
        (2.63, 5.62, 'lightgreen', 'green', 'Genus', 4.12),
        (5.63, 8.62, 'lightblue', 'blue', 'Family', 7.12),
    )
    for left, right, fill, caption_colour, caption, caption_x in bands:
        axis.axvspan(left, right, facecolor=fill, alpha=0.5)
        axis.text(caption_x, 105, caption, ha='center', va='center',
                  fontsize=10, color=caption_colour)

    # one bar per model, side by side; the labels feed the legend so that the
    # legend swatches are the bar colours rather than the shaded bands
    positions = np.arange(len(pytorch_scores))
    axis.bar(positions, list(pytorch_scores.values()), width=0.3,
             align='center', color='lightcoral', label='Pytorch')
    axis.bar(np.arange(len(tflite_scores)) + 0.3, list(tflite_scores.values()),
             width=0.3, align='center', color='dodgerblue', label='TFLite')

    # NOTE(review): ticks sit at i + 1.15 rather than the bar-pair centre
    # (i + 0.15); kept from the original, presumably to line up the rotated,
    # centre-anchored labels with their bars - confirm visually.
    axis.set_xticks(positions + 1.15)
    axis.set_xticklabels([key.replace('_', ' ') for key in pytorch_scores],
                         rotation=-45, ha='center')

    axis.set_ylabel('Accuracy')
    axis.set_title(title)


# subplots with 2 cols, 1 row
fig, ax = plt.subplots(1, 2, figsize=(10, 3))

# each panel is labelled from its own keys (the micro panel previously reused
# the macro key names)
_plot_accuracy_panel(ax[0], pytorch_macro, tflite_macro, 'Macro Accuracy')
_plot_accuracy_panel(ax[1], pytorch_micro, tflite_micro, 'Micro Accuracy')

print(len(pytorch_macro))

# legend outside the right-hand panel; handles come from the labelled bars
ax[1].legend(loc='upper left', bbox_to_anchor=(1, 1),
             facecolor='white', framealpha=1, fontsize=10)

# add a master title
plt.suptitle('Costa Rica Model Performance', fontsize=14, y=1.05)

plt.show()


In [None]:
import pandas as pd

# per-model accuracy dictionaries
pytorch_macro = cr_macro['Pytorch']
tflite_macro = cr_macro['TFLite']
pytorch_micro = cr_micro['Pytorch']
tflite_micro = cr_micro['TFLite']

# Long-format table: one row per (metric key, model, accuracy type), indexed
# by the metric key. Rows are gathered first and the frame is built once,
# which avoids re-copying the whole frame on every pd.concat call.
row_labels = []
rows = []
for macro_key in pytorch_macro:
    # the micro dictionaries use the same keys with 'macro' swapped for 'micro'
    micro_key = macro_key.replace('macro', 'micro')
    for label, value, model, accuracy_type in (
        (macro_key, pytorch_macro[macro_key], 'Pytorch', 'Macro'),
        (macro_key, tflite_macro[macro_key], 'TFLite', 'Macro'),
        (micro_key, pytorch_micro[micro_key], 'Pytorch', 'Micro'),
        (micro_key, tflite_micro[micro_key], 'TFLite', 'Micro'),
    ):
        row_labels.append(label)
        rows.append([value, model, accuracy_type])

df = pd.DataFrame(rows, columns=['Value', 'Model', 'Type'], index=row_labels)

# add taxon and measure columns from the 2nd and 3rd '_'-separated parts of
# the index
key_parts = df.index.str.split('_')
df['Taxon'] = key_parts.str[1]
df['Measure'] = key_parts.str[2]
df





In [None]:
# set up seaborn multiplots
sns.set_style('whitegrid')
sns.set_context('paper')

# hue levels and their colours, kept in matching order
model_order = ['Pytorch', 'TFLite']
model_palette = ['lightcoral', 'dodgerblue']

# macro accuracy: one facet per measure, taxa on the x axis, bars split by model
g = sns.catplot(x='Taxon', y='Value', hue='Model', col='Measure',
                data=df[df['Type'] == 'Macro'], kind='bar', height=4, aspect=1,
                hue_order=model_order, palette=model_palette)

# Write each bar's value above it in that model's colour. Bars are taken per
# hue container instead of matching bar centres against hard-coded floats:
# the old list (-0.2, 1.8, 1.2) named a TFLite centre (1.2) and missed the
# Pytorch centre 0.8, so the middle group's label colours were swapped.
for ax in g.axes.flat:
    for bars, colour in zip(ax.containers, model_palette):
        for bar in bars:
            ax.text(bar.get_x() + bar.get_width() / 2., bar.get_height(),
                    '%.2f' % bar.get_height(),
                    fontsize=10, color=colour, ha='center', va='bottom')

g.set_xticklabels(rotation=-45)
g.set_titles('{col_name}')
g.set_axis_labels('', 'Macro Accuracy')
g.fig.suptitle('Costa Rica Model Performance', fontsize=14, y=1.05)
g.fig.subplots_adjust(top=0.8)

# save before showing so the file is written from the fully drawn figure,
# and make sure the target directory exists
os.makedirs('./outputs/plots', exist_ok=True)
g.savefig('./outputs/plots/costarica_macro.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# set up seaborn multiplots
sns.set_style('whitegrid')
sns.set_context('paper')

# hue levels and their colours, kept in matching order
model_order = ['Pytorch', 'TFLite']
model_palette = ['lightcoral', 'dodgerblue']

# micro accuracy: one facet per measure, taxa on the x axis, bars split by model
g = sns.catplot(x='Taxon', y='Value', hue='Model', col='Measure',
                data=df[df['Type'] == 'Micro'], kind='bar', height=4, aspect=1,
                hue_order=model_order, palette=model_palette)

# Write each bar's value above it in that model's colour. Bars are taken per
# hue container instead of matching bar centres against hard-coded floats:
# the old list (-0.2, 1.8, 1.2) named a TFLite centre (1.2) and missed the
# Pytorch centre 0.8, so the middle group's label colours were swapped.
for ax in g.axes.flat:
    for bars, colour in zip(ax.containers, model_palette):
        for bar in bars:
            ax.text(bar.get_x() + bar.get_width() / 2., bar.get_height(),
                    '%.2f' % bar.get_height(),
                    fontsize=10, color=colour, ha='center', va='bottom')

g.set_xticklabels(rotation=-45)
g.set_titles('{col_name}')
g.set_axis_labels('', 'Micro Accuracy')
g.fig.suptitle('Costa Rica Model Performance', fontsize=14, y=1.05)
g.fig.subplots_adjust(top=0.8)

# make sure the target directory exists before writing the figure
os.makedirs('./outputs/plots', exist_ok=True)
g.savefig('./outputs/plots/costarica_micro.png', dpi=300, bbox_inches='tight')

# Confusion Matrices

In [None]:
# Plot the confusion matrix
import seaborn as sns
import matplotlib.pyplot as plt
# apply seaborn's 'whitegrid' style and 'paper' context to the confusion plots
sns.set_style('whitegrid')
sns.set_context('paper')

def create_cf(json_names, str_head='F', data=None):
    """Cross-tabulate truth against prediction labels for one taxon level.

    Args:
        json_names: Mapping holding 'family_list', 'genus_list' and
            'species_list'. The selected entry is indexed by the numeric
            labels found in the data to recover readable names.
        str_head: 'F' (family), 'G' (genus) or 'S' (species). Selects the
            '<str_head>_Truth' and '<str_head>_Tf_Prediction' columns.
        data: DataFrame holding those two columns. When omitted, the
            module-level ``conf`` frame is used, as before.

    Returns:
        DataFrame of counts (rows: truth, columns: prediction) with the
        'All' margin row and column, labelled with names instead of numbers.

    Raises:
        ValueError: If ``str_head`` is not 'F', 'G' or 'S'.
    """
    # F=family, G=genus, S=species
    name_keys = {'F': 'family_list', 'G': 'genus_list', 'S': 'species_list'}
    if str_head not in name_keys:
        raise ValueError(
            "str_head must be one of 'F', 'G' or 'S', got %r" % (str_head,))
    names = json_names[name_keys[str_head]]

    if data is None:
        # backward-compatible default: the frame loaded at module level
        data = conf

    confusion = pd.crosstab(data[str_head + '_Truth'],
                            data[str_head + '_Tf_Prediction'],
                            margins=True)

    # swap numeric labels for names; the last entry on each axis is the
    # 'All' margin and keeps its label
    confusion.index = [names[x] for x in confusion.index[:-1]] + ['All']
    confusion.columns = [names[x] for x in confusion.columns[:-1]] + ['All']

    return confusion

def create_cf_matrix(confusion, spec='', save=False, spec2='', fs=(6.5, 8)):
    """Draw a confusion-matrix heatmap with row and column totals alongside.

    Args:
        confusion: Confusion table whose last row and last column are the
            'All' margins (as returned by ``create_cf``).
        spec: Taxon level name, used in the title and the output file name.
        save: When true, write the figure to
            './outputs/plots/<spec2>_confusion_<spec>.png'; otherwise show it.
        spec2: Model/dataset name, used in the title and the output file name.
        fs: Figure size passed to ``plt.figure``.
    """
    fig = plt.figure(figsize=fs)

    # 2x2 grid: a thin strip on top for column totals, a thin strip on the
    # right for row totals, and the main heatmap in the bottom-left cell
    grid = plt.GridSpec(2, 2, width_ratios=[1, 0.07], height_ratios=[0.07, 1],
                        wspace=0.01, hspace=0.01)
    ax_heatmap = fig.add_subplot(grid[1, 0])
    ax_row_annotation = fig.add_subplot(grid[1, 1])
    ax_col_annotation = fig.add_subplot(grid[0, 0])

    # the title sits on the top strip (the axes plt.title used to hit
    # implicitly); a space now separates the model name from 'Model'
    ax_col_annotation.set_title(spec + ' Confusion Matrix for ' + spec2 + ' Model')

    # split the table into the counts and its 'All' margins
    counts = confusion.iloc[:-1, :-1]
    row_totals = confusion['All'].iloc[:-1]
    col_totals = confusion.loc['All'].iloc[:-1]

    # main heatmap of counts
    g = sns.heatmap(counts, annot=True, cmap='Blues', cbar=False,
                    ax=ax_heatmap, fmt='.0f', annot_kws={'size': 8})
    g.set_xlabel('Predicted')
    g.set_ylabel('Actual')
    g.set_xticklabels(g.get_xticklabels(), rotation=-45, ha='left')

    # row totals, drawn as a one-column heatmap on the right
    ra = sns.heatmap(pd.DataFrame({'feature2': row_totals}), annot=True,
                     fmt='.0f', cmap='Reds', annot_kws={'size': 8},
                     ax=ax_row_annotation, cbar=False,
                     xticklabels=False, yticklabels=False)
    ra.set_xlabel('Total', rotation=-45, ha='left')

    # column totals, drawn as a one-row heatmap on top
    ca = sns.heatmap(pd.DataFrame({'feature1': col_totals}).transpose(),
                     cmap='Reds', annot_kws={'size': 8, 'rotation': 90},
                     ax=ax_col_annotation, annot=True, fmt='.0f',
                     cbar=False, xticklabels=False, yticklabels=False)
    ca.set_ylabel('Total')

    if save:
        # make sure the target directory exists before writing the figure
        os.makedirs('./outputs/plots', exist_ok=True)
        fig.savefig('./outputs/plots/' + spec2 + '_confusion_' + spec + '.png',
                    dpi=300, bbox_inches='tight')
    else:
        plt.show()

In [None]:
# load in the confusion data
import pandas as pd

# Costa Rica confusion data; create_cf reads this module-level `conf` by default
conf = pd.read_csv('./outputs/costarica_resnet_v2.0_confusion-data.csv')

# numeric-label lookup (family/genus/species name lists) for the same dataset
json_names = load_json("/bask/homes/f/fspo1218/amber/data/gbif_costarica/01_costarica_data_numeric_labels.json")

In [None]:
# Build the Costa Rica confusion tables at family, genus and species level
# from the currently loaded `conf` / `json_names`. The plotting calls are
# left commented out; uncomment one to render and save that figure.
fam_cf = create_cf(json_names, str_head='F')
#create_cf_matrix(fam_cf, spec = "Family", spec2='Costa Rica', save=True)

gen_cf = create_cf(json_names, str_head='G')
# create_cf_matrix(gen_cf, spec = "Genus", spec2='Costa Rica', save=True)

spec_cf = create_cf(json_names, str_head='S')
# create_cf_matrix(spec_cf, spec = "Species", spec2='Costa Rica', save=True)


In [None]:
# UK: swap in the UK confusion data and label names
conf = pd.read_csv('./outputs/uk_resnet_v2.0_confusion-data.csv')
json_names = load_json("/bask/homes/f/fspo1218/amber/data/gbif_uk/01_uk_data_numeric_labels.json")

# family-level table from the `conf` assigned above, then plot and save it
fam_cf = create_cf(json_names, 'F')
create_cf_matrix(fam_cf, spec="Family", save=True, spec2='UK', fs=(12, 12))

In [None]:
# Singapore: swap in the Singapore confusion data and label names
conf = pd.read_csv('./outputs/singapore_resnet_v2.0_confusion-data.csv')
json_names = load_json("/bask/homes/f/fspo1218/amber/data/gbif_singapore/01_singapore_data_numeric_labels.json")

# family-level table from the `conf` assigned above, then plot and save it
fam_cf = create_cf(json_names, 'F')
create_cf_matrix(fam_cf, spec="Family", save=True, spec2='Singapore', fs=(12, 12))