# Load in the confusion matrix

In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import gridspec
import numpy as np

In [None]:
# Input locations: the per-sample truth/prediction table and the JSON file
# holding the ordered species / genus / family name lists.
confusion_csv = "../../outputs/v01_confusion-data.csv"
labels_json = '/bask/homes/f/fspo1218/amber/data/gbif_costarica/01_costarica_data_numeric_labels.json'

# `typ='series'` makes each top-level JSON key one entry of a pandas Series.
labels = pd.read_json(labels_json, typ='series')
df = pd.read_csv(confusion_csv)

In [None]:
# Preview the first rows of the loaded truth/prediction table.
df.head()

**Key 🗝️**

Column prefixes used in the data (e.g. `F_Truth`, `F_Prediction`):

F = family; 
G = genus; 
S = species

In [None]:
# Report accuracy separately for each taxonomic rank.
# Each entry: (name shown in the printout, truth column, prediction column).
rank_columns = (
    ('Family', 'F_Truth', 'F_Prediction'),
    ('Genus', 'G_Truth', 'G_Prediction'),
    ('Species', 'S_Truth', 'S_Prediction'),
)

for rank_name, truth_col, pred_col in rank_columns:
    accuracy = accuracy_score(df[truth_col], df[pred_col])
    print(f'{rank_name} Accuracy: {accuracy:.4f}')

In [None]:
# Build {numeric id: name} lookups for each rank; the numeric id of a name is
# its position in the corresponding list from the labels file.
s_lab = dict(enumerate(labels['species_list']))
g_lab = dict(enumerate(labels['genus_list']))
f_lab = dict(enumerate(labels['family_list']))

In [None]:
# Add human-readable name columns next to the numeric id columns, one
# truth/prediction pair per rank (species first, then genus, then family).
for rank, id_to_name in (('species', s_lab), ('genus', g_lab), ('family', f_lab)):
    prefix = rank[0].upper()  # 'S', 'G' or 'F' -- the id-column prefix
    df[rank + '_truth'] = df[prefix + '_Truth'].map(id_to_name)
    df[rank + '_prediction'] = df[prefix + '_Prediction'].map(id_to_name)

In [None]:
from matplotlib.colors import LogNorm, Normalize
from matplotlib.ticker import MaxNLocator


def confusion_matrix_kg(tax_type="species"):
    """Plot a log-coloured confusion-matrix heatmap for one taxonomic rank.

    Reads the notebook-level ``df``. Numeric class ids are taken from the
    ``<X>_Truth`` / ``<X>_Prediction`` columns, where ``X`` is the upper-cased
    first letter of ``tax_type``; display names are taken from the
    ``<tax_type>_truth`` / ``<tax_type>_prediction`` columns.

    Args:
        tax_type: Rank to plot. The notebook calls this with "family",
            "genus" and "species".

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    prefix = tax_type[0].upper()
    truth_ids = df[prefix + '_Truth']
    pred_ids = df[prefix + '_Prediction']

    # Fix the class order explicitly and derive the tick labels in that same
    # order. Previously the labels came from `.unique()` on the truth names,
    # which is in order of first appearance and omits classes that only occur
    # as predictions, so labels did not line up with the matrix rows/columns.
    class_ids = sorted(set(truth_ids) | set(pred_ids))
    id_to_name = dict(zip(truth_ids, df[tax_type + '_truth']))
    id_to_name.update(zip(pred_ids, df[tax_type + '_prediction']))
    tick_labels = [id_to_name[class_id] for class_id in class_ids]

    cm = confusion_matrix(truth_ids, pred_ids, labels=class_ids)

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=False, fmt='d', cmap='Blues', cbar=True, norm=LogNorm(),
                xticklabels=tick_labels,
                yticklabels=tick_labels)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')
    plt.show()

In [None]:
# Family-level confusion matrix.
confusion_matrix_kg("family")

In [None]:
# Genus-level confusion matrix.
confusion_matrix_kg("genus")

In [None]:
# Species-level confusion matrix.
confusion_matrix_kg("species")

In [None]:
def labelled_heatmap(tax_type):
    """Draw a clustered truth-vs-prediction heatmap with rows coloured by family.

    Reads the notebook-level ``df``. Rows are the values of
    ``<tax_type>_truth``, columns the values of ``<tax_type>_prediction``, and
    each cell is the number of samples with that combination. Rows and columns
    are both hierarchically clustered, and a colour strip beside the rows
    marks the family looked up from ``family_truth``.

    Args:
        tax_type: Rank to plot; the notebook calls this with "genus" and
            "species".

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    truth_col = tax_type + '_truth'

    conf_matrix = pd.crosstab(df[truth_col], df[tax_type + '_prediction'],
                              rownames=['Actual'], colnames=['Prediction'], dropna=False)

    # Family of every row taxon, in the row order of the matrix.
    family_of = df.set_index(truth_col)['family_truth'].to_dict()
    row_families = pd.Series(
        [family_of.get(taxon) for taxon in conf_matrix.index],
        index=conf_matrix.index,
        name='family',
    )

    # Sort the families before pairing them with palette colours so the
    # family -> colour assignment is the same on every run (iterating a set of
    # strings is not order-stable across interpreter sessions).
    families = sorted(row_families.dropna().unique())
    palette = sns.hls_palette(len(families), l=0.5, s=0.8)
    lut = dict(zip(families, palette))

    # A Series indexed like the matrix rows is the documented input form for
    # `row_colors`; rows without a known family get no lookup hit.
    row_colors = row_families.map(lut)

    sns.clustermap(conf_matrix, col_cluster=True, row_cluster=True, linewidths=0.1,
                   norm=LogNorm(), dendrogram_ratio=0.1,
                   cmap='Blues', row_colors=row_colors)
    plt.show()

In [None]:
# Clustered genus-level heatmap, rows coloured by family.
labelled_heatmap('genus')

In [None]:
# Clustered species-level heatmap, rows coloured by family.
labelled_heatmap('species')