## Plot FCGR and Saliency Maps

In [None]:
import numpy as np 
import matplotlib.pyplot as plt
from supervised_dna.utils import array2img

def plot(array_freq, grad_eval, clade):
    """Show an FCGR image next to its saliency map in a single figure.

    Args:
        array_freq: FCGR array; it is passed through `array2img` before
            being displayed.
        grad_eval: saliency values drawn as a heatmap with `imshow`
            (presumably a 2D array matching the FCGR -- TODO confirm).
        clade: label interpolated into the figure title.

    Returns:
        The matplotlib Figure containing both panels.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    ax_fcgr, ax_saliency = axes

    # Left panel: the FCGR rendered in grayscale.
    ax_fcgr.imshow(array2img(array_freq), cmap="gray")
    ax_fcgr.set_title("FCGR")

    # Right panel: the saliency heatmap; keep the image handle for the colorbar.
    saliency_im = ax_saliency.imshow(grad_eval, cmap="jet", alpha=0.8)
    ax_saliency.set_title("Saliency Map")

    fig.colorbar(saliency_im)
    fig.suptitle(f"FCGR and Saliency Maps | Clade: {clade}")
    return fig

## Metrics on the extra test set
Per-clade metrics computed on an extra test set that is independent from the training/validation/test sets.

In [2]:
import pandas as pd
from collections import namedtuple
from sklearn.metrics import precision_recall_fscore_support

In [5]:
# Load the CSV holding the model predictions for each FASTA file.
path_model_preds = "results_nextclade_comparison.csv"
model_preds = pd.read_csv(path_model_preds)

# The ground-truth label is the third "/"-separated component of the FASTA path.
model_preds["ground_truth"] = model_preds["path_fasta"].map(
    lambda fasta_path: fasta_path.split("/")[2]
)
model_preds.head()

Unnamed: 0.1,Unnamed: 0,path_fasta,prob,pred_class,ground_truth
0,0,data-nextclade-comparison/hCoV-19/GK/hCoV-19_U...,0.899028,GK,GK
1,1,data-nextclade-comparison/hCoV-19/GK/hCoV-19_A...,0.874016,GK,GK
2,2,data-nextclade-comparison/hCoV-19/GK/hCoV-19_E...,0.774449,GK,GK
3,3,data-nextclade-comparison/hCoV-19/GK/hCoV-19_F...,0.827125,GK,GK
4,4,data-nextclade-comparison/hCoV-19/GK/hCoV-19_A...,0.879893,GK,GK


In [7]:
# Clade labels; the per-class metrics below are reported in this order.
CLADES = ["S", "L", "G", "V", "GR", "GH", "GV", "GK"]

y_true = model_preds["ground_truth"]
y_pred = model_preds["pred_class"]

# average=None requests one score per label, ordered as in `labels`.
precision, recall, fscore, support = precision_recall_fscore_support(
    y_true, y_pred, labels=CLADES, average=None
)

# Only precision and recall go into the table; fscore and support are not included.
Metrics = namedtuple("Metrics", ["clade", "precision", "recall"])
list_metrics = [
    Metrics(clade_name, clade_precision, clade_recall)
    for clade_name, clade_precision, clade_recall in zip(CLADES, precision, recall)
]

df_metrics = pd.DataFrame(list_metrics)
df_metrics

Unnamed: 0,clade,precision,recall
0,S,0.999568,0.987841
1,L,0.99705,0.98419
2,G,0.853049,0.961325
3,V,0.993096,0.991851
4,GR,0.997341,0.922147
5,GH,0.99151,0.98987
6,GV,0.997538,0.979255
7,GK,0.96406,0.958773


In [9]:
# Emit the metrics table as LaTeX tabular source.
latex_table = df_metrics.to_latex()
print(latex_table)

\begin{tabular}{llrr}
\toprule
{} & clade &  precision &    recall \\
\midrule
0 &     S &   0.999568 &  0.987841 \\
1 &     L &   0.997050 &  0.984190 \\
2 &     G &   0.853049 &  0.961325 \\
3 &     V &   0.993096 &  0.991851 \\
4 &    GR &   0.997341 &  0.922147 \\
5 &    GH &   0.991510 &  0.989870 \\
6 &    GV &   0.997538 &  0.979255 \\
7 &    GK &   0.964060 &  0.958773 \\
\bottomrule
\end{tabular}



In [11]:
# NOTE(review): looks like a seconds-to-hours conversion of 33,000 s -- confirm.
33_000 / (60 * 60)

9.166666666666666