# Visualize and print results on the test set

In [None]:
from collections import defaultdict
import pandas as pd 
from pathlib import Path 
from jiwer import wer, cer

pd.options.plotting.backend = "plotly"

def _score_dataframe(df: pd.DataFrame) -> dict:
    """Score one model's predictions and annotate ``df`` in place.

    ``df`` must provide the columns ``transcription``, ``ground_truth`` and
    ``model_name``. The ``transcription`` column is replaced by its stripped
    string form, and the columns ``correct_prediction``, ``wer`` and ``cer``
    are added with one value per row.

    Returns:
        A dict with the keys ``model`` (first value of ``model_name``),
        ``accuracy`` (share of exact matches), ``mean_wer`` / ``mean_cer``
        (mean of the per-row error rates) and ``all_wer`` / ``all_cer``
        (error rates over all rows joined with single spaces).
    """
    # NOTE: str() turns a missing (NaN) transcription into the literal "nan".
    df["transcription"] = df["transcription"].apply(lambda x: str(x).strip())
    df["correct_prediction"] = df["transcription"] == df["ground_truth"]

    references = df["ground_truth"].to_list()
    hypotheses = df["transcription"].to_list()

    # jiwer returns a single aggregate score when it is handed lists, so the
    # per-sample error rates have to be computed one pair at a time.
    df["wer"] = [
        wer(reference=ref, hypothesis=hyp)
        for ref, hyp in zip(references, hypotheses)
    ]
    df["cer"] = [
        cer(reference=ref, hypothesis=hyp)
        for ref, hyp in zip(references, hypotheses)
    ]

    all_gt = " ".join(references)
    all_transcriptions = " ".join(hypotheses)

    return {
        # Positional access so a non-default index does not break the lookup.
        "model": df["model_name"].iloc[0],
        "accuracy": len(df[df["correct_prediction"]]) / len(df),
        "mean_wer": df["wer"].mean(),
        "mean_cer": df["cer"].mean(),
        "all_wer": wer(reference=all_gt, hypothesis=all_transcriptions),
        "all_cer": cer(reference=all_gt, hypothesis=all_transcriptions),
    }


def print_results(dfs: list[pd.DataFrame]):
    """Print accuracy, WER and CER summaries for each model's result frame.

    Each frame is scored (and annotated in place) by ``_score_dataframe``;
    the metrics are printed as percentages rounded to two decimals.
    """
    def to_percent(num):
        return round(num*100, 2)

    for df in dfs:
        metrics = _score_dataframe(df)
        model_name = metrics["model"]
        accuracy = metrics["accuracy"]
        mean_wer = metrics["mean_wer"]
        mean_cer = metrics["mean_cer"]
        all_wer = metrics["all_wer"]
        all_cer = metrics["all_cer"]

        print(f"""{model_name}
    Strict accuracy:    {to_percent(accuracy)}%

    Mean WER:           {to_percent(mean_wer)}%
    WER (all concat):   {to_percent(all_wer)}%

    Mean CER:           {to_percent(mean_cer)}%
    CER (all concat):   {to_percent(all_cer)}%\n""")


def dfs_to_score_df(dfs: list[pd.DataFrame]) -> pd.DataFrame:
    """Build a long-format score table (``model``, ``score``, ``value``).

    Error rates are reported as ``1 - rate`` so that, like accuracy, a
    higher value is better. Each input frame contributes five rows and is
    annotated in place by ``_score_dataframe``.
    """
    rows = []
    for df in dfs:
        metrics = _score_dataframe(df)
        labelled_values = (
            ("1 - Mean WER", 1 - metrics["mean_wer"]),
            ("1 - Mean CER", 1 - metrics["mean_cer"]),
            ("1 - WER (concat)", 1 - metrics["all_wer"]),
            ("1 - CER (concat)", 1 - metrics["all_cer"]),
            ("Accuracy", metrics["accuracy"]),
        )
        for label, value in labelled_values:
            rows.append(
                {"model": metrics["model"], "score": label, "value": value}
            )
    # Explicit columns keep the schema intact even when ``dfs`` is empty.
    return pd.DataFrame(rows, columns=["model", "score", "value"])

In [None]:
### CHANGE THIS ###
results_dirname = "output/"
### ---------- ###

# Collect every result file named "test*gt.csv" in the results directory.
# iterdir() yields entries in arbitrary order, so sort the paths to keep the
# model order of the printed report (and later plots) stable across runs.
p = Path(results_dirname)
result_files = sorted(
    e for e in p.iterdir()
    if e.is_file() and e.name.startswith("test") and e.name.endswith("gt.csv")
)
dfs = [pd.read_csv(e) for e in result_files]
print_results(dfs)

In [None]:
# Long-format score table: one row per (model, score) pair.
score_df = dfs_to_score_df(dfs)
# Grouped bar chart: one group per score, one bar per model.
score_df.plot(kind="bar", x="score", y="value", color="model", barmode="group")

In [None]:
# Add up all score values per model and plot one bar per model.
per_model_totals = [
    (model_name, group["value"].sum())
    for model_name, group in score_df.groupby("model")
]
model_score_sum = {
    "model": [model_name for model_name, _ in per_model_totals],
    "score_sum": [total for _, total in per_model_totals],
}

pd.DataFrame(model_score_sum).plot.bar(x="model", y="score_sum", color="model")