In [1]:
import os
import json
import numpy as np
from omegaconf import OmegaConf

In [2]:
exp_dir = "../../../logs/en/asr/evaluate"
folders = {
    "Fine-tuned_9": "version_9",
    "AC_00": "version_39",
    "AC_01": "version_24",
    "AC_02": "version_40",
    "AC_04": "version_41",
    "AC_06": "version_42",
}

In [3]:
# define which accent is standard, which are seen and which are unseen
classes = {
    "seen": ["us", "uk", "ca", "in", "de", "au"],
    "unseen": ["hk", "sg", "sc", "nz", "ie", "za", "ni", "ph"],
}

In [4]:
# load WERs of each file, for each accent
wers = dict()
for exp, folder in folders.items():
    wers[exp] = {k[5:-4]: v for k, v in json.load(open(f"{exp_dir}/{folder}/avg_wers.json")).items()}

In [5]:
# transform dictionary into a numpy array, where one axis represents the accents and another the experiments
experiments = list(wers.keys())
accents = list(wers[experiments[0]].keys())
n_words = np.array([wers[experiments[0]][acc]["n_words"] for acc in accents])
avg_wers = np.array([[wers[exp][acc]["avg_wer"] for exp in experiments] for acc in accents])

In [6]:
# compute means for each class, for each experiment
class_indices = {k: [accents.index(acc) for acc in v] for k, v in classes.items()}
class_means = {k: np.mean(avg_wers[indices], axis=0) for k, indices in class_indices.items()}

In [7]:
# print the avg. WERs and the means (overall and per class) as a markdown table
headers = ["Accent / Dialect"] + experiments
print((" | ").join(headers))
print(f"|{'---:|'*len(headers)}")

# print avg. WERs
for i in range(len(accents)):
    row = [accents[i]] + [f"{avg_wers[i,j]*100:.2f}" for j in range(len(experiments))]
    print((" | ").join(row))

# print overall means
row = ["mean"] + [f"{np.mean(avg_wers[:,i])*100:.2f}" for i in range(len(experiments))]
print((" | ").join(row))

# print class means
for key, value in class_means.items():
    row = [f"{key} mean"] + [f"{value[i]*100:.2f}" for i in range(len(experiments))]
    print((" | ").join(row))

# print worst avg. WER of each experiment
row = ["worst"] + [f"{v*100:.2f}" for v in np.max(avg_wers, axis=0)]
print((" | ").join(row))

# print the avg. of the three worst avg. WERs of each experiment
row = ["3-worst mean."] + [f"{np.mean(np.sort(avg_wers, axis=0)[-3:,i])*100:.2f}" for i in range(len(experiments))]
print((" | ").join(row))


Accent / Dialect | Fine-tuned_9 | AC_00 | AC_01 | AC_02 | AC_04 | AC_06
|---:|---:|---:|---:|---:|---:|---:|
za | 6.54 | 6.62 | 6.54 | 6.74 | 8.04 | 7.68
sg | 10.41 | 10.41 | 10.51 | 10.76 | 13.38 | 12.43
in | 9.21 | 9.27 | 9.41 | 9.42 | 11.67 | 10.88
au | 7.47 | 7.61 | 7.53 | 7.61 | 9.38 | 8.91
hk | 9.76 | 9.84 | 9.75 | 10.12 | 12.72 | 11.44
ie | 7.51 | 7.53 | 7.41 | 7.61 | 9.21 | 8.94
uk | 5.78 | 5.86 | 5.87 | 5.84 | 7.18 | 6.86
ph | 8.90 | 8.81 | 8.79 | 8.98 | 10.90 | 10.52
de | 5.62 | 5.64 | 5.66 | 5.64 | 6.71 | 6.85
ni | 6.96 | 7.23 | 7.02 | 7.23 | 8.49 | 8.06
us | 5.43 | 5.46 | 5.46 | 5.50 | 6.70 | 6.52
ca | 5.36 | 5.37 | 5.35 | 5.41 | 6.46 | 6.32
nz | 5.40 | 5.41 | 5.28 | 5.30 | 7.00 | 6.45
sc | 33.50 | 33.30 | 32.55 | 34.33 | 36.62 | 36.35
mean | 9.13 | 9.17 | 9.08 | 9.32 | 11.03 | 10.59
seen mean | 6.48 | 6.53 | 6.55 | 6.57 | 8.01 | 7.72
unseen mean | 11.12 | 11.14 | 10.98 | 11.38 | 13.29 | 12.73
worst | 33.50 | 33.30 | 32.55 | 34.33 | 36.62 | 36.35
3-worst mean. | 17.89 | 17.

Accent / Dialect | Fine-tuned_9 | AC_00 | AC_01 | AC_02 | AC_04 | AC_06 | X_58_00 | X_65_01 | X_59_02 | X_60_04
|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|
za | 6.54 | 6.62 | 6.54 | 6.74 | 8.04 | 7.68 | 6.54 | 6.54 | 6.57 | 7.54
sg | 10.41 | 10.41 | 10.51 | 10.76 | 13.38 | 12.43 | 10.39 | 10.39 | 10.42 | 12.66
in | 9.21 | 9.27 | 9.41 | 9.42 | 11.67 | 10.88 | 9.22 | 9.21 | 9.24 | 10.51
au | 7.47 | 7.61 | 7.53 | 7.61 | 9.38 | 8.91 | 7.46 | 7.46 | 7.48 | 8.66
hk | 9.76 | 9.84 | 9.75 | 10.12 | 12.72 | 11.44 | 9.73 | 9.73 | 9.82 | 11.24
ie | 7.51 | 7.53 | 7.41 | 7.61 | 9.21 | 8.94 | 7.50 | 7.50 | 7.50 | 8.93
uk | 5.78 | 5.86 | 5.87 | 5.84 | 7.18 | 6.86 | 5.77 | 5.77 | 5.76 | 6.73
ph | 8.90 | 8.81 | 8.79 | 8.98 | 10.90 | 10.52 | 8.85 | 8.85 | 8.83 | 10.31
de | 5.62 | 5.64 | 5.66 | 5.64 | 6.71 | 6.85 | 5.62 | 5.61 | 5.58 | 6.42
ni | 6.96 | 7.23 | 7.02 | 7.23 | 8.49 | 8.06 | 6.94 | 6.94 | 7.02 | 8.16
us | 5.43 | 5.46 | 5.46 | 5.50 | 6.70 | 6.52 | 5.45 | 5.45 | 5.46 | 6.27
ca | 5.36 | 5.37 | 5.35 | 5.41 | 6.46 | 6.32 | 5.34 | 5.34 | 5.42 | 6.21
nz | 5.40 | 5.41 | 5.28 | 5.30 | 7.00 | 6.45 | 5.39 | 5.39 | 5.27 | 6.42
sc | 33.50 | 33.30 | 32.55 | 34.33 | 36.62 | 36.35 | 33.66 | 33.67 | 34.21 | 35.26
mean | 9.13 | 9.17 | 9.08 | 9.32 | 11.03 | 10.59 | 9.13 | 9.13 | 9.18 | 10.38
seen mean | 6.48 | 6.53 | 6.55 | 6.57 | 8.01 | 7.72 | 6.48 | 6.48 | 6.49 | 7.47
unseen mean | 11.12 | 11.14 | 10.98 | 11.38 | 13.29 | 12.73 | 11.12 | 11.13 | 11.20 | 12.56
worst | 33.50 | 33.30 | 32.55 | 34.33 | 36.62 | 36.35 | 33.66 | 33.67 | 34.21 | 35.26
3-worst mean. | 17.89 | 17.85 | 17.61 | 18.40 | 20.90 | 20.08 | 17.93 | 17.93 | 18.15 | 19.72

In [8]:
# print the experiment folders of each experiment (both train and eval folders)
print("Experiment folders:\n")
for exp, folder in folders.items():
    eval_folder = os.path.join(exp_dir, folder)
    eval_config = OmegaConf.load(os.path.join(eval_folder, "config.yaml"))
    train_folder = f'../{eval_config.asr.ckpt.replace("/checkpoints/last.ckpt", "")}'
    print(f"- {exp}: training `{train_folder[3:]}`, evaluation `{eval_folder[3:]}`")

Experiment folders:

- Fine-tuned_9: training `logs/en/asr/train/version_11`, evaluation `../../logs/en/asr/evaluate/version_9`
- AC_00: training `logs/en/ensemble/train/binary/b7/DAT/version_81`, evaluation `../../logs/en/asr/evaluate/version_39`
- AC_01: training `logs/en/ensemble/train/binary/b7/DAT/version_66`, evaluation `../../logs/en/asr/evaluate/version_24`
- AC_02: training `logs/en/ensemble/train/binary/b7/DAT/version_82`, evaluation `../../logs/en/asr/evaluate/version_40`
- AC_04: training `logs/en/ensemble/train/binary/b7/DAT/version_83`, evaluation `../../logs/en/asr/evaluate/version_41`
- AC_06: training `logs/en/ensemble/train/binary/b7/DAT/version_84`, evaluation `../../logs/en/asr/evaluate/version_42`
