# Interspeech 2021 results
This notebook displays the results for our Interspeech 2021 paper [1].

[1] A. Keesing, Y. S. Koh, and M. Witbrock, ‘Acoustic Features and Neural Representations for Categorical Emotion Recognition from Speech’, in Interspeech 2021, Aug. 2021, pp. 3415–3419. doi: 10.21437/Interspeech.2021-2217.

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from Orange.evaluation import graph_ranks
from scipy.stats import friedmanchisquare, rankdata
from statsmodels.stats.libqsturng import qsturng

%matplotlib inline

In [None]:
def run_friedman(table: pd.DataFrame, plot_cd: bool = True):
    """Runs Friedman test on given table and optionally graphs a
    critical-difference diagram.

    Prints the Friedman p-value, the critical difference (in ranks) and
    a per-column summary (mean rank, mean, standard deviation, median,
    MAD and effect size relative to the top-ranked column), sorted by
    mean rank. Nothing is returned.

    Args:
    -----
    table: DataFrame
        The data table, with subjects as rows and independent variable
        as columns. Values are ranked within each row so that the
        largest value gets rank 1.
    plot_cd: bool
        If True, draw a critical-difference diagram with
        ``graph_ranks`` after printing the summary.
    """
    # friedmanchisquare expects one sequence of measurements per treatment,
    # i.e. one argument per column of `table`.
    _, pvalue = friedmanchisquare(*table.transpose().to_numpy())
    names = list(table.columns)
    # Negate so that higher scores receive lower (better) ranks.
    avgrank = rankdata(-table.to_numpy(), axis=1).mean(0)
    # DataFrame.mad() was removed in pandas 2.0. This reproduces what it
    # computed: the mean absolute deviation around the column mean.
    # NOTE(review): this is not the median absolute deviation, although it
    # is paired with the median below. Confirm which one is intended.
    mad = (table - table.mean()).abs().mean()
    df = pd.DataFrame(
        {
            "Mean rank": avgrank,
            "Mean": table.mean(),
            "Std. dev.": table.std(),
            "Median": table.median(),
            "MAD": mad,
        },
        index=names,
    ).sort_values("Mean rank")

    topclf = df.index[0]
    n, k = table.shape
    # Effect size is calculated in terms of differences in MAD: the gap
    # between the top column's median and each column's median, divided by
    # the pooled MAD of the two columns.
    df["Effect size"] = (df.loc[topclf, "Median"] - df["Median"]) / np.sqrt(
        ((n - 1) * df.loc[topclf, "MAD"] ** 2 + (n - 1) * df["MAD"] ** 2) / (2 * n - 2)
    )
    # Critical difference of mean ranks. qsturng returns the studentized
    # range quantile q; the usual form (q / sqrt(2)) * sqrt(k(k+1) / (6n))
    # is folded into a single square root with 12n in the denominator.
    cd = qsturng(0.95, k, np.inf) * np.sqrt((k * (k + 1)) / (12 * n))
    print(f"p = {pvalue}, cd = {cd:.2f} ranks")
    print(df.to_string())
    print()
    if plot_cd:
        graph_ranks(avgrank, names, cd)


## Results using "offline" per-speaker normalisation

In [None]:
# Build one row per results CSV holding the mean of each metric over the
# rows of that file. The corpus and classifier names are taken from the
# two path components directly above the file, the feature-set name from
# the file stem.
metric_cols = ["uar", "war", "microf1", "macrof1"]
records = []
# Sort the paths so row order does not depend on filesystem enumeration.
for results_csv in sorted(Path("./results/norm_offline").glob("**/*.csv")):
    # Select columns before taking mean, so only the metrics are averaged
    record = pd.read_csv(results_csv)[metric_cols].mean().to_dict()
    record["corpus"] = results_csv.parts[-3]
    record["clf"] = results_csv.parts[-2]
    record["features"] = results_csv.stem
    records.append(record)
if not records:
    # Fail with an explicit message instead of an opaque concat error.
    raise FileNotFoundError("No results CSVs found under ./results/norm_offline")
# A single construction gives each row a unique index label.
df = pd.DataFrame(records)

In [None]:
# UAR tables with corpora as rows and either classifiers or feature sets
# as columns, aggregated (max or mean) over all runs that share a cell.
uar_tables = {
    (agg, by): df.pivot_table(values="uar", index="corpus", columns=by, aggfunc=agg)
    for by in ("clf", "features")
    for agg in ("max", "mean")
}
max_clf, mean_clf = uar_tables["max", "clf"], uar_tables["mean", "clf"]
max_feat, mean_feat = uar_tables["max", "features"], uar_tables["mean", "features"]

print("Friedman test for classifiers by corpus:")
run_friedman(mean_clf)
# Column positions ordered by ascending mean rank (rank 1 = highest UAR).
avgrank = rankdata(-mean_clf.to_numpy(), axis=1).mean(axis=0).argsort()
rankclf = mean_clf.columns[avgrank]

print("Friedman test for features by corpus:")
run_friedman(mean_feat)
# Same ordering, this time for the feature-set columns.
avgrank = rankdata(-mean_feat.to_numpy(), axis=1).mean(axis=0).argsort()
rankfeat = mean_feat.columns[avgrank]

In [None]:
# Reorder the columns of every table by the mean-rank ordering computed
# from the Friedman analysis (rankclf / rankfeat).
max_clf, mean_clf = max_clf[rankclf], mean_clf[rankclf]
max_feat, mean_feat = max_feat[rankfeat], mean_feat[rankfeat]

# Shared appearance: fixed [0, 1] colour scale with the value written in
# each cell.
heatmap_style = {"vmin": 0, "vmax": 1, "cmap": "Blues", "annot": True}

# Classifiers: best and average UAR across feature sets.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 7), sharey=True)
sns.heatmap(max_clf, ax=ax[0], **heatmap_style)
sns.heatmap(mean_clf, ax=ax[1], **heatmap_style)
ax[0].set_title("Max UAR over features")
ax[1].set_title("Mean UAR over features")

# Feature sets: best and average UAR across classifiers.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 7), sharey=True)
sns.heatmap(max_feat, ax=ax[0], **heatmap_style)
sns.heatmap(mean_feat, ax=ax[1], **heatmap_style)
ax[0].set_title("Max UAR over classifiers")
ax[1].set_title("Mean UAR over classifiers")

## Results using "online" normalisation

In [None]:
# Build one row per results CSV holding the mean of each metric over the
# rows of that file. The corpus and classifier names are taken from the
# two path components directly above the file, the feature-set name from
# the file stem.
metric_cols = ["uar", "war", "microf1", "macrof1"]
records = []
# Sort the paths so row order does not depend on filesystem enumeration.
for results_csv in sorted(Path("./results/norm_online").glob("**/*.csv")):
    # Select columns before taking mean, so only the metrics are averaged
    record = pd.read_csv(results_csv)[metric_cols].mean().to_dict()
    record["corpus"] = results_csv.parts[-3]
    record["clf"] = results_csv.parts[-2]
    record["features"] = results_csv.stem
    records.append(record)
if not records:
    # Fail with an explicit message instead of an opaque concat error.
    raise FileNotFoundError("No results CSVs found under ./results/norm_online")
# A single construction gives each row a unique index label.
df = pd.DataFrame(records)

In [None]:
# UAR tables with corpora as rows and either classifiers or feature sets
# as columns, aggregated (max or mean) over all runs that share a cell.
uar_tables = {
    (agg, by): df.pivot_table(values="uar", index="corpus", columns=by, aggfunc=agg)
    for by in ("clf", "features")
    for agg in ("max", "mean")
}
max_clf, mean_clf = uar_tables["max", "clf"], uar_tables["mean", "clf"]
max_feat, mean_feat = uar_tables["max", "features"], uar_tables["mean", "features"]

print("Friedman test for classifiers by corpus:")
run_friedman(mean_clf)
# Column positions ordered by ascending mean rank (rank 1 = highest UAR).
avgrank = rankdata(-mean_clf.to_numpy(), axis=1).mean(axis=0).argsort()
rankclf = mean_clf.columns[avgrank]

print("Friedman test for features by corpus:")
run_friedman(mean_feat)
# Same ordering, this time for the feature-set columns.
avgrank = rankdata(-mean_feat.to_numpy(), axis=1).mean(axis=0).argsort()
rankfeat = mean_feat.columns[avgrank]

In [None]:
# Reorder the columns of every table by the mean-rank ordering computed
# from the Friedman analysis (rankclf / rankfeat).
max_clf, mean_clf = max_clf[rankclf], mean_clf[rankclf]
max_feat, mean_feat = max_feat[rankfeat], mean_feat[rankfeat]

# Shared appearance: fixed [0, 1] colour scale with the value written in
# each cell.
heatmap_style = {"vmin": 0, "vmax": 1, "cmap": "Blues", "annot": True}

# Classifiers: best and average UAR across feature sets.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 7), sharey=True)
sns.heatmap(max_clf, ax=ax[0], **heatmap_style)
sns.heatmap(mean_clf, ax=ax[1], **heatmap_style)
ax[0].set_title("Max UAR over features")
ax[1].set_title("Mean UAR over features")

# Feature sets: best and average UAR across classifiers.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 7), sharey=True)
sns.heatmap(max_feat, ax=ax[0], **heatmap_style)
sns.heatmap(mean_feat, ax=ax[1], **heatmap_style)
ax[0].set_title("Max UAR over classifiers")
ax[1].set_title("Mean UAR over classifiers")