# Cohen Kappa Score

Cohen's kappa score measures inter-annotator agreement between two annotators, corrected for the agreement expected by chance. It is defined as:

$$
k = \frac{(\rho_o - \rho_e)}{(1-\rho_e)}
$$

- $\rho_o$ is the observed (empirical) probability that the two annotators agree on the label assigned to a sample.
- $\rho_e$ is the expected probability of agreement when both annotators assign labels at random.

In [None]:
# Ask the user to upload the annotation spreadsheet (.xlsx).
# `fileuploader` is a project helper; presumably it returns an upload widget
# (`finput`) and a dict-like holder (`data`) that receives the uploaded
# content under the 'data' key -- TODO confirm against causation.utils.
from causation.utils import fileuploader 

finput, data = fileuploader('.xlsx')
# Last expression of the cell: displays the upload widget in the notebook.
finput

In [None]:
import pandas as pd

# Pull the uploaded workbook out of the upload holder and parse it.
# header=1 tells pandas to use the second spreadsheet row (0-indexed) as the
# column names; the row above it is skipped.
workbook = data.get('data')
df = pd.read_excel(workbook, header=1)
# Preview the first rows as the cell output.
df.head()

### Calculate Cohen Kappa per 200 examples

In [None]:
from sklearn.metrics import cohen_kappa_score

# Label columns to score. For each label, the column named after the label is
# compared with the column carrying the same name plus a ".1" suffix
# (presumably the second annotator: pandas renames duplicate headers that
# way -- verify against the spreadsheet layout).
clazzes = ['(DE)', '(SE)', '(NA)', '(H&D)']
interval = 200
# Number of complete windows; a trailing partial window is not scored.
parts = len(df) // interval

# kappas[label] holds one Cohen kappa value per window, in window order.
kappas = {clazz: [] for clazz in clazzes}
for i in range(parts):
    # Positional, half-open window [i*interval, (i+1)*interval): exactly
    # `interval` rows, with no overlap between consecutive windows.
    # (Label-based `.loc[i:(i+1)*interval]` started at row i instead of
    # i*interval and also included the end row.)
    window = df.iloc[i * interval:(i + 1) * interval]
    for clazz in clazzes:
        first = window[clazz].to_numpy()
        second = window[f"{clazz}.1"].to_numpy()
        kappa = cohen_kappa_score(first, second)
        kappas[clazz].append(kappa)

# Sanity checks before continuing with the notebook.
assert len(kappas) == len(clazzes), "Mismatched number of classes."
assert all(len(values) == parts for values in kappas.values()), "Mismatched number of intervals."
"Passed, please continue."

In [None]:
from datetime import datetime
from pathlib import Path

import srsly

# Write the results into a fresh, timestamped (hidden) directory.
# exist_ok=False makes a name clash fail loudly instead of silently
# overwriting a previous run.
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
output_dir = Path(f"./.kappa-{now}")
output_dir.mkdir(exist_ok=False)
srsly.write_json(output_dir.joinpath("kappas.json"), kappas)
df.to_excel(output_dir.joinpath("annotations.xlsx"))
# NOTE: plot.png is written by the plotting cell below, once the figure has
# actually been drawn. Saving it here failed on a top-to-bottom run because
# matplotlib is not imported yet (and no figure exists at this point).

In [None]:
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(8,5))
# x positions: number of examples at the end of each window. Derived from
# `interval` so the axis stays correct if the window size is changed.
examples = [t * interval for t in range(1, parts + 1)]
# One line per label, one point per window.
for clazz, kappa in kappas.items():
    plt.plot(examples, kappa, label=clazz, marker='o')

plt.title("GEF: Cohen Kappa")
plt.xlabel('examples')
plt.ylabel('cohen kappa')
plt.grid(True)
plt.legend()
# Save before show(): show() may leave the current figure empty afterwards.
_ = plt.savefig(output_dir.joinpath('plot.png'), format='png')
plt.show()

In [None]:
import os
import zipfile
import panel as pn
pn.extension()


# Bundle every file of the output directory into one flat zip archive
# (written to the current working directory) and offer it for download.
zfname = Path(f'{now}-kappa.zip')
file_names = output_dir.glob("*")
with zipfile.ZipFile(zfname, mode='w') as zipf:
    for file_name in file_names:
        # Store only the bare file name so the archive has no directory prefix.
        member_name = os.path.basename(file_name)
        zipf.write(file_name, arcname=member_name)
print(f"Saved as {zfname}.\nClick below to download.")
# Last expression of the cell: renders the download button.
pn.widgets.FileDownload(file=str(zfname), filename=zfname.name)