<a href="https://colab.research.google.com/github/Brritany/kappa-value/blob/main/KappaValue.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 生成虛擬資料

In [34]:
import pandas as pd
import numpy as np
import random

# 生成50個不同的ID名
ids = ['ID_' + str(i) for i in range(1, 51)]

# 生成隨機的ID列表
random_ids = []
for _ in range(300):  # 生成300個隨機ID
    random_ids.append(random.choice(ids))

# 生成隨機的評分者數據
data = {
    'ID': random_ids,
    'rater_1': np.random.randint(0, 2, size=300),
    'rater_2': np.random.randint(0, 2, size=300),
    'rater_3': np.random.randint(0, 2, size=300),
    'rater_4': np.random.randint(0, 2, size=300),
    'rater_5': np.random.randint(0, 2, size=300),
    'rater_6': np.random.randint(0, 2, size=300),
}

# 創建DataFrame
df = pd.DataFrame(data)

In [35]:
df.head(5)

Unnamed: 0,ID,rater_1,rater_2,rater_3,rater_4,rater_5,rater_6
0,ID_46,1,0,0,0,1,1
1,ID_44,1,0,0,1,1,1
2,ID_3,1,0,1,1,1,0
3,ID_16,1,0,1,1,0,1
4,ID_20,1,1,0,0,0,0


In [23]:
# df.to_excel('random_sample.xlsx')

## Cohen’s kappa

[sklearn.metrics.cohen_kappa_score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.cohen_kappa_score.html)

```
Kappa係數   一致性程度

< 0.4         差

0.4 ~ 0.6     一般

0.6 ~ 0.8     好

> 0.8         極佳
```

In [36]:
from sklearn.metrics import cohen_kappa_score

# rater_1 vs rater_2
cohen_kappa = cohen_kappa_score(df['rater_1'], df['rater_2'])
cohen_kappa

0.046666666666666634

In [38]:
# rater_4 vs rater_6
cohen_kappa = cohen_kappa_score(df['rater_4'], df['rater_6'])
cohen_kappa

-0.018370607028753927

## Fleiss kappa

Extends Cohen's Kappa to more than 2 raters.

[statsmodels.stats.inter_rater.fleiss_kappa](https://www.statsmodels.org/dev/generated/statsmodels.stats.inter_rater.fleiss_kappa.html)

In [40]:
from statsmodels.stats.inter_rater import fleiss_kappa

def Fleiss_kappa(n_confirm: pd.DataFrame):
        
    df = pd.DataFrame()
    value_counts = n_confirm.apply(pd.value_counts, axis=1)
    for value in value_counts:
        df[value] = value_counts[value]
        df.fillna(value=0, inplace=True)   

    result = fleiss_kappa(np.array(df))
    return result

In [41]:
Fleiss_kappa(df)

-0.008898798762605022

## Kappa Value for each patient

In [45]:
grouped = df.groupby("ID")
kappa_scores = []
for name, group in grouped:
    if group.shape[0] >= 2:
        kappa_1vs2 = cohen_kappa_score(group.iloc[:, 1], group.iloc[:, 2])
        kappa_2vs3 = cohen_kappa_score(group.iloc[:, 2], group.iloc[:, 3])
        kappa_1vs3 = cohen_kappa_score(group.iloc[:, 1], group.iloc[:, 3])
        kappa_scores.append({"ID": name, "1vs2": kappa_1vs2, "2vs3": kappa_2vs3, "1vs3": kappa_1vs3})

each_cohen_kappa = pd.DataFrame(kappa_scores)

  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)


In [46]:
each_cohen_kappa.head(5)

Unnamed: 0,ID,1vs2,2vs3,1vs3
0,ID_1,-0.235294,0.222222,-0.076923
1,ID_10,0.615385,1.0,0.615385
2,ID_11,0.307692,-0.153846,0.1
3,ID_12,0.181818,-0.2,-0.363636
4,ID_13,-0.363636,0.285714,-0.153846


In [42]:
grouped = df.groupby("ID")
kappa_scores = []

for name, group in grouped:
    if group.shape[0] >= 2:
        # 刪除 ID 列，只保留評分列
        group_ratings = group.drop(columns=["ID"])

        # 計算 Fleiss Kappa 值並將其添加到 kappa_scores 列表中
        kappa = Fleiss_kappa(group_ratings)
        kappa_scores.append({"ID": name, "Fleiss Kappa": kappa})

each_fleiss_kappa = pd.DataFrame(kappa_scores)

In [44]:
each_fleiss_kappa.head(5)

Unnamed: 0,ID,Fleiss Kappa
0,ID_1,0.144444
1,ID_10,0.035714
2,ID_11,0.08022
3,ID_12,-0.075325
4,ID_13,-0.125
