In [79]:
import numpy as np
import pandas as pd
from scipy.stats import chisquare

def chi2_normalized_fixed(counts):
    """Return a chi-square-based agreement score in [0, 1] for label counts.

    The observed counts are tested against a uniform distribution over the
    ``k`` categories. The chi-square statistic is divided by its maximum
    attainable value, ``n * (k - 1)`` (reached when all ``n`` observations
    fall into a single category), so that 0.0 means the counts are perfectly
    uniform and 1.0 means every observation is in one category.

    Dividing by ``n`` alone is only correct for ``k == 2``; with more
    categories the ratio can exceed 1 and clamping would report perfect
    agreement for counts that are not unanimous.

    Args:
        counts: Sequence of per-category observation counts.

    Returns:
        The normalized statistic, clamped to [0.0, 1.0]. Returns 1.0 when
        there are no observations or at most one category.
    """
    print(f"Chi2 normalized fixed: {counts}")
    counts = np.array(counts)
    n = counts.sum()
    print(f"Total counts: {n}")
    k = len(counts)
    print(f"Number of categories: {k}")
    if n == 0 or k <= 1:
        return 1.0  # perfect agreement

    expected = np.full(k, n / k)
    print(f"Expected counts: {expected}")
    chi2_stat, _ = chisquare(f_obs=counts, f_exp=expected)
    print(f"Chi2 statistic: {chi2_stat}")
    # Against a uniform expectation the statistic is at most n * (k - 1).
    a = chi2_stat / (n * (k - 1))
    print(f"Normalized agreement (a): {a}")
    # The clamp only guards against floating-point drift outside [0, 1].
    return float(max(0.0, min(1.0, a)))

def compute_p_index_for_message_fixed(df_message, category, group_map):
    """Compute the P-index of one message's annotations for a category.

    The index is ``(1/k) * sum(a(G_w) * (1 - a(G)))`` over the ``k``
    annotator groups, where ``a(G)`` is the agreement over all annotations
    and ``a(G_w)`` is the agreement inside group ``w``, both computed by
    ``chi2_normalized_fixed``.

    Args:
        df_message: DataFrame with an ``'annotator'`` column and a column
            named ``category`` holding each annotator's label.
        category: Name of the label column to analyse.
        group_map: Mapping from annotator to group, as accepted by
            ``Series.map``.

    Returns:
        The P-index as a float. Returns 0.0 when no annotator can be
        assigned to a group (including an empty ``df_message``).
    """
    # Work on a copy so the caller's frame does not gain a 'group' column.
    df_message = df_message.copy()
    df_message['group'] = df_message['annotator'].map(group_map)

    # value_counts() ignores missing labels, so they must not become a
    # category of their own here either.
    labels = sorted(df_message[category].dropna().unique())

    counts_global = df_message[category].value_counts().reindex(labels, fill_value=0).values
    print("Calcolo accordo globale...")
    aG = chi2_normalized_fixed(counts_global)
    print(f"Accordo globale a(G): {aG:.4f}")
    print('\n')

    # Annotators missing from group_map map to NaN. Keeping NaN as a group
    # would add a phantom group with zero counts, which the agreement helper
    # scores as 1.0 and which would inflate the index.
    groups = df_message['group'].dropna().unique()
    k = len(groups)

    aGw_list = []
    for g in groups:
        # Reindex on the global label set so every group uses the same categories.
        counts_group = df_message[df_message['group'] == g][category].value_counts().reindex(labels, fill_value=0).values
        print(f"Calcolo accordo per il gruppo {g}...")
        aGw = chi2_normalized_fixed(counts_group)
        aGw_list.append(aGw)
        print(f"Accordo gruppo {g}: {aGw:.4f}")
        print('\n')

    if k == 0:
        # No groups to average over; avoid dividing by zero.
        p_index = 0.0
    else:
        p_index = (1/k) * sum(aGw * (1 - aG) for aGw in aGw_list)
    print(f"Accordo globale a(G): {aG:.4f}")
    print(f"P-index: {p_index:.4f}")
    return p_index
# --- Test example ---

# Annotator -> group assignment; the key order also fixes the row order below.
group_map = dict.fromkeys(['Alessandro', 'Michele', 'Michael'], 'A')
group_map.update(dict.fromkeys(['Elisa', 'Cristina', 'Eliana'], 'B'))

# Sample data: every annotator in group A answers 0, every one in group B answers 1.
data = {
    'annotator': list(group_map),
    'EUPHEMISM': [0] * 3 + [1] * 3,
}
df = pd.DataFrame(data)

compute_p_index_for_message_fixed(df, 'EUPHEMISM', group_map)


Calcolo accordo globale...
Chi2 normalized fixed: [3 3]
Total counts: 6
Number of categories: 2
Expected counts: [3. 3.]
Chi2 statistic: 0.0
Normalized agreement (a): 0.0
Accordo globale a(G): 0.0000


Calcolo accordo per il gruppo A...
Chi2 normalized fixed: [3 0]
Total counts: 3
Number of categories: 2
Expected counts: [1.5 1.5]
Chi2 statistic: 3.0
Normalized agreement (a): 1.0
Accordo gruppo A: 1.0000


Calcolo accordo per il gruppo B...
Chi2 normalized fixed: [0 3]
Total counts: 3
Number of categories: 2
Expected counts: [1.5 1.5]
Chi2 statistic: 3.0
Normalized agreement (a): 1.0
Accordo gruppo B: 1.0000


Accordo globale a(G): 0.0000
P-index: 1.0000


1.0