In [1]:
import pandas as pd


# Load the datasets
df_results = pd.read_csv("./2019-2020/reconciliation-20192020.csv")

# Filter out the invalid rows (based on the requirements)
df_results = df_results[(df_results['is_valid'] != False) & (df_results['confidence_score'] >= 75)]

features = df_results['features'].unique()
unique_minutes = df_results['minute_id'].unique()


# Count the total number of references (by taking all references for a single feature per minute)
total_references = df_results.groupby(['minute_id', 'features']).size().reset_index(name='count').shape[0]

print(f"{total_references} references found in {len(unique_minutes)} unique minutes.")
print("\nThis may be significantly smaller than detection \nas we only keeping references which have an identifier \npattern matched")


1933 references found in 178 unique minutes.

This may be significantly smaller than detection 
as we only keeping references which have an identifier 
pattern matched


In [2]:
df_results.head()

Unnamed: 0,minute_id,features,id,text,sentence,direct,hit@1,hit@10,hit@k,mrr,query,candidates,is_valid,confidence_score
0,h-tk-20192020-38-8,text+sentence,32,"32849, nr. 196",Op verzoek van mevrouw Agnes Mulder stel ik vo...,32849-196,0,0,-1,0.0,", nr. Op verzoek van mevrouw Agnes Mulder ste...","[{""id"": ""kst-35340-1"", ""title"": ""Initiatiefnot...",True,100
1,h-tk-20192020-38-8,text+sentence,39,"32849, nr. 197","Aangezien de motie-Sienot/Dik-Faber (32849, nr...",32849-197,0,0,-1,0.0,", nr. Aangezien de motie-Sienot/Dik-Faber (, ...","[{""id"": ""kst-35282-50"", ""title"": ""Wijziging va...",True,100
2,h-tk-20192020-39-51,text+sentence+keywords+summary-doctype,9,26689-1040,Zij krijgt nr. 1040 (26689).,26689-1040,0,0,-1,0.0,Zij krijgt nr. (). motie dossiernummer parlem...,"[{""id"": ""kst-31066-671"", ""title"": ""Belastingdi...",True,95
3,h-tk-20192020-39-51,text+sentence+keywords+summary-doctype,21,29689-1041,Zij krijgt nr. 1041 (29689).,29689-1041,0,0,-1,0.0,Zij krijgt nr. (). motie dossiernummer parlem...,"[{""id"": ""kst-31066-671"", ""title"": ""Belastingdi...",True,95
4,h-tk-20192020-39-51,text+sentence+keywords+summary-doctype,22,26689-1040,"De eerste motie, van de heer Van Gerven c.s., ...",26689-1040,0,0,-1,0.0,"De eerste motie, van de heer Van Gerven c.s., ...","[{""id"": ""kst-34682-39"", ""title"": ""Nationale Om...",True,95


In [9]:
import numpy as np
from scipy import stats

# Prepare the metrics dictionary
reconciliation_metrics_data = {
    'Features': [],
    'Hit@1 (95% CI)': [],
    'Hit@10 (95% CI)': [],
    'MRR': [],
    'Hit@k': []
}

# iterature over all linking systems
for featureset in features:
    # filter the results for the current linking system
    group = df_results[df_results['features'] == featureset]
    n = len(group)

    avg_hit_at_1 = group['hit@1'].mean()
    avg_hit_at_10 = group['hit@10'].mean()
    mrr = group['mrr'].mean()

    # Average position when found (remove any -1)
    avg_hit_at_k = group[group['hit@k'] != -1]['hit@k'].mean()

    # Use t-distribution for more accurate CIs
    t_value = stats.t.ppf(0.975, df=n-1)  # 95% CI

    hit_at_1_ci = t_value * np.std(group['hit@1'], ddof=1) / np.sqrt(n)
    hit_at_10_ci = t_value * np.std(group['hit@10'], ddof=1) / np.sqrt(n)
    mrr_stderr = np.std(group['mrr'], ddof=1) / np.sqrt(n)

    reconciliation_metrics_data['Features'].append(featureset)
    reconciliation_metrics_data['Hit@1 (95% CI)'].append(f"{avg_hit_at_1:.2f} ± {hit_at_1_ci:.2f}")
    reconciliation_metrics_data['Hit@10 (95% CI)'].append(f"{avg_hit_at_10:.2f} ± {hit_at_10_ci:.2f}")
    reconciliation_metrics_data['MRR'].append(f"{mrr:.2f} ± {mrr_stderr:.2f}")
    reconciliation_metrics_data['Hit@k'].append(f"{avg_hit_at_k:.2f}")



total_df = pd.DataFrame(reconciliation_metrics_data)

print(f'Reconciliation metrics for all systems for all references (N={total_references})')
print(total_df.to_markdown(index=False, floatfmt='.4f'))


Reconciliation metrics for all systems for all references (N=1933)
| Features                                    | Hit@1 (95% CI)   | Hit@10 (95% CI)   | MRR         |   Hit@k |
|:--------------------------------------------|:-----------------|:------------------|:------------|--------:|
| text+sentence                               | 0.03 ± 0.01      | 0.06 ± 0.01       | 0.04 ± 0.01 |  2.5600 |
| text+sentence+keywords+summary-doctype      | 0.33 ± 0.03      | 0.54 ± 0.03       | 0.40 ± 0.01 |  2.0900 |
| text+sentence+keywords-year                 | 0.30 ± 0.03      | 0.55 ± 0.03       | 0.38 ± 0.01 |  2.4200 |
| text+sentence+keywords+summary-year+doctype | 0.36 ± 0.03      | 0.56 ± 0.03       | 0.43 ± 0.01 |  2.0000 |
| text                                        | 0.00 ± 0.00      | 0.00 ± 0.00       | 0.00 ± 0.00 |  4.5000 |
| text+sentence+keywords+summary-year         | 0.32 ± 0.03      | 0.55 ± 0.03       | 0.40 ± 0.01 |  2.2800 |
| text+sentence+summary                      

In [6]:
# Show the hits for 'text' linking system
text_df = df_results[df_results['features'] == 'text']
text_df = text_df[text_df['hit@10'] > 0]
print(f'\nReconciliation metrics for "text" linking system (N={len(text_df)})')
print(text_df[['minute_id', 'query', 'direct', 'hit@1', 'hit@10', 'candidates']].to_markdown(index=False, floatfmt='.4f'))


Reconciliation metrics for "text" linking system (N=2)
| minute_id           | query                | direct    |   hit@1 |   hit@10 | candidates                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      

In [None]:
# Showcase some examples of the text vs query
print("\nExamples of text vs query:")



Examples of text vs query:


NameError: name 'text_df' is not defined

In [17]:
df_results[["text", "sentence", "query"]][300:350]

Unnamed: 0,text,sentence,query
300,nr. 534 (24515),Zij krijgt nr. 534 (24515).,nr. () Zij krijgt nr. (). preventie schulden...
301,nr. 535 (24515),Zij krijgt nr. 535 (24515).,nr. () Zij krijgt nr. (). jongerenuitkering ...
302,nr. 536 (24515),Zij krijgt nr. 536 (24515).,nr. () Zij krijgt nr. (). minimumloon armoed...
303,nr. 537 (24515),Zij krijgt nr. 537 (24515).,nr. () Zij krijgt nr. (). coronacrisis armoe...
304,nr. 538 (24515),Zij krijgt nr. 538 (24515).,nr. () Zij krijgt nr. (). gemeenten coronacr...
305,nr. 539 (24515),Zij krijgt nr. 539 (24515).,nr. () Zij krijgt nr. (). regeerakkoord armo...
306,nr. 540 (24515),Zij krijgt nr. 540 (24515).,nr. () Zij krijgt nr. (). zorginstelling sam...
307,nr. 541 (24515),Zij krijgt nr. 541 (24515).,nr. () Zij krijgt nr. (). kostendelersnorm a...
308,nr. 542 (24515),Zij krijgt nr. 542 (24515).,nr. () Zij krijgt nr. (). vermogensnorm kwij...
309,nr. 543 (24515),Zij krijgt nr. 543 (24515).,nr. () Zij krijgt nr. (). prostitutie zzp'er...
