In [11]:
import jsonlines

def get_actual_pred_labels(pred_file, gold_file):
    """Pair every prediction with its gold label and rumor text.

    Both files are read as JSON Lines. Gold records must provide the keys
    ``id``, ``rumor`` and ``label``; prediction records must provide ``id``
    and ``predicted_label``.

    All four returned lists follow the order of ``pred_file`` and contain one
    entry per prediction, so position ``i`` of every list describes the same
    rumor. This keeps the lists safe to combine column-wise even when the
    prediction file is ordered differently from the gold file or covers only
    part of it.

    Args:
        pred_file: Path of the JSON Lines file holding the predictions.
        gold_file: Path of the JSON Lines file holding the ground truth.

    Returns:
        Tuple ``(ids, actual_labels, pred_labels, rumors)`` of parallel lists.

    Raises:
        KeyError: If a prediction refers to an id that is absent from
            ``gold_file`` or a record lacks one of the required keys.
    """
    # Index the gold records by id so the pairing never depends on file order.
    gold_by_id = {}
    with jsonlines.open(gold_file) as reader:
        for record in reader:
            gold_by_id[record["id"]] = (record["label"], record["rumor"])

    # Materialise the predictions so the reader can be closed right away.
    with jsonlines.open(pred_file) as reader:
        predictions = list(reader)

    ids = []
    actual_labels = []
    pred_labels = []
    rumors = []
    for prediction in predictions:
        rumor_id = prediction["id"]
        gold_label, rumor_text = gold_by_id[rumor_id]
        ids.append(rumor_id)
        actual_labels.append(gold_label)
        pred_labels.append(prediction["predicted_label"])
        rumors.append(rumor_text)

    return (ids, actual_labels, pred_labels, rumors)

def get_actual_pred_labels_evidence(pred_file, gold_file):
    """Collect gold and predicted labels and evidence ids for every prediction.

    Both files are read as JSON Lines. Gold records must provide ``id``,
    ``label`` and ``evidence``; prediction records must provide ``id``,
    ``predicted_label`` and ``predicted_evidence``. From each evidence entry
    only the element at index 1 is kept, converted to ``str``.
    NOTE(review): index 1 looks like the evidence tweet id - confirm against
    the dataset schema.

    All returned lists follow the order of ``pred_file``.

    Args:
        pred_file: Path of the JSON Lines file holding the predictions.
        gold_file: Path of the JSON Lines file holding the ground truth.

    Returns:
        Tuple ``(actual_labels, pred_labels, actual_evidence, pred_evidence)``.
        The two label lists hold one label per prediction; the two evidence
        lists hold one list of string ids per prediction.

    Raises:
        KeyError: If a prediction refers to an id that is absent from
            ``gold_file`` or a record lacks one of the required keys.
    """
    gold_labels = {}
    gold_evidence = {}
    # `with` closes the underlying file handle as soon as the file is consumed.
    with jsonlines.open(gold_file) as reader:
        for record in reader:
            gold_labels[record["id"]] = record["label"]
            gold_evidence[record["id"]] = [str(ev[1]) for ev in record["evidence"]]

    with jsonlines.open(pred_file) as reader:
        predictions = list(reader)

    pred_labels = [prediction["predicted_label"] for prediction in predictions]
    pred_evidence = [
        [str(ev[1]) for ev in prediction["predicted_evidence"]]
        for prediction in predictions
    ]
    actual_labels = [gold_labels[prediction["id"]] for prediction in predictions]
    # Hand out copies so callers cannot alter the per-id gold lists through the result.
    actual_evidence = [list(gold_evidence[prediction["id"]]) for prediction in predictions]

    return (actual_labels, pred_labels, actual_evidence, pred_evidence)

In [12]:
from clef.utils.scoring import eval_run_custom
from clef.utils.data_loading import task5_dir

import pandas as pd
from IPython.core.display import display_html

dev: evaluation of the RQ3-openai run on the English dev split

In [13]:
# config
# Paths for the dev-split evaluation. The names defined here are read by the
# error-table, missing-evidence and scoring cells that follow.
# NOTE(review): root_path ends with '/' and the f-strings below insert another
# '/', so the resulting paths contain '//'.
root_path = '../../'
# Directory holding the run files that are evaluated below.
out_dir = './data-out/setup2'

# Sample submission from the task data directory; in this notebook it is only
# referenced by the commented-out 'baseline' row of the scoring cell.
sample_submission_file = f'{root_path}/{task5_dir}/submission_samples/KGAT_zeroShot_verification_English_dev.json'

# rq3_nli_submission_file = f'{out_dir}/zeroshot-ver-rq3-nli.jsonl'
# Run file that the cells below analyse and score.
rq3_openai_submission_file = f'{out_dir}/zeroshot-ver-rq3-openai.jsonl'

# Gold file for the dev split.
ground_truth_file = f'{root_path}/{task5_dir}/data/English_dev.json'

In [14]:
import pandas as pd

# Gold vs. predicted label for every rumor of the run under inspection.
ids, act, pred, rumors = get_actual_pred_labels(rq3_openai_submission_file, ground_truth_file)
df = pd.DataFrame(dict(id=ids, actual=act, predicted=pred, rumor_text=rumors))

# Lift the column-width cap so the rumor text is displayed untruncated.
pd.set_option('display.max_colwidth', None)

# Cell output: only the rows whose predicted label differs from the gold label.
df.loc[df['actual'].ne(df['predicted'])]

Unnamed: 0,id,actual,predicted,rumor_text
9,AuRED_038,REFUTES,NOT ENOUGH INFO,News of a missile passing over Kuwaiti airspace coming from the Iraqi side https://t.co/E8a1KXpf1E
11,AuRED_160,REFUTES,NOT ENOUGH INFO,Hoda Al-Sarari passed away as a result of a traffic accident in Spain https://t.co/gssbphe3OA
27,AuRED_033,NOT ENOUGH INFO,REFUTES,"After his appointment as Vice-President of the Qatari Constitutional Assembly, President Kais Saeed takes the oath before Her Highness Sheikha Moza Al-Misnad in the capital of #Qatar, Doha https://t.co/kCYFDQHYcS"


In [15]:
actual_labels, pred_labels, actual_evidence, pred_evidence = get_actual_pred_labels_evidence(rq3_openai_submission_file, ground_truth_file)

# Print every gold evidence id that is absent from the run's predicted evidence,
# rumor by rumor.
# The loop variables are deliberately not named `act` / `pred`: at cell level they
# are globals, and reusing those names would overwrite the label lists that the
# error-table cell stored under them.
for gold_evidence_ids, retrieved_evidence_ids in zip(actual_evidence, pred_evidence):
    retrieved = set(retrieved_evidence_ids)  # constant-time membership checks
    for evidence_id in gold_evidence_ids:
        if evidence_id not in retrieved:
            print(evidence_id)

1234715165767147523
1233784722238705670
1304111096949866497
1341840863358554115
1341782311889723401
1341779925326581761
1341429691417305091
1340648347502342145
1340641178920873985
1340280471306027009
1341324720164261888
1339883857374539778
1342093406395314178
1342067208856489985
1341764926612844546
1341669834329903104
1341504694603157506
1341478274845839362
1340680064506204162
1340259651171065856
1339948400154963970
1339895685752135684
1339878931177086976
1436952831215476739
1403716782276464643
1403741413817438210
1403795269855191040
1608880491989700608


In [16]:
# basic score eval

# time_now  = datetime.datetime.now().strftime('%d_%m_%Y_%H_%M_%S')
# out_file = f'{out_dir}/eval/RQ3-{time_now}.csv'

score_columns = ['method', 'macro-F1', 'strict-macro-F1']

# One row per evaluated run: the method name followed by the values unpacked
# from eval_run_custom. Enable a commented row to score that run as well.
eval_data = [
    # ['baseline',    *eval_run_custom(sample_submission_file, ground_truth_file, '')],
    # ['RQ3-nli',     *eval_run_custom(rq3_nli_submission_file, ground_truth_file, '')],
    ['RQ3-openai',  *eval_run_custom(rq3_openai_submission_file, ground_truth_file, '')],
]
eval_df = pd.DataFrame(eval_data, columns=score_columns)

# Two single-metric tables, each ordered from best to worst score.
df_r5 = eval_df.loc[:, ['method', 'macro-F1']].sort_values(by='macro-F1', ascending=False)
df_map = eval_df.loc[:, ['method', 'strict-macro-F1']].sort_values(by='strict-macro-F1', ascending=False)

# Inline display style, presumably so the two tables render next to each other.
df1_styler = df_r5.style.set_table_attributes("style='display:inline'").set_caption('macro-F1')
df2_styler = df_map.style.set_table_attributes("style='display:inline'").set_caption('strict-macro-F1')

# eval_df.to_csv(out_file)

side_by_side_html = ''.join([df1_styler._repr_html_(), df2_styler._repr_html_()])
display_html(side_by_side_html, raw=True)


Unnamed: 0,method,macro-F1
0,RQ3-openai,0.922963

Unnamed: 0,method,strict-macro-F1
0,RQ3-openai,0.907407


train: evaluation of the RQ3-openai run on the English train split

In [17]:
# config
# Paths for the train-split evaluation. This cell rebinds out_dir,
# rq3_openai_submission_file and ground_truth_file, so every cell executed
# after it reads the train files, including dev cells that are re-run.
# sample_submission_file is not redefined here and keeps its dev-split value.
# NOTE(review): root_path ends with '/' and the f-string below inserts another
# '/', so the resulting path contains '//'.
root_path = '../../'
# Directory holding the train-split run files.
out_dir = './data-out/train-setup2'


# rq3_nli_submission_file = f'{out_dir}/zeroshot-ver-rq3-nli.jsonl'
# Run file that the cells below analyse and score.
rq3_openai_submission_file = f'{out_dir}/zeroshot-ver-rq3-openai.jsonl'

# Gold file for the train split.
ground_truth_file = f'{root_path}/{task5_dir}/data/English_train.json'

In [18]:
import pandas as pd

# Gold vs. predicted label for every rumor of the run under inspection.
ids, act, pred, rumors = get_actual_pred_labels(rq3_openai_submission_file, ground_truth_file)
df = pd.DataFrame(dict(id=ids, actual=act, predicted=pred, rumor_text=rumors))

# Lift the column-width cap so the rumor text is displayed untruncated.
pd.set_option('display.max_colwidth', None)

# Cell output: the misclassified rows, grouped by their gold label.
df.loc[df['actual'].ne(df['predicted'])].sort_values(by='actual')

Unnamed: 0,id,actual,predicted,rumor_text
93,AuRED_016,NOT ENOUGH INFO,REFUTES,"Egypt does not want to give a vaccine to its citizens. The Gulf countries take care of them. Saudi Arabia / the Sultanate of Oman / Qatar refuses their intervention, so there is no one other than Kuwait, a country of humanity that receives them and feeds them. What is the mysterious secret? Kuwait treats Egypt with special treatment https://t.co/tebue00X8y"
20,AuRED_009,NOT ENOUGH INFO,SUPPORTS,Erdogan: My problems with Sisi were due to a misunderstanding and I *seek* to fix it.. Al Jazeera Channel https://t.co/BxNw1zcLg1
24,AuRED_052,NOT ENOUGH INFO,REFUTES,"News about the death of Saudi judo player Tahani Al-Qahtani, who was defeated by an Israeli player https://t.co/MJbPM1EkoO"
62,AuRED_020,NOT ENOUGH INFO,REFUTES,Real Madrid will play in the English Premier League... behind the scenes you know for the first time https://t.co/ggM7fdjry0 https://t.co/uFtO7xUqgt
54,AuRED_013,NOT ENOUGH INFO,REFUTES,Previous statements by the new Minister of Health not to wear a mask! #Jordan #Tell me https://t.co/gr7JSlbHB0
74,AuRED_041,REFUTES,NOT ENOUGH INFO,"Vaccination conspiracy? Do we agree that even if there is a question mark, Zain Telecommunications Company #Kuwait has committed a major mistake that must be corrected quickly?!"
72,AuRED_159,REFUTES,NOT ENOUGH INFO,News about the death of Sheikh Abdul Rahman Al-Sudais #Friday_of_Rage_25_September
70,AuRED_079,REFUTES,NOT ENOUGH INFO,Urgent: Mohamed Salah has been infected with the Corona virus.. Waiting for the result of the second swab to confirm!! .. Salamat ❤️! https://t.co/KrqPgvq9AM
69,AuRED_019,REFUTES,NOT ENOUGH INFO,"“Preventive information from UNICEF regarding the Corona virus ---------1- The Corona virus is large in size, with a cell diameter of 400-500 microns, and for this reason “any mask prevents its entry.” 2- The Corona virus does not settle in the air, but rather descends. to the ground, so it cannot be transmitted through the air.”"
68,AuRED_087,REFUTES,NOT ENOUGH INFO,Schubert admits that Al-Ahly management put pressure on CAF and forced it to change the Gambian arbitration team to a Moroccan arbitration team..!! https://t.co/pNO5HWVbuA


In [19]:
actual_labels, pred_labels, actual_evidence, pred_evidence = get_actual_pred_labels_evidence(rq3_openai_submission_file, ground_truth_file)

# Print every gold evidence id that is absent from the run's predicted evidence,
# rumor by rumor.
# The loop variables are deliberately not named `act` / `pred`: at cell level they
# are globals, and reusing those names would overwrite the label lists that the
# error-table cell stored under them.
for gold_evidence_ids, retrieved_evidence_ids in zip(actual_evidence, pred_evidence):
    retrieved = set(retrieved_evidence_ids)  # constant-time membership checks
    for evidence_id in gold_evidence_ids:
        if evidence_id not in retrieved:
            print(evidence_id)

1357211717479116800
1356683687870488576
1356683390733471746
1301079467889131522
1301067546200858626
1300798719751794688
1226482043007066113
1225345358961762304
1226202744475865089
1224702545127407617
1224372681870651392
1224361944959922178
1225075510461583362
1224728227136131073
1224727797043814400
1224725042845704193
1224919598698565633
1224919545057619968
1226177350376685569
1226116927015141376
1225068691332550662
1224546827657048064
1224347860436582400
1224339041534898179
1224153761511198720
1224264648142151680
1224264646913187842
1226159605815947264
1226159298398556160
1226093838357405696
1225418369857413120
1224759855837143040
1222506828694794240
1223649306667778049
1222476311291142145
1222418957833068546
1223651591712014338
1223647517855928326
1222457635909312512
1222382247099150337
1223649510116732930
1222489400422096896
1222421365459968001
1222581824242769920
1222581782421307393
1585012773125120000
1582632003370786816
1608466918561792000
1359595392908861447
1359595347228700676


In [20]:
# basic score eval

# time_now  = datetime.datetime.now().strftime('%d_%m_%Y_%H_%M_%S')
# out_file = f'{out_dir}/eval/RQ3-{time_now}.csv'

score_columns = ['method', 'macro-F1', 'strict-macro-F1']

# One row per evaluated run: the method name followed by the values unpacked
# from eval_run_custom. Enable a commented row to score that run as well.
eval_data = [
    # ['baseline',    *eval_run_custom(sample_submission_file, ground_truth_file, '')],
    # ['RQ3-nli',     *eval_run_custom(rq3_nli_submission_file, ground_truth_file, '')],
    ['RQ3-openai',  *eval_run_custom(rq3_openai_submission_file, ground_truth_file, '')],
]
eval_df = pd.DataFrame(eval_data, columns=score_columns)

# Two single-metric tables, each ordered from best to worst score.
df_r5 = eval_df.loc[:, ['method', 'macro-F1']].sort_values(by='macro-F1', ascending=False)
df_map = eval_df.loc[:, ['method', 'strict-macro-F1']].sort_values(by='strict-macro-F1', ascending=False)

# Inline display style, presumably so the two tables render next to each other.
df1_styler = df_r5.style.set_table_attributes("style='display:inline'").set_caption('macro-F1')
df2_styler = df_map.style.set_table_attributes("style='display:inline'").set_caption('strict-macro-F1')

# eval_df.to_csv(out_file)

side_by_side_html = ''.join([df1_styler._repr_html_(), df2_styler._repr_html_()])
display_html(side_by_side_html, raw=True)


Unnamed: 0,method,macro-F1
0,RQ3-openai,0.784026

Unnamed: 0,method,strict-macro-F1
0,RQ3-openai,0.762686
