In [17]:
import pandas as pd
import numpy as np
from sklearn import metrics


# Data Source #1: REBEL + Ablations

In [31]:
# Load the claims frame for Data Source #1 (REBEL + ablations).
# NOTE: unpickling can execute arbitrary code — only load files from a trusted source.
data_df = pd.read_pickle('./data/data_post_entailment.pkl')

In [26]:
data_df.head()

Unnamed: 0,claim_id,claim,date_published,explanation,fact_checkers,main_text,sources,label,subjects,claim_kg,...,clean_claim_triples,main_text_triples,clean_main_text_triples,cleaned_claim_evidence_1,claim_evidence_1,cleaned_entailment_scores_1,entailment_scores_1,verdict,verdict_actual,clean_verdict
0,34656,A baby died at an unnamed medical facility be...,"November 10, 2015",Fellow Twitter users suggested @FierceFemtivis...,Kim LaCapria,"On 8 November 2015, former Twitter user @Fierc...",http://webcache.googleusercontent.com/search?q...,unproven,"Politics, fiercefemtivist, racism",[{'head': 'A baby died at an unnamed medical f...,...,"[(A baby died at an unnamed medical facility, ...","[(Confederate flag, instance of, racist), (Fie...","[(Confederate flag, instance of, racist), (Fie...","{'(A baby died at an unnamed medical facility,...","{'(A baby died at an unnamed medical facility,...","[[0.66573095, -0.5662217, -0.4859993], [0.4305...","[[0.66573095, -0.56622165, -0.48599926], [0.63...",-1,0,-1
1,3632,Bat from Shawnee County tests positive for rab...,,A bat found in northeastern Kansas has tested ...,,Topeka television station KSNT reports that th...,https://www.ksnt.com/news/bat-tests-positive-f...,true,"Rabies, Health, General News, Kansas, Bats, To...","[{'head': 'Bat from Shawnee County', 'type': '...",...,"[(Shawnee County, located in the administrativ...","[(KSNT, located in the administrative territor...","[(KSNT, located in the administrative territor...","{'(Shawnee County, located in the administrati...","{'(Bat from Shawnee County, has cause, rabies)...","[[0.6284045, -0.6450901, -0.4347028], [0.61757...","[[0.62699527, -0.7306919, -0.27012262], [0.628...",-1,1,-1
2,29558,Germany has banned pork from school canteens b...,"March 7, 2016",What's true: Some politicians complained that ...,Kim LaCapria,"On 7 March 2016, British tabloid Express repor...",http://bnp.org.uk/news/regional/bnp-victory-br...,false,Politics,"[{'head': 'banned pork from school canteens', ...",...,"[(banned pork from school canteens, country, G...","[(Express, country, British), (Express, instan...","[(Express, country, British), (Express, instan...","{'(banned pork from school canteens, country, ...","{'(banned pork from school canteens, country, ...","[[0.6420961, -0.6923114, -0.3292681], [0.68023...","[[0.6420961, -0.6923114, -0.3292681], [0.68787...",-1,-1,-1
3,8416,Coronavirus prompts Canada to roll out safe dr...,"April 16, 2020",Canada’s Pacific province of British Columbia ...,Tessa Vikander,"In March, the Canadian government urged provin...",,true,Health News,"[{'head': 'Coronavirus', 'type': 'country', 't...",...,"[(Coronavirus, country, Canada), (safe drugs, ...","[(heroin, instance of, controlled substances),...","[(heroin, instance of, controlled substance), ...","{'(Coronavirus, country, Canada)': ['(coronavi...","{'(Coronavirus, country, Canada)': ['(coronavi...","[[-0.15032902, -0.75801086, 0.6346816], [0.660...","[[-0.10043568, -0.7755534, 0.6232412], [0.6620...",-1,1,-1
4,7169,"Wayne National Forest plans fires for tree, wi...",,"Nearly 2,000 acres of Wayne National Forest in...",,Forest officials say scientists who study nati...,,true,"Plants, Wildlife, Health, Wildlife health, For...","[{'head': 'Wayne National Forest', 'type': 'in...",...,"[(Wayne National Forest, instance of, wildlife...","[(oak forests, located in the administrative t...","[(oak forests, located in the administrative t...","{'(Wayne National Forest, instance of, wildlif...","{'(Wayne National Forest, instance of, forest)...","[[0.63135314, -0.7029956, -0.32739952], [0.662...","[[0.63064003, -0.6786143, -0.3765312], [0.6313...",-1,1,-1


In [35]:
# Encode the gold label as a signed class id: 'true' -> 1, 'false' -> -1,
# and every other label value -> 0.
data_df['verdict_actual'] = np.select(
    [data_df['label'] == 'true', data_df['label'] == 'false'],
    [1, -1],
    default=0,
)

In [29]:
def get_verdict(row, low, high, cleaned=True, random=False):
    """Collapse per-sub-claim entailment scores into one claim-level verdict.

    For every sub-claim, the highest-scoring position of its score vector casts a
    vote weighted by that score: position 0 votes -1, position 1 votes +1 and
    position 2 votes 0. The weighted votes are summed (not averaged, so the total
    scales with the number of sub-claims) and the sum is bucketed by two thresholds.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) holding the score columns.
            Each column value is an iterable of score vectors; vectors may be
            NumPy arrays or plain sequences.
        low: The summed vote must exceed this to avoid a -1 verdict.
        high: The summed vote must exceed this to yield a 1 verdict.
        cleaned: If true, read 'cleaned_entailment_scores_1'; otherwise read
            'entailment_scores_1'. Ignored when ``random`` is true.
        random: If true, read 'random_entailment_scores_1' instead.

    Returns:
        1 if the summed vote is > ``high``; otherwise 0 if it is > ``low``;
        otherwise -1. A row with no sub-claims has a summed vote of 0.
    """
    if random:
        score_column = 'random_entailment_scores_1'
    elif cleaned:
        score_column = 'cleaned_entailment_scores_1'
    else:
        score_column = 'entailment_scores_1'

    # NOTE(review): assumes each score vector is ordered
    # (contradiction, support, neutral) — confirm against the entailment model.
    index_values = [-1, 1, 0]
    verdict = 0

    for subclaim_entailment_scores in row[score_column]:
        # No-op for ndarrays; lets list/tuple score vectors through as well,
        # which previously failed on the missing .argmax() method.
        scores = np.asarray(subclaim_entailment_scores)
        which_verdict = scores.argmax()
        verdict += index_values[which_verdict] * scores[which_verdict]

    if verdict > high:
        return 1
    if verdict > low:
        return 0
    return -1

In [42]:
# Candidate values for the (low, high) verdict thresholds.
highs = np.arange(-1, 1, 0.01)
lows = np.arange(-1, 1, 0.01)

# For low < high, get_verdict returns 1 exactly when a row's summed vote exceeds
# `high`, and -1 exactly when it does not exceed `low`. Each class mask therefore
# depends on a single threshold, so the row-wise apply is run once per threshold
# value instead of once per (low, high) pair. Predictions are unchanged.
support_masks = [
    (data_df.apply(lambda row: get_verdict(row, -np.inf, high), axis=1) == 1).to_numpy()
    for high in highs
]
contradiction_masks = [
    (data_df.apply(lambda row: get_verdict(row, low, np.inf), axis=1) == -1).to_numpy()
    for low in lows
]

# NOTE(review): thresholds are selected on the same frame that is scored in the
# report cells, so the reported metrics are optimistic; consider a held-out split.
best_f1_score = -1
best_high = 0.1
best_low = -0.1
for high, is_support in zip(highs, support_masks):
    for low, is_contradiction in zip(lows, contradiction_masks):
        if low < high:
            predicted = np.where(is_support, 1, np.where(is_contradiction, -1, 0))

            f1 = metrics.f1_score(data_df['verdict_actual'], predicted, labels=[-1, 1, 0], average='weighted')

            # Strict '>' keeps the first pair (in scan order) on ties.
            if f1 > best_f1_score:
                best_f1_score = f1
                best_low = low
                best_high = high

# Leave the predictions for the selected thresholds in the frame (the per-pair
# loop used to leave whichever pair happened to be evaluated last).
data_df['verdict'] = data_df.apply(lambda row: get_verdict(row, best_low, best_high), axis=1)

print(best_f1_score)
print(best_low)
print(best_high)



0.36532800124454373
-0.6599999999999997
-0.6299999999999997


In [33]:
# Score every row with the tuned thresholds, once per score column:
# redundancy-removed ("clean"), original, and randomized entailment scores.
verdict_variants = {
    'clean_verdict': {'cleaned': True},
    'verdict': {'cleaned': False},
    'random_verdict': {'cleaned': True, 'random': True},
}
for verdict_column, variant_kwargs in verdict_variants.items():
    data_df[verdict_column] = data_df.apply(
        lambda row: get_verdict(row, best_low, best_high, **variant_kwargs),
        axis=1,
    )


In [39]:
target_names = ['contradiction', 'support', 'nei']
labels = [-1, 1, 0]

# One (heading, prediction column) pair per ablation; each is scored against the
# gold 'verdict_actual' column, with a blank line between consecutive reports.
report_sections = [
    ('With Redundancy Removal', 'clean_verdict'),
    ('Without Redundancy Removal', 'verdict'),
    ('Entailment Randomized', 'random_verdict'),
]

for position, (heading, prediction_column) in enumerate(report_sections):
    if position > 0:
        print()

    print(heading)
    print(
        metrics.classification_report(
            data_df['verdict_actual'],
            data_df[prediction_column],
            target_names=target_names,
            labels=labels,
        )
    )
    print(
        "Accuracy Score: ",
        metrics.accuracy_score(data_df['verdict_actual'], data_df[prediction_column]),
    )


With Redundancy Removal
               precision    recall  f1-score   support

contradiction       0.28      0.27      0.27       768
      support       0.47      0.59      0.52      1228
          nei       0.20      0.07      0.11       456

     accuracy                           0.39      2452
    macro avg       0.31      0.31      0.30      2452
 weighted avg       0.36      0.39      0.37      2452

Accuracy Score:  0.39110929853181076

Without Redundancy Removal
               precision    recall  f1-score   support

contradiction       0.27      0.32      0.29       768
      support       0.45      0.52      0.48      1228
          nei       0.18      0.04      0.07       456

     accuracy                           0.37      2452
    macro avg       0.30      0.29      0.28      2452
 weighted avg       0.34      0.37      0.34      2452

Accuracy Score:  0.3674551386623165

Entailment Randomized
               precision    recall  f1-score   support

contradiction       

In [40]:
# Persist the frame, including the verdict columns computed in this section.
data_df.to_pickle('./data/data_verdict.pkl')

In [41]:
data_df.head()

Unnamed: 0,claim_id,claim,date_published,explanation,fact_checkers,main_text,sources,label,subjects,claim_kg,...,cleaned_claim_evidence_1,claim_evidence_1,cleaned_entailment_scores_1,entailment_scores_1,random_claim_evidence_1,random_entailment_scores_1,random_verdict,clean_verdict,verdict,verdict_actual
0,34656,A baby died at an unnamed medical facility be...,"November 10, 2015",Fellow Twitter users suggested @FierceFemtivis...,Kim LaCapria,"On 8 November 2015, former Twitter user @Fierc...",http://webcache.googleusercontent.com/search?q...,unproven,"Politics, fiercefemtivist, racism",[{'head': 'A baby died at an unnamed medical f...,...,"{'(A baby died at an unnamed medical facility,...","{'(A baby died at an unnamed medical facility,...","[[0.66573095, -0.5662217, -0.4859993], [0.4305...","[[0.66573095, -0.56622165, -0.48599926], [0.63...","{'(A baby died at an unnamed medical facility,...","[[0.6385632, -0.5613695, -0.52640414], [0.6686...",-1,-1,-1,0
1,3632,Bat from Shawnee County tests positive for rab...,,A bat found in northeastern Kansas has tested ...,,Topeka television station KSNT reports that th...,https://www.ksnt.com/news/bat-tests-positive-f...,true,"Rabies, Health, General News, Kansas, Bats, To...","[{'head': 'Bat from Shawnee County', 'type': '...",...,"{'(Shawnee County, located in the administrati...","{'(Bat from Shawnee County, has cause, rabies)...","[[0.6284045, -0.6450901, -0.4347028], [0.61757...","[[0.62699527, -0.7306919, -0.27012262], [0.628...","{'(Shawnee County, located in the administrati...","[[0.6175279, -0.7271561, -0.2998387], [-0.5644...",1,-1,-1,1
2,29558,Germany has banned pork from school canteens b...,"March 7, 2016",What's true: Some politicians complained that ...,Kim LaCapria,"On 7 March 2016, British tabloid Express repor...",http://bnp.org.uk/news/regional/bnp-victory-br...,false,Politics,"[{'head': 'banned pork from school canteens', ...",...,"{'(banned pork from school canteens, country, ...","{'(banned pork from school canteens, country, ...","[[0.6420961, -0.6923114, -0.3292681], [0.68023...","[[0.6420961, -0.6923114, -0.3292681], [0.68787...","{'(banned pork from school canteens, country, ...","[[0.679509, -0.6404532, -0.35789278], [0.69187...",-1,-1,-1,-1
3,8416,Coronavirus prompts Canada to roll out safe dr...,"April 16, 2020",Canada’s Pacific province of British Columbia ...,Tessa Vikander,"In March, the Canadian government urged provin...",,true,Health News,"[{'head': 'Coronavirus', 'type': 'country', 't...",...,"{'(Coronavirus, country, Canada)': ['(coronavi...","{'(Coronavirus, country, Canada)': ['(coronavi...","[[-0.15032902, -0.75801086, 0.6346816], [0.660...","[[-0.10043568, -0.7755534, 0.6232412], [0.6620...","{'(Coronavirus, country, Canada)': ['(opioid, ...","[[0.69448966, -0.6130783, -0.37658876], [0.686...",-1,-1,-1,1
4,7169,"Wayne National Forest plans fires for tree, wi...",,"Nearly 2,000 acres of Wayne National Forest in...",,Forest officials say scientists who study nati...,,true,"Plants, Wildlife, Health, Wildlife health, For...","[{'head': 'Wayne National Forest', 'type': 'in...",...,"{'(Wayne National Forest, instance of, wildlif...","{'(Wayne National Forest, instance of, forest)...","[[0.63135314, -0.7029956, -0.32739952], [0.662...","[[0.63064003, -0.6786143, -0.3765312], [0.6313...","{'(Wayne National Forest, instance of, wildlif...","[[0.6313532, -0.7029955, -0.3273997], [-0.2482...",0,-1,-1,1


# Data Source #2: FRED

In [None]:
# Load the claims frame for Data Source #2 (FRED); this rebinds `data_df`.
# NOTE: unpickling can execute arbitrary code — only load files from a trusted source.
data_df = pd.read_pickle('./data/fred/data_post_entailment.pkl')

In [None]:
data_df.head()

Unnamed: 0,claim_id,claim,date_published,explanation,fact_checkers,main_text,sources,label,subjects,claim_kg,...,clean_claim_triples,main_text_triples,clean_main_text_triples,cleaned_claim_evidence_1,claim_evidence_1,cleaned_entailment_scores_1,entailment_scores_1,verdict,verdict_actual,clean_verdict
0,34656,A baby died at an unnamed medical facility be...,"November 10, 2015",Fellow Twitter users suggested @FierceFemtivis...,Kim LaCapria,"On 8 November 2015, former Twitter user @Fierc...",http://webcache.googleusercontent.com/search?q...,unproven,"Politics, fiercefemtivist, racism",[{'head': 'A baby died at an unnamed medical f...,...,"[(A baby died at an unnamed medical facility, ...","[(Confederate flag, instance of, racist), (Fie...","[(Confederate flag, instance of, racist), (Fie...","{'(A baby died at an unnamed medical facility,...","{'(A baby died at an unnamed medical facility,...","[[0.66573095, -0.5662217, -0.4859993], [0.4305...","[[0.66573095, -0.56622165, -0.48599926], [0.63...",-1,0,-1
1,3632,Bat from Shawnee County tests positive for rab...,,A bat found in northeastern Kansas has tested ...,,Topeka television station KSNT reports that th...,https://www.ksnt.com/news/bat-tests-positive-f...,true,"Rabies, Health, General News, Kansas, Bats, To...","[{'head': 'Bat from Shawnee County', 'type': '...",...,"[(Shawnee County, located in the administrativ...","[(KSNT, located in the administrative territor...","[(KSNT, located in the administrative territor...","{'(Shawnee County, located in the administrati...","{'(Bat from Shawnee County, has cause, rabies)...","[[0.6284045, -0.6450901, -0.4347028], [0.61757...","[[0.62699527, -0.7306919, -0.27012262], [0.628...",-1,1,-1
2,29558,Germany has banned pork from school canteens b...,"March 7, 2016",What's true: Some politicians complained that ...,Kim LaCapria,"On 7 March 2016, British tabloid Express repor...",http://bnp.org.uk/news/regional/bnp-victory-br...,false,Politics,"[{'head': 'banned pork from school canteens', ...",...,"[(banned pork from school canteens, country, G...","[(Express, country, British), (Express, instan...","[(Express, country, British), (Express, instan...","{'(banned pork from school canteens, country, ...","{'(banned pork from school canteens, country, ...","[[0.6420961, -0.6923114, -0.3292681], [0.68023...","[[0.6420961, -0.6923114, -0.3292681], [0.68787...",-1,-1,-1
3,8416,Coronavirus prompts Canada to roll out safe dr...,"April 16, 2020",Canada’s Pacific province of British Columbia ...,Tessa Vikander,"In March, the Canadian government urged provin...",,true,Health News,"[{'head': 'Coronavirus', 'type': 'country', 't...",...,"[(Coronavirus, country, Canada), (safe drugs, ...","[(heroin, instance of, controlled substances),...","[(heroin, instance of, controlled substance), ...","{'(Coronavirus, country, Canada)': ['(coronavi...","{'(Coronavirus, country, Canada)': ['(coronavi...","[[-0.15032902, -0.75801086, 0.6346816], [0.660...","[[-0.10043568, -0.7755534, 0.6232412], [0.6620...",-1,1,-1
4,7169,"Wayne National Forest plans fires for tree, wi...",,"Nearly 2,000 acres of Wayne National Forest in...",,Forest officials say scientists who study nati...,,true,"Plants, Wildlife, Health, Wildlife health, For...","[{'head': 'Wayne National Forest', 'type': 'in...",...,"[(Wayne National Forest, instance of, wildlife...","[(oak forests, located in the administrative t...","[(oak forests, located in the administrative t...","{'(Wayne National Forest, instance of, wildlif...","{'(Wayne National Forest, instance of, forest)...","[[0.63135314, -0.7029956, -0.32739952], [0.662...","[[0.63064003, -0.6786143, -0.3765312], [0.6313...",-1,1,-1


In [None]:
# Encode the gold label as a signed class id: 'true' -> 1, 'false' -> -1,
# and every other label value -> 0.
data_df['verdict_actual'] = np.select(
    [data_df['label'] == 'true', data_df['label'] == 'false'],
    [1, -1],
    default=0,
)

In [None]:
def get_verdict(row, low, high):
    """Collapse the FRED per-sub-claim entailment scores into one verdict.

    For every sub-claim in ``row['fred_entailment_scores_1']``, the
    highest-scoring position of its score vector casts a vote weighted by that
    score: position 0 votes -1, position 1 votes +1 and position 2 votes 0. The
    weighted votes are summed (not averaged) and bucketed by two thresholds.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a
            'fred_entailment_scores_1' entry: an iterable of score vectors,
            each a NumPy array or a plain sequence.
        low: The summed vote must exceed this to avoid a -1 verdict.
        high: The summed vote must exceed this to yield a 1 verdict.

    Returns:
        1 if the summed vote is > ``high``; otherwise 0 if it is > ``low``;
        otherwise -1. A row with no sub-claims has a summed vote of 0.
    """
    # NOTE(review): assumes each score vector is ordered
    # (contradiction, support, neutral) — confirm against the entailment model.
    index_values = [-1, 1, 0]
    verdict = 0

    for subclaim_entailment_scores in row['fred_entailment_scores_1']:
        # No-op for ndarrays; lets list/tuple score vectors through as well,
        # which previously failed on the missing .argmax() method.
        scores = np.asarray(subclaim_entailment_scores)
        which_verdict = scores.argmax()
        verdict += index_values[which_verdict] * scores[which_verdict]

    if verdict > high:
        return 1
    if verdict > low:
        return 0
    return -1

In [None]:
# Candidate values for the (low, high) verdict thresholds.
highs = np.arange(-1, 1, 0.01)
lows = np.arange(-1, 1, 0.01)

# For low < high, get_verdict returns 1 exactly when a row's summed vote exceeds
# `high`, and -1 exactly when it does not exceed `low`. Each class mask therefore
# depends on a single threshold, so the row-wise apply is run once per threshold
# value instead of once per (low, high) pair. Predictions are unchanged.
support_masks = [
    (data_df.apply(lambda row: get_verdict(row, -np.inf, high), axis=1) == 1).to_numpy()
    for high in highs
]
contradiction_masks = [
    (data_df.apply(lambda row: get_verdict(row, low, np.inf), axis=1) == -1).to_numpy()
    for low in lows
]

# NOTE(review): thresholds are selected on the same frame that is scored in the
# report cell, so the reported metrics are optimistic; consider a held-out split.
best_f1_score = -1
best_high = 0.1
best_low = -0.1
for high, is_support in zip(highs, support_masks):
    for low, is_contradiction in zip(lows, contradiction_masks):
        if low < high:
            predicted = np.where(is_support, 1, np.where(is_contradiction, -1, 0))

            f1 = metrics.f1_score(data_df['verdict_actual'], predicted, labels=[-1, 1, 0], average='weighted')

            # Strict '>' keeps the first pair (in scan order) on ties.
            if f1 > best_f1_score:
                best_f1_score = f1
                best_low = low
                best_high = high

# Leave the predictions for the selected thresholds in the frame (the per-pair
# loop used to leave whichever pair happened to be evaluated last).
data_df['verdict'] = data_df.apply(lambda row: get_verdict(row, best_low, best_high), axis=1)

print(best_f1_score)
print(best_low)
print(best_high)



0.36532800124454373
-0.6599999999999997
-0.6299999999999997


In [None]:
# The get_verdict defined for this data source takes only (row, low, high); the
# `cleaned` keyword exists only on the Data Source #1 version, so passing
# `cleaned=False` here raised a TypeError.
data_df['verdict'] = data_df.apply(lambda row: get_verdict(row, best_low, best_high), axis=1)


In [None]:
target_names = ['contradiction', 'support', 'nei']
labels = [-1, 1, 0]

# Per-class precision/recall/F1 of the predicted verdicts against the gold labels.
report_text = metrics.classification_report(
    data_df['verdict_actual'],
    data_df['verdict'],
    labels=labels,
    target_names=target_names,
)
print(report_text)

# Overall fraction of rows whose predicted verdict equals the gold label.
accuracy = metrics.accuracy_score(data_df['verdict_actual'], data_df['verdict'])
print("Accuracy Score: ", accuracy)


With Redundancy Removal
               precision    recall  f1-score   support

contradiction       0.28      0.27      0.27       768
      support       0.47      0.59      0.52      1228
          nei       0.20      0.07      0.11       456

     accuracy                           0.39      2452
    macro avg       0.31      0.31      0.30      2452
 weighted avg       0.36      0.39      0.37      2452

Accuracy Score:  0.39110929853181076

Without Redundancy Removal
               precision    recall  f1-score   support

contradiction       0.27      0.32      0.29       768
      support       0.45      0.52      0.48      1228
          nei       0.18      0.04      0.07       456

     accuracy                           0.37      2452
    macro avg       0.30      0.29      0.28      2452
 weighted avg       0.34      0.37      0.34      2452

Accuracy Score:  0.3674551386623165

Entailment Randomized
               precision    recall  f1-score   support

contradiction       

In [None]:
# Persist the FRED frame, including the verdict columns computed in this section.
data_df.to_pickle('./data/fred/data_verdict.pkl')

In [None]:
data_df.head()

Unnamed: 0,claim_id,claim,date_published,explanation,fact_checkers,main_text,sources,label,subjects,claim_kg,...,cleaned_claim_evidence_1,claim_evidence_1,cleaned_entailment_scores_1,entailment_scores_1,random_claim_evidence_1,random_entailment_scores_1,random_verdict,clean_verdict,verdict,verdict_actual
0,34656,A baby died at an unnamed medical facility be...,"November 10, 2015",Fellow Twitter users suggested @FierceFemtivis...,Kim LaCapria,"On 8 November 2015, former Twitter user @Fierc...",http://webcache.googleusercontent.com/search?q...,unproven,"Politics, fiercefemtivist, racism",[{'head': 'A baby died at an unnamed medical f...,...,"{'(A baby died at an unnamed medical facility,...","{'(A baby died at an unnamed medical facility,...","[[0.66573095, -0.5662217, -0.4859993], [0.4305...","[[0.66573095, -0.56622165, -0.48599926], [0.63...","{'(A baby died at an unnamed medical facility,...","[[0.6385632, -0.5613695, -0.52640414], [0.6686...",-1,-1,-1,0
1,3632,Bat from Shawnee County tests positive for rab...,,A bat found in northeastern Kansas has tested ...,,Topeka television station KSNT reports that th...,https://www.ksnt.com/news/bat-tests-positive-f...,true,"Rabies, Health, General News, Kansas, Bats, To...","[{'head': 'Bat from Shawnee County', 'type': '...",...,"{'(Shawnee County, located in the administrati...","{'(Bat from Shawnee County, has cause, rabies)...","[[0.6284045, -0.6450901, -0.4347028], [0.61757...","[[0.62699527, -0.7306919, -0.27012262], [0.628...","{'(Shawnee County, located in the administrati...","[[0.6175279, -0.7271561, -0.2998387], [-0.5644...",1,-1,-1,1
2,29558,Germany has banned pork from school canteens b...,"March 7, 2016",What's true: Some politicians complained that ...,Kim LaCapria,"On 7 March 2016, British tabloid Express repor...",http://bnp.org.uk/news/regional/bnp-victory-br...,false,Politics,"[{'head': 'banned pork from school canteens', ...",...,"{'(banned pork from school canteens, country, ...","{'(banned pork from school canteens, country, ...","[[0.6420961, -0.6923114, -0.3292681], [0.68023...","[[0.6420961, -0.6923114, -0.3292681], [0.68787...","{'(banned pork from school canteens, country, ...","[[0.679509, -0.6404532, -0.35789278], [0.69187...",-1,-1,-1,-1
3,8416,Coronavirus prompts Canada to roll out safe dr...,"April 16, 2020",Canada’s Pacific province of British Columbia ...,Tessa Vikander,"In March, the Canadian government urged provin...",,true,Health News,"[{'head': 'Coronavirus', 'type': 'country', 't...",...,"{'(Coronavirus, country, Canada)': ['(coronavi...","{'(Coronavirus, country, Canada)': ['(coronavi...","[[-0.15032902, -0.75801086, 0.6346816], [0.660...","[[-0.10043568, -0.7755534, 0.6232412], [0.6620...","{'(Coronavirus, country, Canada)': ['(opioid, ...","[[0.69448966, -0.6130783, -0.37658876], [0.686...",-1,-1,-1,1
4,7169,"Wayne National Forest plans fires for tree, wi...",,"Nearly 2,000 acres of Wayne National Forest in...",,Forest officials say scientists who study nati...,,true,"Plants, Wildlife, Health, Wildlife health, For...","[{'head': 'Wayne National Forest', 'type': 'in...",...,"{'(Wayne National Forest, instance of, wildlif...","{'(Wayne National Forest, instance of, forest)...","[[0.63135314, -0.7029956, -0.32739952], [0.662...","[[0.63064003, -0.6786143, -0.3765312], [0.6313...","{'(Wayne National Forest, instance of, wildlif...","[[0.6313532, -0.7029955, -0.3273997], [-0.2482...",0,-1,-1,1


# Data Source #3: SpaCy

In [None]:
# Load the claims frame for Data Source #3 (SpaCy); this rebinds `data_df`.
# NOTE: unpickling can execute arbitrary code — only load files from a trusted source.
data_df = pd.read_pickle('./data/spacy/data_post_entailment.pkl')

In [None]:
data_df.head()

Unnamed: 0,claim_id,claim,date_published,explanation,fact_checkers,main_text,sources,label,subjects,claim_kg,...,clean_claim_triples,main_text_triples,clean_main_text_triples,cleaned_claim_evidence_1,claim_evidence_1,cleaned_entailment_scores_1,entailment_scores_1,verdict,verdict_actual,clean_verdict
0,34656,A baby died at an unnamed medical facility be...,"November 10, 2015",Fellow Twitter users suggested @FierceFemtivis...,Kim LaCapria,"On 8 November 2015, former Twitter user @Fierc...",http://webcache.googleusercontent.com/search?q...,unproven,"Politics, fiercefemtivist, racism",[{'head': 'A baby died at an unnamed medical f...,...,"[(A baby died at an unnamed medical facility, ...","[(Confederate flag, instance of, racist), (Fie...","[(Confederate flag, instance of, racist), (Fie...","{'(A baby died at an unnamed medical facility,...","{'(A baby died at an unnamed medical facility,...","[[0.66573095, -0.5662217, -0.4859993], [0.4305...","[[0.66573095, -0.56622165, -0.48599926], [0.63...",-1,0,-1
1,3632,Bat from Shawnee County tests positive for rab...,,A bat found in northeastern Kansas has tested ...,,Topeka television station KSNT reports that th...,https://www.ksnt.com/news/bat-tests-positive-f...,true,"Rabies, Health, General News, Kansas, Bats, To...","[{'head': 'Bat from Shawnee County', 'type': '...",...,"[(Shawnee County, located in the administrativ...","[(KSNT, located in the administrative territor...","[(KSNT, located in the administrative territor...","{'(Shawnee County, located in the administrati...","{'(Bat from Shawnee County, has cause, rabies)...","[[0.6284045, -0.6450901, -0.4347028], [0.61757...","[[0.62699527, -0.7306919, -0.27012262], [0.628...",-1,1,-1
2,29558,Germany has banned pork from school canteens b...,"March 7, 2016",What's true: Some politicians complained that ...,Kim LaCapria,"On 7 March 2016, British tabloid Express repor...",http://bnp.org.uk/news/regional/bnp-victory-br...,false,Politics,"[{'head': 'banned pork from school canteens', ...",...,"[(banned pork from school canteens, country, G...","[(Express, country, British), (Express, instan...","[(Express, country, British), (Express, instan...","{'(banned pork from school canteens, country, ...","{'(banned pork from school canteens, country, ...","[[0.6420961, -0.6923114, -0.3292681], [0.68023...","[[0.6420961, -0.6923114, -0.3292681], [0.68787...",-1,-1,-1
3,8416,Coronavirus prompts Canada to roll out safe dr...,"April 16, 2020",Canada’s Pacific province of British Columbia ...,Tessa Vikander,"In March, the Canadian government urged provin...",,true,Health News,"[{'head': 'Coronavirus', 'type': 'country', 't...",...,"[(Coronavirus, country, Canada), (safe drugs, ...","[(heroin, instance of, controlled substances),...","[(heroin, instance of, controlled substance), ...","{'(Coronavirus, country, Canada)': ['(coronavi...","{'(Coronavirus, country, Canada)': ['(coronavi...","[[-0.15032902, -0.75801086, 0.6346816], [0.660...","[[-0.10043568, -0.7755534, 0.6232412], [0.6620...",-1,1,-1
4,7169,"Wayne National Forest plans fires for tree, wi...",,"Nearly 2,000 acres of Wayne National Forest in...",,Forest officials say scientists who study nati...,,true,"Plants, Wildlife, Health, Wildlife health, For...","[{'head': 'Wayne National Forest', 'type': 'in...",...,"[(Wayne National Forest, instance of, wildlife...","[(oak forests, located in the administrative t...","[(oak forests, located in the administrative t...","{'(Wayne National Forest, instance of, wildlif...","{'(Wayne National Forest, instance of, forest)...","[[0.63135314, -0.7029956, -0.32739952], [0.662...","[[0.63064003, -0.6786143, -0.3765312], [0.6313...",-1,1,-1


In [None]:
# Encode the gold label as a signed class id: 'true' -> 1, 'false' -> -1,
# and every other label value -> 0.
data_df['verdict_actual'] = np.select(
    [data_df['label'] == 'true', data_df['label'] == 'false'],
    [1, -1],
    default=0,
)

In [None]:
def get_verdict(row, low, high):
    """Collapse the SpaCy per-sub-claim entailment scores into one verdict.

    For every sub-claim in ``row['spacy_entailment_scores_1']``, the
    highest-scoring position of its score vector casts a vote weighted by that
    score: position 0 votes -1, position 1 votes +1 and position 2 votes 0. The
    weighted votes are summed (not averaged) and bucketed by two thresholds.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a
            'spacy_entailment_scores_1' entry: an iterable of score vectors,
            each a NumPy array or a plain sequence.
        low: The summed vote must exceed this to avoid a -1 verdict.
        high: The summed vote must exceed this to yield a 1 verdict.

    Returns:
        1 if the summed vote is > ``high``; otherwise 0 if it is > ``low``;
        otherwise -1. A row with no sub-claims has a summed vote of 0.
    """
    # NOTE(review): assumes each score vector is ordered
    # (contradiction, support, neutral) — confirm against the entailment model.
    index_values = [-1, 1, 0]
    verdict = 0

    for subclaim_entailment_scores in row['spacy_entailment_scores_1']:
        # No-op for ndarrays; lets list/tuple score vectors through as well,
        # which previously failed on the missing .argmax() method.
        scores = np.asarray(subclaim_entailment_scores)
        which_verdict = scores.argmax()
        verdict += index_values[which_verdict] * scores[which_verdict]

    if verdict > high:
        return 1
    if verdict > low:
        return 0
    return -1

In [None]:
# Candidate values for the (low, high) verdict thresholds.
highs = np.arange(-1, 1, 0.01)
lows = np.arange(-1, 1, 0.01)

# For low < high, get_verdict returns 1 exactly when a row's summed vote exceeds
# `high`, and -1 exactly when it does not exceed `low`. Each class mask therefore
# depends on a single threshold, so the row-wise apply is run once per threshold
# value instead of once per (low, high) pair. Predictions are unchanged.
support_masks = [
    (data_df.apply(lambda row: get_verdict(row, -np.inf, high), axis=1) == 1).to_numpy()
    for high in highs
]
contradiction_masks = [
    (data_df.apply(lambda row: get_verdict(row, low, np.inf), axis=1) == -1).to_numpy()
    for low in lows
]

# NOTE(review): thresholds are selected on the same frame that is scored in the
# report cell, so the reported metrics are optimistic; consider a held-out split.
best_f1_score = -1
best_high = 0.1
best_low = -0.1
for high, is_support in zip(highs, support_masks):
    for low, is_contradiction in zip(lows, contradiction_masks):
        if low < high:
            predicted = np.where(is_support, 1, np.where(is_contradiction, -1, 0))

            f1 = metrics.f1_score(data_df['verdict_actual'], predicted, labels=[-1, 1, 0], average='weighted')

            # Strict '>' keeps the first pair (in scan order) on ties.
            if f1 > best_f1_score:
                best_f1_score = f1
                best_low = low
                best_high = high

# Leave the predictions for the selected thresholds in the frame (the per-pair
# loop used to leave whichever pair happened to be evaluated last).
data_df['verdict'] = data_df.apply(lambda row: get_verdict(row, best_low, best_high), axis=1)

print(best_f1_score)
print(best_low)
print(best_high)



0.36532800124454373
-0.6599999999999997
-0.6299999999999997


In [None]:
# The get_verdict defined for this data source takes only (row, low, high); the
# `cleaned` keyword exists only on the Data Source #1 version, so passing
# `cleaned=False` here raised a TypeError.
data_df['verdict'] = data_df.apply(lambda row: get_verdict(row, best_low, best_high), axis=1)


In [None]:
target_names = ['contradiction', 'support', 'nei']
labels = [-1, 1, 0]

# Per-class precision/recall/F1 of the predicted verdicts against the gold labels.
report_text = metrics.classification_report(
    data_df['verdict_actual'],
    data_df['verdict'],
    labels=labels,
    target_names=target_names,
)
print(report_text)

# Overall fraction of rows whose predicted verdict equals the gold label.
accuracy = metrics.accuracy_score(data_df['verdict_actual'], data_df['verdict'])
print("Accuracy Score: ", accuracy)


With Redundancy Removal
               precision    recall  f1-score   support

contradiction       0.28      0.27      0.27       768
      support       0.47      0.59      0.52      1228
          nei       0.20      0.07      0.11       456

     accuracy                           0.39      2452
    macro avg       0.31      0.31      0.30      2452
 weighted avg       0.36      0.39      0.37      2452

Accuracy Score:  0.39110929853181076

Without Redundancy Removal
               precision    recall  f1-score   support

contradiction       0.27      0.32      0.29       768
      support       0.45      0.52      0.48      1228
          nei       0.18      0.04      0.07       456

     accuracy                           0.37      2452
    macro avg       0.30      0.29      0.28      2452
 weighted avg       0.34      0.37      0.34      2452

Accuracy Score:  0.3674551386623165

Entailment Randomized
               precision    recall  f1-score   support

contradiction       

In [None]:
# Persist the SpaCy frame, including the verdict columns computed in this section.
data_df.to_pickle('./data/spacy/data_verdict.pkl')

In [None]:
data_df.head()

Unnamed: 0,claim_id,claim,date_published,explanation,fact_checkers,main_text,sources,label,subjects,claim_kg,...,cleaned_claim_evidence_1,claim_evidence_1,cleaned_entailment_scores_1,entailment_scores_1,random_claim_evidence_1,random_entailment_scores_1,random_verdict,clean_verdict,verdict,verdict_actual
0,34656,A baby died at an unnamed medical facility be...,"November 10, 2015",Fellow Twitter users suggested @FierceFemtivis...,Kim LaCapria,"On 8 November 2015, former Twitter user @Fierc...",http://webcache.googleusercontent.com/search?q...,unproven,"Politics, fiercefemtivist, racism",[{'head': 'A baby died at an unnamed medical f...,...,"{'(A baby died at an unnamed medical facility,...","{'(A baby died at an unnamed medical facility,...","[[0.66573095, -0.5662217, -0.4859993], [0.4305...","[[0.66573095, -0.56622165, -0.48599926], [0.63...","{'(A baby died at an unnamed medical facility,...","[[0.6385632, -0.5613695, -0.52640414], [0.6686...",-1,-1,-1,0
1,3632,Bat from Shawnee County tests positive for rab...,,A bat found in northeastern Kansas has tested ...,,Topeka television station KSNT reports that th...,https://www.ksnt.com/news/bat-tests-positive-f...,true,"Rabies, Health, General News, Kansas, Bats, To...","[{'head': 'Bat from Shawnee County', 'type': '...",...,"{'(Shawnee County, located in the administrati...","{'(Bat from Shawnee County, has cause, rabies)...","[[0.6284045, -0.6450901, -0.4347028], [0.61757...","[[0.62699527, -0.7306919, -0.27012262], [0.628...","{'(Shawnee County, located in the administrati...","[[0.6175279, -0.7271561, -0.2998387], [-0.5644...",1,-1,-1,1
2,29558,Germany has banned pork from school canteens b...,"March 7, 2016",What's true: Some politicians complained that ...,Kim LaCapria,"On 7 March 2016, British tabloid Express repor...",http://bnp.org.uk/news/regional/bnp-victory-br...,false,Politics,"[{'head': 'banned pork from school canteens', ...",...,"{'(banned pork from school canteens, country, ...","{'(banned pork from school canteens, country, ...","[[0.6420961, -0.6923114, -0.3292681], [0.68023...","[[0.6420961, -0.6923114, -0.3292681], [0.68787...","{'(banned pork from school canteens, country, ...","[[0.679509, -0.6404532, -0.35789278], [0.69187...",-1,-1,-1,-1
3,8416,Coronavirus prompts Canada to roll out safe dr...,"April 16, 2020",Canada’s Pacific province of British Columbia ...,Tessa Vikander,"In March, the Canadian government urged provin...",,true,Health News,"[{'head': 'Coronavirus', 'type': 'country', 't...",...,"{'(Coronavirus, country, Canada)': ['(coronavi...","{'(Coronavirus, country, Canada)': ['(coronavi...","[[-0.15032902, -0.75801086, 0.6346816], [0.660...","[[-0.10043568, -0.7755534, 0.6232412], [0.6620...","{'(Coronavirus, country, Canada)': ['(opioid, ...","[[0.69448966, -0.6130783, -0.37658876], [0.686...",-1,-1,-1,1
4,7169,"Wayne National Forest plans fires for tree, wi...",,"Nearly 2,000 acres of Wayne National Forest in...",,Forest officials say scientists who study nati...,,true,"Plants, Wildlife, Health, Wildlife health, For...","[{'head': 'Wayne National Forest', 'type': 'in...",...,"{'(Wayne National Forest, instance of, wildlif...","{'(Wayne National Forest, instance of, forest)...","[[0.63135314, -0.7029956, -0.32739952], [0.662...","[[0.63064003, -0.6786143, -0.3765312], [0.6313...","{'(Wayne National Forest, instance of, wildlif...","[[0.6313532, -0.7029955, -0.3273997], [-0.2482...",0,-1,-1,1


# Data Source #4: LLaMa

In [None]:
# Load the claims frame for Data Source #4 (LLaMa); this rebinds `data_df`.
# NOTE: unpickling can execute arbitrary code — only load files from a trusted source.
data_df = pd.read_pickle('./data/llama/data_post_entailment.pkl')

In [None]:
data_df.head()

Unnamed: 0,claim_id,claim,date_published,explanation,fact_checkers,main_text,sources,label,subjects,claim_kg,...,clean_claim_triples,main_text_triples,clean_main_text_triples,cleaned_claim_evidence_1,claim_evidence_1,cleaned_entailment_scores_1,entailment_scores_1,verdict,verdict_actual,clean_verdict
0,34656,A baby died at an unnamed medical facility be...,"November 10, 2015",Fellow Twitter users suggested @FierceFemtivis...,Kim LaCapria,"On 8 November 2015, former Twitter user @Fierc...",http://webcache.googleusercontent.com/search?q...,unproven,"Politics, fiercefemtivist, racism",[{'head': 'A baby died at an unnamed medical f...,...,"[(A baby died at an unnamed medical facility, ...","[(Confederate flag, instance of, racist), (Fie...","[(Confederate flag, instance of, racist), (Fie...","{'(A baby died at an unnamed medical facility,...","{'(A baby died at an unnamed medical facility,...","[[0.66573095, -0.5662217, -0.4859993], [0.4305...","[[0.66573095, -0.56622165, -0.48599926], [0.63...",-1,0,-1
1,3632,Bat from Shawnee County tests positive for rab...,,A bat found in northeastern Kansas has tested ...,,Topeka television station KSNT reports that th...,https://www.ksnt.com/news/bat-tests-positive-f...,true,"Rabies, Health, General News, Kansas, Bats, To...","[{'head': 'Bat from Shawnee County', 'type': '...",...,"[(Shawnee County, located in the administrativ...","[(KSNT, located in the administrative territor...","[(KSNT, located in the administrative territor...","{'(Shawnee County, located in the administrati...","{'(Bat from Shawnee County, has cause, rabies)...","[[0.6284045, -0.6450901, -0.4347028], [0.61757...","[[0.62699527, -0.7306919, -0.27012262], [0.628...",-1,1,-1
2,29558,Germany has banned pork from school canteens b...,"March 7, 2016",What's true: Some politicians complained that ...,Kim LaCapria,"On 7 March 2016, British tabloid Express repor...",http://bnp.org.uk/news/regional/bnp-victory-br...,false,Politics,"[{'head': 'banned pork from school canteens', ...",...,"[(banned pork from school canteens, country, G...","[(Express, country, British), (Express, instan...","[(Express, country, British), (Express, instan...","{'(banned pork from school canteens, country, ...","{'(banned pork from school canteens, country, ...","[[0.6420961, -0.6923114, -0.3292681], [0.68023...","[[0.6420961, -0.6923114, -0.3292681], [0.68787...",-1,-1,-1
3,8416,Coronavirus prompts Canada to roll out safe dr...,"April 16, 2020",Canada’s Pacific province of British Columbia ...,Tessa Vikander,"In March, the Canadian government urged provin...",,true,Health News,"[{'head': 'Coronavirus', 'type': 'country', 't...",...,"[(Coronavirus, country, Canada), (safe drugs, ...","[(heroin, instance of, controlled substances),...","[(heroin, instance of, controlled substance), ...","{'(Coronavirus, country, Canada)': ['(coronavi...","{'(Coronavirus, country, Canada)': ['(coronavi...","[[-0.15032902, -0.75801086, 0.6346816], [0.660...","[[-0.10043568, -0.7755534, 0.6232412], [0.6620...",-1,1,-1
4,7169,"Wayne National Forest plans fires for tree, wi...",,"Nearly 2,000 acres of Wayne National Forest in...",,Forest officials say scientists who study nati...,,true,"Plants, Wildlife, Health, Wildlife health, For...","[{'head': 'Wayne National Forest', 'type': 'in...",...,"[(Wayne National Forest, instance of, wildlife...","[(oak forests, located in the administrative t...","[(oak forests, located in the administrative t...","{'(Wayne National Forest, instance of, wildlif...","{'(Wayne National Forest, instance of, forest)...","[[0.63135314, -0.7029956, -0.32739952], [0.662...","[[0.63064003, -0.6786143, -0.3765312], [0.6313...",-1,1,-1


In [None]:
# Encode the gold label as a signed class id: 'true' -> 1, 'false' -> -1,
# and every other label value -> 0.
data_df['verdict_actual'] = np.select(
    [data_df['label'] == 'true', data_df['label'] == 'false'],
    [1, -1],
    default=0,
)

In [None]:
def get_verdict(row, low, high, score_column='llama_entailment_scores_1'):
    """Collapse a row's per-subclaim entailment scores into a single verdict.

    For every subclaim the highest-scoring class is picked, and its score is
    added to a running total with a weight chosen by the class position:
    position 0 counts negatively (-1), position 1 positively (+1) and
    position 2 is ignored (0).  The total is then bucketed with the two
    thresholds.

    NOTE(review): the position -> class mapping presumably follows the
    (contradiction, support, nei) order of the entailment model's output;
    confirm against the code that produced the scores.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) holding the scores.
        low: Totals at or below this threshold yield -1.
        high: Totals strictly above this threshold yield 1.
        score_column: Key in `row` under which the per-subclaim score vectors
            are stored.  Defaults to the column the function always used.

    Returns:
        1 if total > high, 0 if low < total <= high, otherwise -1.
    """
    index_values = [-1, 1, 0]
    verdict = 0

    for subclaim_entailment_scores in row[score_column]:
        # Coerce to an array so plain Python lists (which have no .argmax())
        # are handled the same way as numpy arrays.
        scores = np.asarray(subclaim_entailment_scores)
        if scores.size == 0:
            # A subclaim without any scores contributes nothing to the total.
            continue
        which_verdict = int(scores.argmax())
        verdict += index_values[which_verdict] * scores[which_verdict]

    if verdict > high:
        return 1
    elif verdict > low:
        return 0
    else:
        return -1

In [None]:
# Exhaustive search over (low, high) threshold pairs with low < high, keeping
# the first pair that achieves the highest weighted F1 against `verdict_actual`.
highs = np.arange(-1, 1, 0.01)
lows = np.arange(-1, 1, 0.01)

best_f1_score, best_low, best_high = -1, -0.1, 0.1

for high in highs:
    for low in lows:
        if low >= high:
            # Only pairs where the lower threshold is strictly below the
            # upper one are evaluated.
            continue

        # Re-label every row with the candidate thresholds.
        data_df['verdict'] = data_df.apply(get_verdict, axis=1, args=(low, high))

        f1 = metrics.f1_score(
            data_df['verdict_actual'],
            data_df['verdict'],
            labels=[-1, 1, 0],
            average='weighted',
        )

        # Strict comparison: ties keep the earlier pair.
        if f1 > best_f1_score:
            best_f1_score, best_low, best_high = f1, low, high

print(best_f1_score, best_low, best_high, sep='\n')



0.36532800124454373
-0.6599999999999997
-0.6299999999999997


In [None]:
# Label every row with the best thresholds found by the grid search.
# get_verdict is called as (row, low, high) only: it defines no `cleaned`
# parameter, so passing that keyword would raise a TypeError.
data_df['verdict'] = data_df.apply(lambda row: get_verdict(row, best_low, best_high), axis=1)


In [None]:
# Verdict code -> display name, in the order the report rows should appear.
verdict_names = {-1: 'contradiction', 1: 'support', 0: 'nei'}
labels = list(verdict_names.keys())
target_names = list(verdict_names.values())

report = metrics.classification_report(
    data_df['verdict_actual'],
    data_df['verdict'],
    target_names=target_names,
    labels=labels,
)
print(report)

accuracy = metrics.accuracy_score(data_df['verdict_actual'], data_df['verdict'])
print("Accuracy Score: ", accuracy)


With Redundancy Removal
               precision    recall  f1-score   support

contradiction       0.28      0.27      0.27       768
      support       0.47      0.59      0.52      1228
          nei       0.20      0.07      0.11       456

     accuracy                           0.39      2452
    macro avg       0.31      0.31      0.30      2452
 weighted avg       0.36      0.39      0.37      2452

Accuracy Score:  0.39110929853181076

Without Redundancy Removal
               precision    recall  f1-score   support

contradiction       0.27      0.32      0.29       768
      support       0.45      0.52      0.48      1228
          nei       0.18      0.04      0.07       456

     accuracy                           0.37      2452
    macro avg       0.30      0.29      0.28      2452
 weighted avg       0.34      0.37      0.34      2452

Accuracy Score:  0.3674551386623165

Entailment Randomized
               precision    recall  f1-score   support

contradiction       

In [None]:
# Persist the frame, which now carries the `verdict` column, as a pickle.
verdict_pickle_path = './data/llama/data_verdict.pkl'
data_df.to_pickle(verdict_pickle_path)

In [None]:
# Preview the first five rows of the frame.
data_df.head(n=5)

Unnamed: 0,claim_id,claim,date_published,explanation,fact_checkers,main_text,sources,label,subjects,claim_kg,...,cleaned_claim_evidence_1,claim_evidence_1,cleaned_entailment_scores_1,entailment_scores_1,random_claim_evidence_1,random_entailment_scores_1,random_verdict,clean_verdict,verdict,verdict_actual
0,34656,A baby died at an unnamed medical facility be...,"November 10, 2015",Fellow Twitter users suggested @FierceFemtivis...,Kim LaCapria,"On 8 November 2015, former Twitter user @Fierc...",http://webcache.googleusercontent.com/search?q...,unproven,"Politics, fiercefemtivist, racism",[{'head': 'A baby died at an unnamed medical f...,...,"{'(A baby died at an unnamed medical facility,...","{'(A baby died at an unnamed medical facility,...","[[0.66573095, -0.5662217, -0.4859993], [0.4305...","[[0.66573095, -0.56622165, -0.48599926], [0.63...","{'(A baby died at an unnamed medical facility,...","[[0.6385632, -0.5613695, -0.52640414], [0.6686...",-1,-1,-1,0
1,3632,Bat from Shawnee County tests positive for rab...,,A bat found in northeastern Kansas has tested ...,,Topeka television station KSNT reports that th...,https://www.ksnt.com/news/bat-tests-positive-f...,true,"Rabies, Health, General News, Kansas, Bats, To...","[{'head': 'Bat from Shawnee County', 'type': '...",...,"{'(Shawnee County, located in the administrati...","{'(Bat from Shawnee County, has cause, rabies)...","[[0.6284045, -0.6450901, -0.4347028], [0.61757...","[[0.62699527, -0.7306919, -0.27012262], [0.628...","{'(Shawnee County, located in the administrati...","[[0.6175279, -0.7271561, -0.2998387], [-0.5644...",1,-1,-1,1
2,29558,Germany has banned pork from school canteens b...,"March 7, 2016",What's true: Some politicians complained that ...,Kim LaCapria,"On 7 March 2016, British tabloid Express repor...",http://bnp.org.uk/news/regional/bnp-victory-br...,false,Politics,"[{'head': 'banned pork from school canteens', ...",...,"{'(banned pork from school canteens, country, ...","{'(banned pork from school canteens, country, ...","[[0.6420961, -0.6923114, -0.3292681], [0.68023...","[[0.6420961, -0.6923114, -0.3292681], [0.68787...","{'(banned pork from school canteens, country, ...","[[0.679509, -0.6404532, -0.35789278], [0.69187...",-1,-1,-1,-1
3,8416,Coronavirus prompts Canada to roll out safe dr...,"April 16, 2020",Canada’s Pacific province of British Columbia ...,Tessa Vikander,"In March, the Canadian government urged provin...",,true,Health News,"[{'head': 'Coronavirus', 'type': 'country', 't...",...,"{'(Coronavirus, country, Canada)': ['(coronavi...","{'(Coronavirus, country, Canada)': ['(coronavi...","[[-0.15032902, -0.75801086, 0.6346816], [0.660...","[[-0.10043568, -0.7755534, 0.6232412], [0.6620...","{'(Coronavirus, country, Canada)': ['(opioid, ...","[[0.69448966, -0.6130783, -0.37658876], [0.686...",-1,-1,-1,1
4,7169,"Wayne National Forest plans fires for tree, wi...",,"Nearly 2,000 acres of Wayne National Forest in...",,Forest officials say scientists who study nati...,,true,"Plants, Wildlife, Health, Wildlife health, For...","[{'head': 'Wayne National Forest', 'type': 'in...",...,"{'(Wayne National Forest, instance of, wildlif...","{'(Wayne National Forest, instance of, forest)...","[[0.63135314, -0.7029956, -0.32739952], [0.662...","[[0.63064003, -0.6786143, -0.3765312], [0.6313...","{'(Wayne National Forest, instance of, wildlif...","[[0.6313532, -0.7029955, -0.3273997], [-0.2482...",0,-1,-1,1
