In [2]:
import pandas as pd
import numpy as np
import random
from semantic_roberta import get_similarity_scores_triples, get_topk_similar_evidences
from textual_entailment import get_entailment



# Data Source #1: REBEL

In [2]:
data_df = pd.read_pickle('./data/data.pkl')

In [3]:
def print_triples_sizes(df):
    df['claim_kg_size'] = df['claim_kg'].apply(lambda triples : len(triples))
    df['main_text_kg_size'] = df['main_text_kg'].apply(lambda triples : len(triples))

    print(df[['claim_kg_size', 'main_text_kg_size']].describe())
    
print_triples_sizes(data_df)

       claim_kg_size  main_text_kg_size
count    2452.000000        2452.000000
mean        3.113785          23.469005
std         0.534892          15.511103
min         1.000000           2.000000
25%         3.000000          13.000000
50%         3.000000          21.000000
75%         3.000000          30.000000
max         9.000000         151.000000


## Redundancy Removal

In [22]:
def get_cleaned_triples(kg, clean=True):
    triples = [f"({triple['head']}, {triple['type']}, {triple['tail']})" for triple in kg]

    if clean:
        n = len(triples)
        sim_scores = get_similarity_scores_triples(triples)

        to_drop = np.zeros(n, dtype=bool)

        for i in range(n):
            # Skip sentences that have already been marked for dropping
            if to_drop[i]:
                continue

            for j in range(i + 1, n):
                # If the similarity score between sentences i and j is above the threshold, mark one of them for dropping
                if sim_scores[i, j] >= 0.9:
                    to_drop[i] = True
                    break

        # Return the indices of sentences that are not marked for dropping
        clean_triples = [triples[i] for i in range(n) if not to_drop[i]]
        return clean_triples
    else:
        return triples

In [23]:
data_df['claim_triples'] = data_df['claim_kg'].apply(lambda kg: get_cleaned_triples(kg, clean=False))

In [7]:
data_df['clean_claim_triples'] = data_df['claim_kg'].apply(lambda kg: get_cleaned_triples(kg, clean=True))

In [9]:
data_df['claim_triples'].apply(lambda x: len(x)).describe()

count    2452.000000
mean        3.113785
std         0.534892
min         1.000000
25%         3.000000
50%         3.000000
75%         3.000000
max         9.000000
Name: claim_triples, dtype: float64

In [10]:
data_df['clean_claim_triples'].apply(lambda x: len(x)).describe()

count    2452.000000
mean        2.207178
std         0.777266
min         1.000000
25%         2.000000
50%         2.000000
75%         3.000000
max         8.000000
Name: clean_claim_triples, dtype: float64

In [11]:
data_df.to_pickle('dev_test.pkl')

In [12]:
data_df['main_text_triples'] = data_df['main_text_kg'].apply(lambda kg: get_cleaned_triples(kg, clean=False))

In [14]:
data_df['clean_main_text_triples'] = data_df['main_text_kg'].apply(lambda kg: get_cleaned_triples(kg, clean=True))

In [16]:
data_df.to_pickle('./data/data_post_redundancy_removal.pkl')

In [18]:
data_df.head()

Unnamed: 0.1,claim_id,claim,date_published,explanation,fact_checkers,main_text,sources,label,subjects,claim_kg,main_text_kg,Unnamed: 0,claim_kg_size,main_text_kg_size,claim_triples,clean_claim_triples,main_text_triples,clean_main_text_triples
0,34656,A baby died at an unnamed medical facility be...,"November 10, 2015",Fellow Twitter users suggested @FierceFemtivis...,Kim LaCapria,"On 8 November 2015, former Twitter user @Fierc...",http://webcache.googleusercontent.com/search?q...,unproven,"Politics, fiercefemtivist, racism",[{'head': 'A baby died at an unnamed medical f...,"[{'head': 'Confederate flag', 'type': 'instanc...",,3,14,"[(A baby died at an unnamed medical facility, ...","[(A baby died at an unnamed medical facility, ...","[(Confederate flag, instance of, racist), (Fie...","[(Confederate flag, instance of, racist), (Fie..."
1,3632,Bat from Shawnee County tests positive for rab...,,A bat found in northeastern Kansas has tested ...,,Topeka television station KSNT reports that th...,https://www.ksnt.com/news/bat-tests-positive-f...,true,"Rabies, Health, General News, Kansas, Bats, To...","[{'head': 'Bat from Shawnee County', 'type': '...","[{'head': 'KSNT', 'type': 'located in the admi...",,3,6,"[(Bat from Shawnee County, has cause, rabies),...","[(Shawnee County, located in the administrativ...","[(KSNT, located in the administrative territor...","[(KSNT, located in the administrative territor..."
2,29558,Germany has banned pork from school canteens b...,"March 7, 2016",What's true: Some politicians complained that ...,Kim LaCapria,"On 7 March 2016, British tabloid Express repor...",http://bnp.org.uk/news/regional/bnp-victory-br...,false,Politics,"[{'head': 'banned pork from school canteens', ...","[{'head': 'Express', 'type': 'country', 'tail'...",,3,36,"[(banned pork from school canteens, country, G...","[(banned pork from school canteens, country, G...","[(Express, country, British), (Express, instan...","[(Express, country, British), (Express, instan..."
3,8416,Coronavirus prompts Canada to roll out safe dr...,"April 16, 2020",Canada’s Pacific province of British Columbia ...,Tessa Vikander,"In March, the Canadian government urged provin...",,true,Health News,"[{'head': 'Coronavirus', 'type': 'country', 't...","[{'head': 'heroin', 'type': 'instance of', 'ta...",,3,21,"[(Coronavirus, country, Canada), (safe drug, c...","[(Coronavirus, country, Canada), (safe drugs, ...","[(heroin, instance of, controlled substances),...","[(heroin, instance of, controlled substance), ..."
4,7169,"Wayne National Forest plans fires for tree, wi...",,"Nearly 2,000 acres of Wayne National Forest in...",,Forest officials say scientists who study nati...,,true,"Plants, Wildlife, Health, Wildlife health, For...","[{'head': 'Wayne National Forest', 'type': 'in...","[{'head': 'oak forests', 'type': 'located in t...",,3,3,"[(Wayne National Forest, instance of, forest),...","[(Wayne National Forest, instance of, wildlife...","[(oak forests, located in the administrative t...","[(oak forests, located in the administrative t..."


## Evidence Retrieval

In [7]:
data_df = pd.read_pickle('./data/data_post_entailment.pkl')

In [8]:
def pair_most_related_evidence(row, k, cleaned=True, randomize=False):

    if cleaned:
        claim_triples = row['clean_claim_triples']
        evidence_triples = row['clean_main_text_triples']
    else:
        claim_triples = row['claim_triples']
        evidence_triples = row['main_text_triples']

    relevant_claim_evidence_pairs = {}
    for claim_triple in claim_triples:
        if randomize:
            topk_evidences = random.sample(evidence_triples, k)
        else:
            topk_evidences = get_topk_similar_evidences(claim_triple, evidence_triples, k)
        relevant_claim_evidence_pairs[claim_triple] = topk_evidences

    if row.name%100 == 0:
        print(row.name)

    return relevant_claim_evidence_pairs

In [21]:
data_df['cleaned_claim_evidence_1'] = data_df.apply(lambda row: pair_most_related_evidence(row, 1, cleaned=True), axis=1)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
0
100
200
300
400
500
600
700
800
900
1000
1100
1200


In [9]:
data_df['random_claim_evidence_1'] = data_df.apply(lambda row: pair_most_related_evidence(row, 1, cleaned=True, randomize=True), axis=1)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
0
100
200
300
400
500
600
700
800
900
1000
1100
1200


In [25]:
data_df['claim_evidence_1'] = data_df.apply(lambda row: pair_most_related_evidence(row, 1, cleaned=False), axis=1)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
0
100
200
300
400
500
600
700
800
900
1000
1100
1200


In [10]:
data_df.head()

Unnamed: 0,claim_id,claim,date_published,explanation,fact_checkers,main_text,sources,label,subjects,claim_kg,...,main_text_kg_size,claim_triples,clean_claim_triples,main_text_triples,clean_main_text_triples,cleaned_claim_evidence_1,claim_evidence_1,cleaned_entailment_scores_1,entailment_scores_1,random_claim_evidence_1
0,34656,A baby died at an unnamed medical facility be...,"November 10, 2015",Fellow Twitter users suggested @FierceFemtivis...,Kim LaCapria,"On 8 November 2015, former Twitter user @Fierc...",http://webcache.googleusercontent.com/search?q...,unproven,"Politics, fiercefemtivist, racism",[{'head': 'A baby died at an unnamed medical f...,...,14,"[(A baby died at an unnamed medical facility, ...","[(A baby died at an unnamed medical facility, ...","[(Confederate flag, instance of, racist), (Fie...","[(Confederate flag, instance of, racist), (Fie...","{'(A baby died at an unnamed medical facility,...","{'(A baby died at an unnamed medical facility,...","[[0.66573095, -0.5662217, -0.4859993], [0.4305...","[[0.66573095, -0.56622165, -0.48599926], [0.63...","{'(A baby died at an unnamed medical facility,..."
1,3632,Bat from Shawnee County tests positive for rab...,,A bat found in northeastern Kansas has tested ...,,Topeka television station KSNT reports that th...,https://www.ksnt.com/news/bat-tests-positive-f...,true,"Rabies, Health, General News, Kansas, Bats, To...","[{'head': 'Bat from Shawnee County', 'type': '...",...,6,"[(Bat from Shawnee County, has cause, rabies),...","[(Shawnee County, located in the administrativ...","[(KSNT, located in the administrative territor...","[(KSNT, located in the administrative territor...","{'(Shawnee County, located in the administrati...","{'(Bat from Shawnee County, has cause, rabies)...","[[0.6284045, -0.6450901, -0.4347028], [0.61757...","[[0.62699527, -0.7306919, -0.27012262], [0.628...","{'(Shawnee County, located in the administrati..."
2,29558,Germany has banned pork from school canteens b...,"March 7, 2016",What's true: Some politicians complained that ...,Kim LaCapria,"On 7 March 2016, British tabloid Express repor...",http://bnp.org.uk/news/regional/bnp-victory-br...,false,Politics,"[{'head': 'banned pork from school canteens', ...",...,36,"[(banned pork from school canteens, country, G...","[(banned pork from school canteens, country, G...","[(Express, country, British), (Express, instan...","[(Express, country, British), (Express, instan...","{'(banned pork from school canteens, country, ...","{'(banned pork from school canteens, country, ...","[[0.6420961, -0.6923114, -0.3292681], [0.68023...","[[0.6420961, -0.6923114, -0.3292681], [0.68787...","{'(banned pork from school canteens, country, ..."
3,8416,Coronavirus prompts Canada to roll out safe dr...,"April 16, 2020",Canada’s Pacific province of British Columbia ...,Tessa Vikander,"In March, the Canadian government urged provin...",,true,Health News,"[{'head': 'Coronavirus', 'type': 'country', 't...",...,21,"[(Coronavirus, country, Canada), (safe drug, c...","[(Coronavirus, country, Canada), (safe drugs, ...","[(heroin, instance of, controlled substances),...","[(heroin, instance of, controlled substance), ...","{'(Coronavirus, country, Canada)': ['(coronavi...","{'(Coronavirus, country, Canada)': ['(coronavi...","[[-0.15032902, -0.75801086, 0.6346816], [0.660...","[[-0.10043568, -0.7755534, 0.6232412], [0.6620...","{'(Coronavirus, country, Canada)': ['(opioid, ..."
4,7169,"Wayne National Forest plans fires for tree, wi...",,"Nearly 2,000 acres of Wayne National Forest in...",,Forest officials say scientists who study nati...,,true,"Plants, Wildlife, Health, Wildlife health, For...","[{'head': 'Wayne National Forest', 'type': 'in...",...,3,"[(Wayne National Forest, instance of, forest),...","[(Wayne National Forest, instance of, wildlife...","[(oak forests, located in the administrative t...","[(oak forests, located in the administrative t...","{'(Wayne National Forest, instance of, wildlif...","{'(Wayne National Forest, instance of, forest)...","[[0.63135314, -0.7029956, -0.32739952], [0.662...","[[0.63064003, -0.6786143, -0.3765312], [0.6313...","{'(Wayne National Forest, instance of, wildlif..."


In [11]:
data_df.to_pickle('./data/data_post_evidence_retrieval.pkl')

In [14]:
def get_entailment_scores(row, cleaned=True, random=False):
    
    if random:
        claim_evidences = row['random_claim_evidence_1']
    else:
        if cleaned:
            claim_evidences = row['cleaned_claim_evidence_1']
        else:
            claim_evidences = row['claim_evidence_1']

    evidence_claim_tuples = []
    for claim, evidences in claim_evidences.items():
        evidence_claim_tuples.append((evidences[0], claim))

    if row.name%100==0:
        print(row.name)

    return get_entailment(evidence_claim_tuples)

In [29]:
data_df['cleaned_entailment_scores_1'] = data_df.apply(lambda row : get_entailment_scores(row, cleaned=True), axis=1)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
0
100
200
300
400
500
600
700
800
900
1000
1100
1200


In [15]:
data_df['random_entailment_scores_1'] = data_df.apply(lambda row : get_entailment_scores(row, cleaned=True, random=True), axis=1)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
0
100
200
300
400
500
600
700
800
900
1000
1100
1200


In [30]:
data_df['entailment_scores_1'] = data_df.apply(lambda row : get_entailment_scores(row, cleaned=False), axis=1)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
0
100
200
300
400
500
600
700
800
900
1000
1100
1200


In [16]:
data_df.to_pickle('./data/data_post_entailment.pkl')

# Data Source #2: FRED

In [17]:
data_df = pd.read_pickle('./data/fred/data.pkl')

In [21]:
data_df.head()

Unnamed: 0,claim_id,claim,date_published,explanation,fact_checkers,main_text,sources,label,subjects,claim_kg,main_text_kg,FRED_claim_kg,FRED_main_text_kg,FRED_claim_kg_size,FRED_main_text_kg_size
0,34656,A baby died at an unnamed medical facility be...,"November 10, 2015",Fellow Twitter users suggested @FierceFemtivis...,Kim LaCapria,"On 8 November 2015, former Twitter user @Fierc...",http://webcache.googleusercontent.com/search?q...,unproven,"Politics, fiercefemtivist, racism",[{'head': 'A baby died at an unnamed medical f...,"[{'head': 'Confederate flag', 'type': 'instanc...",[{'head': 'A baby died at an unnamed medical f...,"[{'head': 'Confederate flag', 'type': 'instanc...",3,14
1,3632,Bat from Shawnee County tests positive for rab...,,A bat found in northeastern Kansas has tested ...,,Topeka television station KSNT reports that th...,https://www.ksnt.com/news/bat-tests-positive-f...,true,"Rabies, Health, General News, Kansas, Bats, To...","[{'head': 'Bat from Shawnee County', 'type': '...","[{'head': 'KSNT', 'type': 'located in the admi...","[{'head': 'Bat from Shawnee County', 'type': '...","[{'head': 'report', 'type': 'Agent', 'tail': '...",3,131
2,29558,Germany has banned pork from school canteens b...,"March 7, 2016",What's true: Some politicians complained that ...,Kim LaCapria,"On 7 March 2016, British tabloid Express repor...",http://bnp.org.uk/news/regional/bnp-victory-br...,false,Politics,"[{'head': 'banned pork from school canteens', ...","[{'head': 'Express', 'type': 'country', 'tail'...","[{'head': 'banned pork from school canteens', ...","[{'head': 'Express', 'type': 'country', 'tail'...",3,36
3,8416,Coronavirus prompts Canada to roll out safe dr...,"April 16, 2020",Canada’s Pacific province of British Columbia ...,Tessa Vikander,"In March, the Canadian government urged provin...",,true,Health News,"[{'head': 'Coronavirus', 'type': 'country', 't...","[{'head': 'heroin', 'type': 'instance of', 'ta...","[{'head': 'Coronavirus', 'type': 'country', 't...","[{'head': 'heroin', 'type': 'instance of', 'ta...",3,21
4,7169,"Wayne National Forest plans fires for tree, wi...",,"Nearly 2,000 acres of Wayne National Forest in...",,Forest officials say scientists who study nati...,,true,"Plants, Wildlife, Health, Wildlife health, For...","[{'head': 'Wayne National Forest', 'type': 'in...","[{'head': 'oak forests', 'type': 'located in t...","[{'head': 'Wayne National Forest', 'type': 'in...","[{'head': 'oak forests', 'type': 'located in t...",3,3


In [18]:
def print_triples_sizes(df):
    df['FRED_claim_kg_size'] = df['FRED_claim_kg'].apply(lambda triples : len(triples))
    df['FRED_main_text_kg_size'] = df['FRED_main_text_kg'].apply(lambda triples : len(triples))

    print(df[['FRED_claim_kg_size', 'FRED_main_text_kg_size']].describe())
    
print_triples_sizes(data_df)

       FRED_claim_kg_size  FRED_main_text_kg_size
count         2451.000000             2451.000000
mean             8.912689              152.822929
std              9.377596              287.535709
min              0.000000                0.000000
25%              3.000000               17.000000
50%              3.000000               27.000000
75%             14.000000               77.000000
max             93.000000             2083.000000


## Redundancy Removal

In [30]:
def get_cleaned_triples(row, kg_column='claim_kg', clean=True):
    kg = row[kg_column]
    index = row.name
    triples = [f"({triple['head']}, {triple['type']}, {triple['tail']})" for triple in kg]

    if index%100 == 0:
        print(index)

    try:
        if clean:
            n = len(triples)
            sim_scores = get_similarity_scores_triples(triples)

            to_drop = np.zeros(n, dtype=bool)

            for i in range(n):
                # Skip sentences that have already been marked for dropping
                if to_drop[i]:
                    continue

                for j in range(i + 1, n):
                    # If the similarity score between sentences i and j is above the threshold, mark one of them for dropping
                    if sim_scores[i, j] >= 0.9:
                        to_drop[i] = True
                        break

            # Return the indices of sentences that are not marked for dropping
            clean_triples = [triples[i] for i in range(n) if not to_drop[i]]
            return clean_triples
        else:
            return triples

    except:
        return triples

In [33]:
data_df['FRED_claim_triples'] = data_df.apply(lambda row: get_cleaned_triples(row, kg_column='FRED_claim_kg'), axis=1)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
0
100
200
300
400
500
600
700
800
900
1000
1100
1200


In [34]:
data_df['FRED_claim_triples'].apply(lambda x: len(x)).describe()

count    2451.000000
mean        7.930233
std         8.820456
min         0.000000
25%         2.000000
50%         3.000000
75%        13.000000
max        70.000000
Name: FRED_claim_triples, dtype: float64

In [35]:
data_df['FRED_main_text_triples'] = data_df.apply(lambda row: get_cleaned_triples(row, kg_column='FRED_main_text_kg'), axis=1)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
0
100
200
300
400
500
600
700
800
900
1000
1100
1200


In [36]:
data_df['FRED_main_text_triples'].apply(lambda x: len(x)).describe()

count    2451.000000
mean      145.798042
std       289.095865
min         0.000000
25%        12.000000
50%        20.000000
75%        55.500000
max      2083.000000
Name: FRED_main_text_triples, dtype: float64

In [37]:
data_df.to_pickle('./data/fred/data_post_redundancy_removal.pkl')

In [38]:
data_df.head()

Unnamed: 0,claim_id,claim,date_published,explanation,fact_checkers,main_text,sources,label,subjects,claim_kg,main_text_kg,FRED_claim_kg,FRED_main_text_kg,FRED_claim_kg_size,FRED_main_text_kg_size,FRED_claim_triples,FRED_main_text_triples
0,34656,A baby died at an unnamed medical facility be...,"November 10, 2015",Fellow Twitter users suggested @FierceFemtivis...,Kim LaCapria,"On 8 November 2015, former Twitter user @Fierc...",http://webcache.googleusercontent.com/search?q...,unproven,"Politics, fiercefemtivist, racism",[{'head': 'A baby died at an unnamed medical f...,"[{'head': 'Confederate flag', 'type': 'instanc...",[{'head': 'A baby died at an unnamed medical f...,"[{'head': 'Confederate flag', 'type': 'instanc...",3,14,"[(A baby died at an unnamed medical facility, ...","[(Confederate flag, instance of, racist), (Fie..."
1,3632,Bat from Shawnee County tests positive for rab...,,A bat found in northeastern Kansas has tested ...,,Topeka television station KSNT reports that th...,https://www.ksnt.com/news/bat-tests-positive-f...,true,"Rabies, Health, General News, Kansas, Bats, To...","[{'head': 'Bat from Shawnee County', 'type': '...","[{'head': 'KSNT', 'type': 'located in the admi...","[{'head': 'Bat from Shawnee County', 'type': '...","[{'head': 'report', 'type': 'Agent', 'tail': '...",3,131,"[(Shawnee County, located in the administrativ...","[(report, Agent, ksnt), (report, Theme, find),..."
2,29558,Germany has banned pork from school canteens b...,"March 7, 2016",What's true: Some politicians complained that ...,Kim LaCapria,"On 7 March 2016, British tabloid Express repor...",http://bnp.org.uk/news/regional/bnp-victory-br...,false,Politics,"[{'head': 'banned pork from school canteens', ...","[{'head': 'Express', 'type': 'country', 'tail'...","[{'head': 'banned pork from school canteens', ...","[{'head': 'Express', 'type': 'country', 'tail'...",3,36,"[(banned pork from school canteens, country, G...","[(Express, country, British), (Express, instan..."
3,8416,Coronavirus prompts Canada to roll out safe dr...,"April 16, 2020",Canada’s Pacific province of British Columbia ...,Tessa Vikander,"In March, the Canadian government urged provin...",,true,Health News,"[{'head': 'Coronavirus', 'type': 'country', 't...","[{'head': 'heroin', 'type': 'instance of', 'ta...","[{'head': 'Coronavirus', 'type': 'country', 't...","[{'head': 'heroin', 'type': 'instance of', 'ta...",3,21,"[(Coronavirus, country, Canada), (safe drugs, ...","[(heroin, instance of, controlled substance), ..."
4,7169,"Wayne National Forest plans fires for tree, wi...",,"Nearly 2,000 acres of Wayne National Forest in...",,Forest officials say scientists who study nati...,,true,"Plants, Wildlife, Health, Wildlife health, For...","[{'head': 'Wayne National Forest', 'type': 'in...","[{'head': 'oak forests', 'type': 'located in t...","[{'head': 'Wayne National Forest', 'type': 'in...","[{'head': 'oak forests', 'type': 'located in t...",3,3,"[(Wayne National Forest, instance of, wildlife...","[(oak forests, located in the administrative t..."


## Evidence Retrieval

In [3]:
data_df = pd.read_pickle('./data/fred/data_post_redundancy_removal.pkl')

In [4]:
def pair_most_related_evidence(row, k):

    claim_triples = row['FRED_claim_triples']
    evidence_triples = row['FRED_main_text_triples']

    relevant_claim_evidence_pairs = {}
    for claim_triple in claim_triples:
        topk_evidences = get_topk_similar_evidences(claim_triple, evidence_triples, k)
        relevant_claim_evidence_pairs[claim_triple] = topk_evidences

    if row.name%100 == 0:
        print(row.name)

    return relevant_claim_evidence_pairs

In [5]:
data_df['FRED_claim_evidence_1'] = data_df.apply(lambda row: pair_most_related_evidence(row, 1), axis=1)

0


OutOfMemoryError: CUDA out of memory. Tried to allocate 586.00 MiB. GPU 0 has a total capacty of 4.00 GiB of which 0 bytes is free. Of the allocated memory 2.26 GiB is allocated by PyTorch, and 1.11 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
data_df.head()

Unnamed: 0,claim_id,claim,date_published,explanation,fact_checkers,main_text,sources,label,subjects,claim_kg,...,main_text_kg_size,claim_triples,clean_claim_triples,main_text_triples,clean_main_text_triples,cleaned_claim_evidence_1,claim_evidence_1,cleaned_entailment_scores_1,entailment_scores_1,random_claim_evidence_1
0,34656,A baby died at an unnamed medical facility be...,"November 10, 2015",Fellow Twitter users suggested @FierceFemtivis...,Kim LaCapria,"On 8 November 2015, former Twitter user @Fierc...",http://webcache.googleusercontent.com/search?q...,unproven,"Politics, fiercefemtivist, racism",[{'head': 'A baby died at an unnamed medical f...,...,14,"[(A baby died at an unnamed medical facility, ...","[(A baby died at an unnamed medical facility, ...","[(Confederate flag, instance of, racist), (Fie...","[(Confederate flag, instance of, racist), (Fie...","{'(A baby died at an unnamed medical facility,...","{'(A baby died at an unnamed medical facility,...","[[0.66573095, -0.5662217, -0.4859993], [0.4305...","[[0.66573095, -0.56622165, -0.48599926], [0.63...","{'(A baby died at an unnamed medical facility,..."
1,3632,Bat from Shawnee County tests positive for rab...,,A bat found in northeastern Kansas has tested ...,,Topeka television station KSNT reports that th...,https://www.ksnt.com/news/bat-tests-positive-f...,true,"Rabies, Health, General News, Kansas, Bats, To...","[{'head': 'Bat from Shawnee County', 'type': '...",...,6,"[(Bat from Shawnee County, has cause, rabies),...","[(Shawnee County, located in the administrativ...","[(KSNT, located in the administrative territor...","[(KSNT, located in the administrative territor...","{'(Shawnee County, located in the administrati...","{'(Bat from Shawnee County, has cause, rabies)...","[[0.6284045, -0.6450901, -0.4347028], [0.61757...","[[0.62699527, -0.7306919, -0.27012262], [0.628...","{'(Shawnee County, located in the administrati..."
2,29558,Germany has banned pork from school canteens b...,"March 7, 2016",What's true: Some politicians complained that ...,Kim LaCapria,"On 7 March 2016, British tabloid Express repor...",http://bnp.org.uk/news/regional/bnp-victory-br...,false,Politics,"[{'head': 'banned pork from school canteens', ...",...,36,"[(banned pork from school canteens, country, G...","[(banned pork from school canteens, country, G...","[(Express, country, British), (Express, instan...","[(Express, country, British), (Express, instan...","{'(banned pork from school canteens, country, ...","{'(banned pork from school canteens, country, ...","[[0.6420961, -0.6923114, -0.3292681], [0.68023...","[[0.6420961, -0.6923114, -0.3292681], [0.68787...","{'(banned pork from school canteens, country, ..."
3,8416,Coronavirus prompts Canada to roll out safe dr...,"April 16, 2020",Canada’s Pacific province of British Columbia ...,Tessa Vikander,"In March, the Canadian government urged provin...",,true,Health News,"[{'head': 'Coronavirus', 'type': 'country', 't...",...,21,"[(Coronavirus, country, Canada), (safe drug, c...","[(Coronavirus, country, Canada), (safe drugs, ...","[(heroin, instance of, controlled substances),...","[(heroin, instance of, controlled substance), ...","{'(Coronavirus, country, Canada)': ['(coronavi...","{'(Coronavirus, country, Canada)': ['(coronavi...","[[-0.15032902, -0.75801086, 0.6346816], [0.660...","[[-0.10043568, -0.7755534, 0.6232412], [0.6620...","{'(Coronavirus, country, Canada)': ['(opioid, ..."
4,7169,"Wayne National Forest plans fires for tree, wi...",,"Nearly 2,000 acres of Wayne National Forest in...",,Forest officials say scientists who study nati...,,true,"Plants, Wildlife, Health, Wildlife health, For...","[{'head': 'Wayne National Forest', 'type': 'in...",...,3,"[(Wayne National Forest, instance of, forest),...","[(Wayne National Forest, instance of, wildlife...","[(oak forests, located in the administrative t...","[(oak forests, located in the administrative t...","{'(Wayne National Forest, instance of, wildlif...","{'(Wayne National Forest, instance of, forest)...","[[0.63135314, -0.7029956, -0.32739952], [0.662...","[[0.63064003, -0.6786143, -0.3765312], [0.6313...","{'(Wayne National Forest, instance of, wildlif..."


In [None]:
data_df.to_pickle('./data/fred/data_post_evidence_retrieval.pkl')

In [None]:
def get_entailment_scores(row):
    
    claim_evidences = row['FRED_claim_evidence_1']

    evidence_claim_tuples = []
    for claim, evidences in claim_evidences.items():
        evidence_claim_tuples.append((evidences[0], claim))

    if row.name%100==0:
        print(row.name)

    return get_entailment(evidence_claim_tuples)

In [None]:
data_df['FRED_entailment_scores_1'] = data_df.apply(lambda row : get_entailment_scores(row), axis=1)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
0
100
200
300
400
500
600
700
800
900
1000
1100
1200


In [None]:
data_df.to_pickle('./data/fred/data_post_entailment.pkl')