### Libraries

%pip install gensim nltk pandas

Code based on https://radimrehurek.com/gensim/auto_examples/tutorials/run_doc2vec_lee.html

In [49]:
import json
import nltk
from nltk.tokenize import word_tokenize
import pandas as pd
import gensim
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
import collections


# 1. Preprocess the data

In [50]:
# Read train claims
# The files are opened explicitly as UTF-8 so the result does not depend on the
# platform's locale default; the claim texts contain non-ASCII characters
# (e.g. "ñ" and curly quotes).
with open('../data/train-claims.json', 'r', encoding='utf-8') as f:
    claims = json.load(f)

# Read dev claims
with open('../data/dev-claims.json', 'r', encoding='utf-8') as f:
    dev_claims = json.load(f)

In [51]:
# Lower-case the 'claim_text' field of every claim in both splits (in place).
for claim_set in (claims, dev_claims):
    for claim_info in claim_set.values():
        claim_info['claim_text'] = claim_info['claim_text'].lower()

In [52]:
# Read evidence
# Explicit UTF-8 keeps the read independent of the platform's locale default.
with open('../data/evidence.json', 'r', encoding='utf-8') as f:
    evidences = json.load(f)

In [53]:
# Lower-case every evidence text; the evidence ids stay as the dict keys.
evidences = {evidence_id: str.lower(text) for evidence_id, text in evidences.items()}

In [54]:
# Report how many items each dataset contains, one line per dataset.
size_report = [
    "Number of claims for training = {0}".format(len(claims)),
    "Number of claims for development = {0}".format(len(dev_claims)),
    "Number of evidences = {0}".format(len(evidences)),
]
print(*size_report, sep="\n")

Number of claims for training = 1228
Number of claims for development = 154
Number of evidences = 1208827


### Prepare the corpus

In [55]:
# Build the training corpus as {document id: text}. It holds three kinds of
# documents, inserted in this order:
#   1. each training claim on its own              -> key "<claim id>"
#   2. each claim followed by its listed evidences -> key "<claim id>- evidences"
#   3. every evidence passage on its own           -> key "<evidence id>"
corpus = {}

# Descriptive loop names are used instead of `id`, which would shadow the
# builtin for the rest of the kernel session.
for claim_id, claim_info in claims.items():
    claim_text = claim_info['claim_text']
    corpus[claim_id] = claim_text.strip()  # Claim text on its own

    # Claim text plus every evidence passage it references, space-separated.
    evidence_texts = [evidences[evidence_id] for evidence_id in claim_info['evidences']]
    corpus[claim_id + '- evidences'] = " ".join([claim_text] + evidence_texts).strip()

for evidence_id, evidence_text in evidences.items():
    corpus[evidence_id] = evidence_text.strip()  # Evidence text on its own

In [56]:
# Alternative corpus (disabled): claim texts and evidence texts only, without the combined "claim + evidences" documents
#corpus = {}
#for id, claim in claims.items():
#    corpus[id] = str.strip(claim['claim_text'])  # Add claim text

#for id, evidence in evidences.items():
#    corpus[id] = str.strip(evidence) # Add evidence text

In [57]:
def tokenize_text(df, column):
    """Add a ``tokens`` column with the alphanumeric word tokens of ``df[column]``.

    Each text is split with ``word_tokenize`` and only tokens for which
    ``str.isalnum()`` is true are kept, so punctuation and tokens containing
    non-alphanumeric characters (e.g. hyphenated words) are dropped.

    Args:
        df: DataFrame holding the texts; it is modified in place.
        column: Name of the column containing the text to tokenize.

    Returns:
        The same ``df`` object, now carrying the ``tokens`` column.
    """
    def _alnum_tokens(text):
        kept = []
        for token in word_tokenize(text):
            if token.isalnum():
                kept.append(token)
        return kept

    df['tokens'] = df[column].apply(_alnum_tokens)
    return df

In [58]:
# Convert the corpus dict into a single-column DataFrame: the document ids
# become the index and the texts go into the 'text' column.
df = pd.DataFrame.from_dict(corpus, orient='index', columns=['text'])


In [59]:
# Add the 'tokens' column to the corpus frame (tokenize_text mutates df in
# place and returns the same object), then display it.
df = tokenize_text(df,'text')
df

Unnamed: 0,text,tokens
claim-1937,not only is there no scientific evidence that ...,"[not, only, is, there, no, scientific, evidenc..."
claim-1937- evidences,not only is there no scientific evidence that ...,"[not, only, is, there, no, scientific, evidenc..."
claim-126,el niño drove record highs in global temperatu...,"[el, niño, drove, record, highs, in, global, t..."
claim-126- evidences,el niño drove record highs in global temperatu...,"[el, niño, drove, record, highs, in, global, t..."
claim-2510,"in 1946, pdo switched to a cool phase.","[in, 1946, pdo, switched, to, a, cool, phase]"
...,...,...
evidence-1208822,also on the property is a contributing garage ...,"[also, on, the, property, is, a, contributing,..."
evidence-1208823,| class = ``fn org'' | fyrde | | | | 6110 | | ...,"[class, fn, org, fyrde, 6110, volda]"
evidence-1208824,"dragon storm (game), a role-playing game and c...","[dragon, storm, game, a, game, and, collectibl..."
evidence-1208825,it states that the zeriuani ``which is so grea...,"[it, states, that, the, zeriuani, which, is, s..."


In [60]:
# Row-wise helper: wraps a row's tokens in a TaggedDocument.
def process_row(row, index):
    """Build the ``TaggedDocument`` for one corpus row.

    Args:
        row: Row exposing a ``'tokens'`` entry with the document's word list.
        index: Label used as the document's single tag.

    Returns:
        ``TaggedDocument`` whose words are ``row['tokens']`` and whose tags are ``[index]``.
    """
    return TaggedDocument(words=row['tokens'], tags=[index])

# Tag every document with its own index label (the document id).
df['tagged'] = df.apply(lambda record: process_row(record, record.name), axis=1)
df

Unnamed: 0,text,tokens,tagged
claim-1937,not only is there no scientific evidence that ...,"[not, only, is, there, no, scientific, evidenc...","([not, only, is, there, no, scientific, eviden..."
claim-1937- evidences,not only is there no scientific evidence that ...,"[not, only, is, there, no, scientific, evidenc...","([not, only, is, there, no, scientific, eviden..."
claim-126,el niño drove record highs in global temperatu...,"[el, niño, drove, record, highs, in, global, t...","([el, niño, drove, record, highs, in, global, ..."
claim-126- evidences,el niño drove record highs in global temperatu...,"[el, niño, drove, record, highs, in, global, t...","([el, niño, drove, record, highs, in, global, ..."
claim-2510,"in 1946, pdo switched to a cool phase.","[in, 1946, pdo, switched, to, a, cool, phase]","([in, 1946, pdo, switched, to, a, cool, phase]..."
...,...,...,...
evidence-1208822,also on the property is a contributing garage ...,"[also, on, the, property, is, a, contributing,...","([also, on, the, property, is, a, contributing..."
evidence-1208823,| class = ``fn org'' | fyrde | | | | 6110 | | ...,"[class, fn, org, fyrde, 6110, volda]","([class, fn, org, fyrde, 6110, volda], [eviden..."
evidence-1208824,"dragon storm (game), a role-playing game and c...","[dragon, storm, game, a, game, and, collectibl...","([dragon, storm, game, a, game, and, collectib..."
evidence-1208825,it states that the zeriuani ``which is so grea...,"[it, states, that, the, zeriuani, which, is, s...","([it, states, that, the, zeriuani, which, is, ..."


In [61]:
# Pull the tagged documents out of the frame as an array for model training.
train_corpus = df['tagged'].to_numpy()
train_corpus

array([TaggedDocument(words=['not', 'only', 'is', 'there', 'no', 'scientific', 'evidence', 'that', 'co2', 'is', 'a', 'pollutant', 'higher', 'co2', 'concentrations', 'actually', 'help', 'ecosystems', 'support', 'more', 'plant', 'and', 'animal', 'life'], tags=['claim-1937']),
       TaggedDocument(words=['not', 'only', 'is', 'there', 'no', 'scientific', 'evidence', 'that', 'co2', 'is', 'a', 'pollutant', 'higher', 'co2', 'concentrations', 'actually', 'help', 'ecosystems', 'support', 'more', 'plant', 'and', 'animal', 'life', 'at', 'very', 'high', 'concentrations', '100', 'times', 'atmospheric', 'concentration', 'or', 'greater', 'carbon', 'dioxide', 'can', 'be', 'toxic', 'to', 'animal', 'life', 'so', 'raising', 'the', 'concentration', 'to', 'ppm', '1', 'or', 'higher', 'for', 'several', 'hours', 'will', 'eliminate', 'pests', 'such', 'as', 'whiteflies', 'and', 'spider', 'mites', 'in', 'a', 'greenhouse', 'plants', 'can', 'grow', 'as', 'much', 'as', '50', 'percent', 'faster', 'in', 'concentrati

## Train Model
https://radimrehurek.com/gensim/auto_examples/tutorials/run_doc2vec_lee.html#sphx-glr-auto-examples-tutorials-run-doc2vec-lee-py

In [62]:
# Configure the Doc2Vec model with the hyperparameters used for this run.
# NOTE(review): no `seed` is passed, so results are presumably not reproducible
# between runs — confirm against the gensim docs and pin one if needed.
model = Doc2Vec(vector_size=100, min_count=2, epochs=50)

In [63]:
# Build the model's vocabulary from the tagged training documents.
model.build_vocab(train_corpus)

In [64]:
# Train on the same corpus used for build_vocab, reusing the document count
# and epoch setting already stored on the model.
model.train(train_corpus, total_examples=model.corpus_count, epochs=model.epochs)

In [65]:
# Persist the trained model (path is relative to the current working directory).
model.save("Doc2Vec.model")

In [66]:
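# To reuse a saved model instead of retraining, uncomment (load returns the model, so it must be assigned):
#model = Doc2Vec.load("Doc2Vec.model")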

## Assessing the model

In [67]:
def get_top10_rank(inferred_vector):
    """Return the 10 trained documents most similar to ``inferred_vector``.

    Args:
        inferred_vector: Document vector to compare against the model's
            document vectors (e.g. the output of ``model.infer_vector``).

    Returns:
        List of up to 10 ``(tag, similarity)`` tuples, most similar first.
        Tags can be claim ids as well as evidence ids, because both kinds of
        documents were used for training.
    """
    # Request only the 10 best matches instead of ranking every document
    # vector in the model and discarding all but the first 10.
    return model.dv.most_similar([inferred_vector], topn=10)

In [68]:
def filter_and_predict(sims, threshold=0.5, excluded_prefix='claim'):
    """Select the predicted evidence ids from a similarity ranking.

    Args:
        sims: Iterable of ``(tag, similarity)`` pairs, in ranking order.
        threshold: A pair is kept only if its similarity is strictly greater
            than this value.
        excluded_prefix: Pairs whose tag starts with this prefix are discarded
            (by default, claim documents).

    Returns:
        List of the tags that pass the filter, in their original order. Empty
        tags are skipped. The defaults reproduce the previously hard-coded
        behaviour (prefix ``'claim'``, threshold ``0.5``).
    """
    return [
        item[0]
        for item in sims
        if not item[0].startswith(excluded_prefix) and item[1] > threshold and item[0]
    ]

In [69]:
# Training claims: tokenize the claim text, infer a document vector, rank the
# corpus by similarity and derive the evidence predictions.
train_df = tokenize_text(pd.DataFrame.from_dict(claims, orient='index'), 'claim_text')
train_df['inferred'] = train_df['tokens'].apply(model.infer_vector)
train_df['sims'] = train_df['inferred'].apply(get_top10_rank)
train_df['predictions'] = train_df['sims'].apply(filter_and_predict)
train_df

Unnamed: 0,claim_text,claim_label,evidences,tokens,inferred,sims,predictions
claim-1937,not only is there no scientific evidence that ...,DISPUTED,"[evidence-442946, evidence-1194317, evidence-1...","[not, only, is, there, no, scientific, evidenc...","[0.79618436, -0.22726962, 0.14112796, -0.37445...","[(claim-1937, 0.8163521885871887), (evidence-7...","[evidence-795805, evidence-897647, evidence-45..."
claim-126,el niño drove record highs in global temperatu...,REFUTES,"[evidence-338219, evidence-1127398]","[el, niño, drove, record, highs, in, global, t...","[-0.13986285, -0.0922804, -0.34083655, -0.1689...","[(claim-126, 0.7222234606742859), (evidence-80...","[evidence-804941, evidence-450402, evidence-58..."
claim-2510,"in 1946, pdo switched to a cool phase.",SUPPORTS,"[evidence-530063, evidence-984887]","[in, 1946, pdo, switched, to, a, cool, phase]","[0.0074584247, -0.05109416, -0.29815012, 0.287...","[(claim-2510, 0.6709652543067932), (evidence-6...","[evidence-62487, evidence-393012, evidence-265..."
claim-2021,weather channel co-founder john coleman provid...,DISPUTED,"[evidence-1177431, evidence-782448, evidence-5...","[weather, channel, john, coleman, provided, ev...","[0.5130342, -0.13854378, -0.59057415, 0.173502...","[(claim-2021, 0.7391155362129211), (claim-2020...","[evidence-816582, evidence-498011, evidence-75..."
claim-2449,"""january 2008 capped a 12 month period of glob...",NOT_ENOUGH_INFO,"[evidence-1010750, evidence-91661, evidence-72...","[january, 2008, capped, a, 12, month, period, ...","[0.4213658, -1.6297535, -0.22056714, -0.293030...","[(claim-2449, 0.9100427031517029), (claim-2449...",[evidence-1001860]
...,...,...,...,...,...,...,...
claim-1504,climate scientists say that aspects of the cas...,SUPPORTS,"[evidence-1055682, evidence-1047356, evidence-...","[climate, scientists, say, that, aspects, of, ...","[0.06361664, 0.01792881, -0.3761822, 0.0437825...","[(claim-95, 0.823792576789856), (claim-1504, 0...",[evidence-948149]
claim-243,"in its 5th assessment report in 2013, the ipcc...",SUPPORTS,[evidence-916755],"[in, its, 5th, assessment, report, in, 2013, t...","[-1.6132338, -0.7972539, -0.60836667, -0.77611...","[(claim-243, 0.7400893568992615), (claim-243- ...","[evidence-57808, evidence-1033017, evidence-92..."
claim-2302,"since the mid 1970s, global temperatures have ...",NOT_ENOUGH_INFO,"[evidence-403673, evidence-889933, evidence-11...","[since, the, mid, 1970s, global, temperatures,...","[0.0022502998, -0.676605, 0.1938891, -0.014069...","[(evidence-84584, 0.5805303454399109), (eviden...","[evidence-84584, evidence-42148, evidence-1045..."
claim-502,but abnormal temperature spikes in february an...,NOT_ENOUGH_INFO,"[evidence-97375, evidence-562427, evidence-521...","[but, abnormal, temperature, spikes, in, febru...","[-0.28964463, 0.80272794, -0.39754498, -0.7879...","[(claim-502, 0.7460736036300659), (evidence-10...",[evidence-1046270]


In [70]:
# Development claims: tokenize the claim text, infer a document vector, rank
# the corpus by similarity and derive the evidence predictions.
dev_df = tokenize_text(pd.DataFrame.from_dict(dev_claims, orient='index'), 'claim_text')
dev_df['inferred'] = dev_df['tokens'].apply(model.infer_vector)
dev_df['sims'] = dev_df['inferred'].apply(get_top10_rank)
dev_df['predictions'] = dev_df['sims'].apply(filter_and_predict)
dev_df

Unnamed: 0,claim_text,claim_label,evidences,tokens,inferred,sims,predictions
claim-752,[south australia] has the most expensive elect...,SUPPORTS,"[evidence-67732, evidence-572512]","[south, australia, has, the, most, expensive, ...","[0.064753525, 0.7276473, 0.6037031, 0.19164473...","[(evidence-572512, 0.790063202381134), (eviden...","[evidence-572512, evidence-862245, evidence-88..."
claim-375,when 3 per cent of total annual global emissio...,NOT_ENOUGH_INFO,"[evidence-996421, evidence-1080858, evidence-2...","[when, 3, per, cent, of, total, annual, global...","[-0.6699967, 1.6433822, -0.301362, -0.521104, ...","[(evidence-106689, 0.5295317769050598), (evide...","[evidence-106689, evidence-1171864, evidence-1..."
claim-1266,this means that the world is now 1c warmer tha...,SUPPORTS,"[evidence-889933, evidence-694262]","[this, means, that, the, world, is, now, 1c, w...","[-0.095785536, 0.18580711, 0.1321284, -0.06492...","[(evidence-353643, 0.5666224956512451), (evide...","[evidence-353643, evidence-1056549, evidence-7..."
claim-871,"“as it happens, zika may also be a good model ...",NOT_ENOUGH_INFO,"[evidence-422399, evidence-702226, evidence-28...","[as, it, happens, zika, may, also, be, a, good...","[0.76137334, 0.19057366, -0.5716099, -0.597925...","[(evidence-515663, 0.5301102995872498), (evide...","[evidence-515663, evidence-1170825, evidence-9..."
claim-2164,greenland has only lost a tiny fraction of its...,REFUTES,"[evidence-52981, evidence-264761, evidence-947...","[greenland, has, only, lost, a, tiny, fraction...","[-0.5740872, 0.30192325, 0.36953303, -0.116996...","[(evidence-66438, 0.7130389213562012), (eviden...","[evidence-66438, evidence-324672, evidence-258..."
...,...,...,...,...,...,...,...
claim-2400,"'to suddenly label co2 as a ""pollutant"" is a d...",REFUTES,"[evidence-409365, evidence-127519, evidence-85...","[suddenly, label, co2, as, a, pollutant, is, a...","[0.04331737, -0.39057842, 0.8072592, -0.551898...","[(evidence-128516, 0.48380401730537415), (evid...",[]
claim-204,"after a natural orbitally driven warming, atmo...",NOT_ENOUGH_INFO,"[evidence-368192, evidence-261690, evidence-20...","[after, a, natural, orbitally, driven, warming...","[-0.29054934, -0.8767017, -0.17264457, -0.6553...","[(evidence-976284, 0.5429459810256958), (evide...",[evidence-976284]
claim-1426,many of the world’s coral reefs are already ba...,NOT_ENOUGH_INFO,"[evidence-1124018, evidence-995813, evidence-1...","[many, of, the, world, s, coral, reefs, are, a...","[0.37671274, 0.0071976935, -0.45794556, -0.968...","[(evidence-966474, 0.6304357647895813), (evide...","[evidence-966474, evidence-726719, evidence-38..."
claim-698,a recent study led by lawrence livermore natio...,REFUTES,[evidence-660755],"[a, recent, study, led, by, lawrence, livermor...","[-0.19982477, 0.8678314, 0.37962744, -1.004664...","[(evidence-653529, 0.5042598843574524), (evide...",[evidence-653529]


In [71]:
# Cache the training-claim results (tokens, inferred vectors, rankings and
# predictions) as a pickle file in the current working directory.
train_df.to_pickle('dfInferred.pkl')
# To restore the cached frame instead of recomputing it, uncomment:
#train_df = pd.read_pickle('dfInferred.pkl')
train_df

Unnamed: 0,claim_text,claim_label,evidences,tokens,inferred,sims,predictions
claim-1937,not only is there no scientific evidence that ...,DISPUTED,"[evidence-442946, evidence-1194317, evidence-1...","[not, only, is, there, no, scientific, evidenc...","[0.79618436, -0.22726962, 0.14112796, -0.37445...","[(claim-1937, 0.8163521885871887), (evidence-7...","[evidence-795805, evidence-897647, evidence-45..."
claim-126,el niño drove record highs in global temperatu...,REFUTES,"[evidence-338219, evidence-1127398]","[el, niño, drove, record, highs, in, global, t...","[-0.13986285, -0.0922804, -0.34083655, -0.1689...","[(claim-126, 0.7222234606742859), (evidence-80...","[evidence-804941, evidence-450402, evidence-58..."
claim-2510,"in 1946, pdo switched to a cool phase.",SUPPORTS,"[evidence-530063, evidence-984887]","[in, 1946, pdo, switched, to, a, cool, phase]","[0.0074584247, -0.05109416, -0.29815012, 0.287...","[(claim-2510, 0.6709652543067932), (evidence-6...","[evidence-62487, evidence-393012, evidence-265..."
claim-2021,weather channel co-founder john coleman provid...,DISPUTED,"[evidence-1177431, evidence-782448, evidence-5...","[weather, channel, john, coleman, provided, ev...","[0.5130342, -0.13854378, -0.59057415, 0.173502...","[(claim-2021, 0.7391155362129211), (claim-2020...","[evidence-816582, evidence-498011, evidence-75..."
claim-2449,"""january 2008 capped a 12 month period of glob...",NOT_ENOUGH_INFO,"[evidence-1010750, evidence-91661, evidence-72...","[january, 2008, capped, a, 12, month, period, ...","[0.4213658, -1.6297535, -0.22056714, -0.293030...","[(claim-2449, 0.9100427031517029), (claim-2449...",[evidence-1001860]
...,...,...,...,...,...,...,...
claim-1504,climate scientists say that aspects of the cas...,SUPPORTS,"[evidence-1055682, evidence-1047356, evidence-...","[climate, scientists, say, that, aspects, of, ...","[0.06361664, 0.01792881, -0.3761822, 0.0437825...","[(claim-95, 0.823792576789856), (claim-1504, 0...",[evidence-948149]
claim-243,"in its 5th assessment report in 2013, the ipcc...",SUPPORTS,[evidence-916755],"[in, its, 5th, assessment, report, in, 2013, t...","[-1.6132338, -0.7972539, -0.60836667, -0.77611...","[(claim-243, 0.7400893568992615), (claim-243- ...","[evidence-57808, evidence-1033017, evidence-92..."
claim-2302,"since the mid 1970s, global temperatures have ...",NOT_ENOUGH_INFO,"[evidence-403673, evidence-889933, evidence-11...","[since, the, mid, 1970s, global, temperatures,...","[0.0022502998, -0.676605, 0.1938891, -0.014069...","[(evidence-84584, 0.5805303454399109), (eviden...","[evidence-84584, evidence-42148, evidence-1045..."
claim-502,but abnormal temperature spikes in february an...,NOT_ENOUGH_INFO,"[evidence-97375, evidence-562427, evidence-521...","[but, abnormal, temperature, spikes, in, febru...","[-0.28964463, 0.80272794, -0.39754498, -0.7879...","[(claim-502, 0.7460736036300659), (evidence-10...",[evidence-1046270]


In [72]:
# Cache the development-claim results (tokens, inferred vectors, rankings and
# predictions) as a pickle file in the current working directory.
dev_df.to_pickle('devdfInferred.pkl')
# To restore the cached frame instead of recomputing it, uncomment:
#dev_df = pd.read_pickle('devdfInferred.pkl')
dev_df

Unnamed: 0,claim_text,claim_label,evidences,tokens,inferred,sims,predictions
claim-752,[south australia] has the most expensive elect...,SUPPORTS,"[evidence-67732, evidence-572512]","[south, australia, has, the, most, expensive, ...","[0.064753525, 0.7276473, 0.6037031, 0.19164473...","[(evidence-572512, 0.790063202381134), (eviden...","[evidence-572512, evidence-862245, evidence-88..."
claim-375,when 3 per cent of total annual global emissio...,NOT_ENOUGH_INFO,"[evidence-996421, evidence-1080858, evidence-2...","[when, 3, per, cent, of, total, annual, global...","[-0.6699967, 1.6433822, -0.301362, -0.521104, ...","[(evidence-106689, 0.5295317769050598), (evide...","[evidence-106689, evidence-1171864, evidence-1..."
claim-1266,this means that the world is now 1c warmer tha...,SUPPORTS,"[evidence-889933, evidence-694262]","[this, means, that, the, world, is, now, 1c, w...","[-0.095785536, 0.18580711, 0.1321284, -0.06492...","[(evidence-353643, 0.5666224956512451), (evide...","[evidence-353643, evidence-1056549, evidence-7..."
claim-871,"“as it happens, zika may also be a good model ...",NOT_ENOUGH_INFO,"[evidence-422399, evidence-702226, evidence-28...","[as, it, happens, zika, may, also, be, a, good...","[0.76137334, 0.19057366, -0.5716099, -0.597925...","[(evidence-515663, 0.5301102995872498), (evide...","[evidence-515663, evidence-1170825, evidence-9..."
claim-2164,greenland has only lost a tiny fraction of its...,REFUTES,"[evidence-52981, evidence-264761, evidence-947...","[greenland, has, only, lost, a, tiny, fraction...","[-0.5740872, 0.30192325, 0.36953303, -0.116996...","[(evidence-66438, 0.7130389213562012), (eviden...","[evidence-66438, evidence-324672, evidence-258..."
...,...,...,...,...,...,...,...
claim-2400,"'to suddenly label co2 as a ""pollutant"" is a d...",REFUTES,"[evidence-409365, evidence-127519, evidence-85...","[suddenly, label, co2, as, a, pollutant, is, a...","[0.04331737, -0.39057842, 0.8072592, -0.551898...","[(evidence-128516, 0.48380401730537415), (evid...",[]
claim-204,"after a natural orbitally driven warming, atmo...",NOT_ENOUGH_INFO,"[evidence-368192, evidence-261690, evidence-20...","[after, a, natural, orbitally, driven, warming...","[-0.29054934, -0.8767017, -0.17264457, -0.6553...","[(evidence-976284, 0.5429459810256958), (evide...",[evidence-976284]
claim-1426,many of the world’s coral reefs are already ba...,NOT_ENOUGH_INFO,"[evidence-1124018, evidence-995813, evidence-1...","[many, of, the, world, s, coral, reefs, are, a...","[0.37671274, 0.0071976935, -0.45794556, -0.968...","[(evidence-966474, 0.6304357647895813), (evide...","[evidence-966474, evidence-726719, evidence-38..."
claim-698,a recent study led by lawrence livermore natio...,REFUTES,[evidence-660755],"[a, recent, study, led, by, lawrence, livermor...","[-0.19982477, 0.8678314, 0.37962744, -1.004664...","[(evidence-653529, 0.5042598843574524), (evide...",[evidence-653529]


In [73]:
# Keep only the claim text and label, and expose the model's predictions
# under the 'evidences' column name in place of the original evidence lists.
train_df_doc2vec = (
    train_df
    .copy()
    .drop(columns=['evidences', 'tokens', 'inferred', 'sims'])
    .rename(columns={'predictions': 'evidences'})
)
train_df_doc2vec

Unnamed: 0,claim_text,claim_label,evidences
claim-1937,not only is there no scientific evidence that ...,DISPUTED,"[evidence-795805, evidence-897647, evidence-45..."
claim-126,el niño drove record highs in global temperatu...,REFUTES,"[evidence-804941, evidence-450402, evidence-58..."
claim-2510,"in 1946, pdo switched to a cool phase.",SUPPORTS,"[evidence-62487, evidence-393012, evidence-265..."
claim-2021,weather channel co-founder john coleman provid...,DISPUTED,"[evidence-816582, evidence-498011, evidence-75..."
claim-2449,"""january 2008 capped a 12 month period of glob...",NOT_ENOUGH_INFO,[evidence-1001860]
...,...,...,...
claim-1504,climate scientists say that aspects of the cas...,SUPPORTS,[evidence-948149]
claim-243,"in its 5th assessment report in 2013, the ipcc...",SUPPORTS,"[evidence-57808, evidence-1033017, evidence-92..."
claim-2302,"since the mid 1970s, global temperatures have ...",NOT_ENOUGH_INFO,"[evidence-84584, evidence-42148, evidence-1045..."
claim-502,but abnormal temperature spikes in february an...,NOT_ENOUGH_INFO,[evidence-1046270]


In [74]:
# Keep only the claim text and label, and expose the model's predictions
# under the 'evidences' column name in place of the original evidence lists.
dev_df_doc2vec = (
    dev_df
    .copy()
    .drop(columns=['evidences', 'tokens', 'inferred', 'sims'])
    .rename(columns={'predictions': 'evidences'})
)
dev_df_doc2vec

Unnamed: 0,claim_text,claim_label,evidences
claim-752,[south australia] has the most expensive elect...,SUPPORTS,"[evidence-572512, evidence-862245, evidence-88..."
claim-375,when 3 per cent of total annual global emissio...,NOT_ENOUGH_INFO,"[evidence-106689, evidence-1171864, evidence-1..."
claim-1266,this means that the world is now 1c warmer tha...,SUPPORTS,"[evidence-353643, evidence-1056549, evidence-7..."
claim-871,"“as it happens, zika may also be a good model ...",NOT_ENOUGH_INFO,"[evidence-515663, evidence-1170825, evidence-9..."
claim-2164,greenland has only lost a tiny fraction of its...,REFUTES,"[evidence-66438, evidence-324672, evidence-258..."
...,...,...,...
claim-2400,"'to suddenly label co2 as a ""pollutant"" is a d...",REFUTES,[]
claim-204,"after a natural orbitally driven warming, atmo...",NOT_ENOUGH_INFO,[evidence-976284]
claim-1426,many of the world’s coral reefs are already ba...,NOT_ENOUGH_INFO,"[evidence-966474, evidence-726719, evidence-38..."
claim-698,a recent study led by lawrence livermore natio...,REFUTES,[evidence-653529]


In [75]:
# Export both prediction frames to JSON files in the data directory; with
# orient='index' each top-level key is a row label (the claim id).
train_df_doc2vec.to_json('../data/train_claims_doc2vec.json', orient='index')
dev_df_doc2vec.to_json('../data/dev_claims_doc2vec.json', orient='index')