### Libraries

%pip install gensim nltk pandas sklearn torch rank_bm25

Code based on https://radimrehurek.com/gensim/auto_examples/tutorials/run_doc2vec_lee.html

In [1]:
import json
import pandas as pd
import numpy as np
import multiprocessing
import pickle
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, classification_report
import matplotlib.pyplot as plt

import nltk
from nltk.tokenize import word_tokenize

import gensim
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from gensim.models.callbacks import CallbackAny2Vec
import collections

from rank_bm25 import BM25Okapi

# Garbage Collector to fix memory issues
import gc

# 1. Preprocess the data

In [2]:
# Read train claims
with open('../data/train-claims.json', 'r') as f:
    claims = json.load(f)

# Read dev claims
with open('../data/dev-claims.json', 'r') as f:
    dev_claims = json.load(f)

In [3]:
# Lowercasing the 'claim_text' field for each claim
for claim_id, claim_info in claims.items():
    claim_info['claim_text'] = claim_info['claim_text'].lower()

for claim_id, claim_info in dev_claims.items():
    claim_info['claim_text'] = claim_info['claim_text'].lower()

In [4]:
# Read evidence
with open('../data/evidence.json', 'r') as f:
    evidences = json.load(f)

In [5]:
evidences = {i: str.lower(j) for i,j in evidences.items()}

In [6]:
print("Number of claims for training = {0}".format(len(claims)))
print("Number of claims for development = {0}".format(len(dev_claims)))
print("Number of evidences = {0}".format(len(evidences)))

Number of claims for training = 1228
Number of claims for development = 154
Number of evidences = 1208827


## Prepare the corpus

### Second approach

In [13]:
# Collect all texts from claims
corpus = {}

for id, claim in claims.items():
    text2 = claim['claim_text']
    # Create pairs claim + evidence
    for evidence in claim['evidences']:
        text = claim['claim_text'] + " " + evidences[evidence]
        corpus[id + ' - ' + evidence] = (str.strip(text),id)
        text2 = text2 + " " + evidences[evidence]
    # Create pairs claim + all_evidence
    corpus[id] = (str.strip(text2),claim['claim_label'])

for id, evidence in evidences.items():
    corpus[id] = (str.strip(evidence),id) # Add evidence text

### First approach

In [8]:
# Train the model in claims and evidences
# Collect all texts from claims
#corpus = {}
#for id, claim in claims.items():
#    corpus[id] = str.strip(claim['claim_text'])  # Add claim text
#for id, evidence in evidences.items():
#    corpus[id] = str.strip(evidence) # Add evidence text

In [9]:
# 
def tokenize_text(df, column):
    df['tokens'] = df[column].apply(lambda x: [token for token in word_tokenize(x) if token.isalnum()])
    return df

In [10]:
def process_row(row, index):
    return TaggedDocument(row['tokens'], tags=[row[index]])

In [14]:
# Convert the list of documents into a pandas DataFrame
df = pd.DataFrame.from_dict(corpus, orient='index', columns=['text','label'])
df = tokenize_text(df,'text')
df['tagged'] = df.apply(lambda row: process_row(row, 'label'), axis=1)
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 1214177 entries, claim-1937 - evidence-442946 to evidence-1208826
Data columns (total 4 columns):
 #   Column  Non-Null Count    Dtype 
---  ------  --------------    ----- 
 0   text    1214177 non-null  object
 1   label   1214177 non-null  object
 2   tokens  1214177 non-null  object
 3   tagged  1214177 non-null  object
dtypes: object(4)
memory usage: 46.3+ MB


Unnamed: 0,text,label,tokens,tagged
claim-1937 - evidence-442946,not only is there no scientific evidence that ...,claim-1937,"[not, only, is, there, no, scientific, evidenc...","([not, only, is, there, no, scientific, eviden..."
claim-1937 - evidence-1194317,not only is there no scientific evidence that ...,claim-1937,"[not, only, is, there, no, scientific, evidenc...","([not, only, is, there, no, scientific, eviden..."
claim-1937 - evidence-12171,not only is there no scientific evidence that ...,claim-1937,"[not, only, is, there, no, scientific, evidenc...","([not, only, is, there, no, scientific, eviden..."
claim-1937,not only is there no scientific evidence that ...,DISPUTED,"[not, only, is, there, no, scientific, evidenc...","([not, only, is, there, no, scientific, eviden..."
claim-126 - evidence-338219,el niño drove record highs in global temperatu...,claim-126,"[el, niño, drove, record, highs, in, global, t...","([el, niño, drove, record, highs, in, global, ..."


In [15]:
train_corpus = df.tagged.values
del df
gc.collect()
train_corpus

array([TaggedDocument(words=['not', 'only', 'is', 'there', 'no', 'scientific', 'evidence', 'that', 'co2', 'is', 'a', 'pollutant', 'higher', 'co2', 'concentrations', 'actually', 'help', 'ecosystems', 'support', 'more', 'plant', 'and', 'animal', 'life', 'at', 'very', 'high', 'concentrations', '100', 'times', 'atmospheric', 'concentration', 'or', 'greater', 'carbon', 'dioxide', 'can', 'be', 'toxic', 'to', 'animal', 'life', 'so', 'raising', 'the', 'concentration', 'to', 'ppm', '1', 'or', 'higher', 'for', 'several', 'hours', 'will', 'eliminate', 'pests', 'such', 'as', 'whiteflies', 'and', 'spider', 'mites', 'in', 'a', 'greenhouse'], tags=['claim-1937']),
       TaggedDocument(words=['not', 'only', 'is', 'there', 'no', 'scientific', 'evidence', 'that', 'co2', 'is', 'a', 'pollutant', 'higher', 'co2', 'concentrations', 'actually', 'help', 'ecosystems', 'support', 'more', 'plant', 'and', 'animal', 'life', 'plants', 'can', 'grow', 'as', 'much', 'as', '50', 'percent', 'faster', 'in', 'concentrati

In [16]:
# Collect all texts from dev claims
dv_corpus = {}

for id, claim in dev_claims.items():
    text2 = claim['claim_text']
    # Create pairs claim + evidence
    for evidence in claim['evidences']:
        text = claim['claim_text'] + " " + evidences[evidence]
        dv_corpus[id + ' - ' + evidence] = (str.strip(text),id)
        text2 = text2 + " " + evidences[evidence]
    # Create pairs claim + all_evidence
    dv_corpus[id] = (str.strip(text2),claim['claim_label'])

In [17]:
# Collect all texts from dev claims
dev_df = pd.DataFrame.from_dict(dv_corpus, orient='index', columns=['text','label'])
dev_df = tokenize_text(dev_df,'text')
dev_df['tagged'] = dev_df.apply(lambda row: process_row(row, 'label'), axis=1)
dev_df.info()
dev_df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 645 entries, claim-752 - evidence-67732 to claim-1021
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    645 non-null    object
 1   label   645 non-null    object
 2   tokens  645 non-null    object
 3   tagged  645 non-null    object
dtypes: object(4)
memory usage: 25.2+ KB


Unnamed: 0,text,label,tokens,tagged
claim-752 - evidence-67732,[south australia] has the most expensive elect...,claim-752,"[south, australia, has, the, most, expensive, ...","([south, australia, has, the, most, expensive,..."
claim-752 - evidence-572512,[south australia] has the most expensive elect...,claim-752,"[south, australia, has, the, most, expensive, ...","([south, australia, has, the, most, expensive,..."
claim-752,[south australia] has the most expensive elect...,SUPPORTS,"[south, australia, has, the, most, expensive, ...","([south, australia, has, the, most, expensive,..."
claim-375 - evidence-996421,when 3 per cent of total annual global emissio...,claim-375,"[when, 3, per, cent, of, total, annual, global...","([when, 3, per, cent, of, total, annual, globa..."
claim-375 - evidence-1080858,when 3 per cent of total annual global emissio...,claim-375,"[when, 3, per, cent, of, total, annual, global...","([when, 3, per, cent, of, total, annual, globa..."


In [18]:
dev_corpus = dev_df.tagged.values
dev_corpus[0:5]
del dev_df
gc.collect()

0

## Train Model
https://radimrehurek.com/gensim/auto_examples/tutorials/run_doc2vec_lee.html#sphx-glr-auto-examples-tutorials-run-doc2vec-lee-py

In [19]:
cores = multiprocessing.cpu_count()
cores

20

In [20]:
# Train Doc2Vec model
model = Doc2Vec(dm=1, vector_size=50, window=5, min_count=1, workers=cores, epochs=100)

In [21]:
model.build_vocab(train_corpus)

In [22]:
from datetime import datetime

class EpochLogger(CallbackAny2Vec):
    '''Callback to log information about training'''

    def __init__(self):
        self.epoch = 0
        self.last_signal = datetime.now()

    def on_epoch_end(self, model):
        t = datetime.now() - self.last_signal
        print("Epoch #{} - Duration: {}".format(self.epoch, t))
        self.epoch += 1
        self.last_signal = datetime.now()

In [23]:
epoch_logger = EpochLogger()
model.train(train_corpus, total_examples=model.corpus_count,  epochs=model.epochs, callbacks=[epoch_logger])

Epoch #0 - Duration: 0:01:25.264472
Epoch #1 - Duration: 0:01:27.318429
Epoch #2 - Duration: 0:01:29.225444
Epoch #3 - Duration: 0:01:31.921901
Epoch #4 - Duration: 0:01:35.364130
Epoch #5 - Duration: 0:01:30.431608
Epoch #6 - Duration: 0:01:29.674299
Epoch #7 - Duration: 0:01:29.949696
Epoch #8 - Duration: 0:01:30.868905
Epoch #9 - Duration: 0:01:29.813049
Epoch #10 - Duration: 0:01:30.913694
Epoch #11 - Duration: 0:01:29.545682
Epoch #12 - Duration: 0:01:31.140924
Epoch #13 - Duration: 0:01:30.205590
Epoch #14 - Duration: 0:01:30.184621
Epoch #15 - Duration: 0:01:30.507987
Epoch #16 - Duration: 0:01:30.643670
Epoch #17 - Duration: 0:01:29.865938
Epoch #18 - Duration: 0:01:30.564549
Epoch #19 - Duration: 0:01:29.449699
Epoch #20 - Duration: 0:01:31.011032
Epoch #21 - Duration: 0:01:30.001308
Epoch #22 - Duration: 0:01:29.598903
Epoch #23 - Duration: 0:01:30.289295
Epoch #24 - Duration: 0:01:30.263775
Epoch #25 - Duration: 0:01:30.889133
Epoch #26 - Duration: 0:01:30.614006
Epoch #27 -

In [24]:
model.save("Doc2Vec.model")

In [25]:
#model.load("Doc2Vec.model")

## Assesing the model

In [26]:
evidences_df = pd.DataFrame.from_dict(evidences, orient='index', columns=['text'])
evidences_df = tokenize_text(evidences_df,'text')
evidences_df['inferred'] = evidences_df['tokens'].apply(lambda x: model.infer_vector(x))
evidences_df

Unnamed: 0,text,tokens,inferred
evidence-0,"john bennet lawes, english entrepreneur and ag...","[john, bennet, lawes, english, entrepreneur, a...","[-0.5866004, -0.5979908, -0.66681725, 0.566472..."
evidence-1,lindberg began his professional career at the ...,"[lindberg, began, his, professional, career, a...","[0.113180205, -0.7323953, -0.68200606, -0.5212..."
evidence-2,``boston (ladies of cambridge)'' by vampire we...,"[boston, ladies, of, cambridge, by, vampire, w...","[0.32170302, -1.202366, -0.62181914, -0.583831..."
evidence-3,"gerald francis goyer (born october 20, 1936) w...","[gerald, francis, goyer, born, october, 20, 19...","[-1.2727895, -0.04439112, -1.7628374, -0.21353..."
evidence-4,he detected abnormalities of oxytocinergic fun...,"[he, detected, abnormalities, of, oxytocinergi...","[0.27375388, -1.6664252, 0.21676277, -0.375012..."
...,...,...,...
evidence-1208822,also on the property is a contributing garage ...,"[also, on, the, property, is, a, contributing,...","[0.38326734, -0.082418896, 0.5778915, 0.757874..."
evidence-1208823,| class = ``fn org'' | fyrde | | | | 6110 | | ...,"[class, fn, org, fyrde, 6110, volda]","[-0.4715466, -0.62071, -0.6229989, -0.10534361..."
evidence-1208824,"dragon storm (game), a role-playing game and c...","[dragon, storm, game, a, game, and, collectibl...","[-0.052719723, -0.0347415, -1.0341836, -0.4353..."
evidence-1208825,it states that the zeriuani ``which is so grea...,"[it, states, that, the, zeriuani, which, is, s...","[-1.4580842, -0.92808354, -0.20704237, -0.4570..."


In [27]:
evidences_df.to_pickle('evidences_df.pkl')
#evidences_df = pd.read_pickle('evidences_df.pkl')  

In [28]:
claims_df = pd.DataFrame.from_dict(claims, orient='index')
claims_df = tokenize_text(claims_df,'claim_text')
claims_df['inferred'] = claims_df['tokens'].apply(lambda x: model.infer_vector(x))
claims_df

Unnamed: 0,claim_text,claim_label,evidences,tokens,inferred
claim-1937,not only is there no scientific evidence that ...,DISPUTED,"[evidence-442946, evidence-1194317, evidence-1...","[not, only, is, there, no, scientific, evidenc...","[1.382814, -1.3254179, 2.2354128, 0.6701187, 2..."
claim-126,el niño drove record highs in global temperatu...,REFUTES,"[evidence-338219, evidence-1127398]","[el, niño, drove, record, highs, in, global, t...","[2.8369098, -0.3093397, -0.0018832907, 1.09696..."
claim-2510,"in 1946, pdo switched to a cool phase.",SUPPORTS,"[evidence-530063, evidence-984887]","[in, 1946, pdo, switched, to, a, cool, phase]","[0.84987015, -0.3722064, -0.474547, -0.6948923..."
claim-2021,weather channel co-founder john coleman provid...,DISPUTED,"[evidence-1177431, evidence-782448, evidence-5...","[weather, channel, john, coleman, provided, ev...","[-0.44901243, -0.81604046, 0.46439353, 1.03122..."
claim-2449,"""january 2008 capped a 12 month period of glob...",NOT_ENOUGH_INFO,"[evidence-1010750, evidence-91661, evidence-72...","[january, 2008, capped, a, 12, month, period, ...","[-1.2736, 0.39904556, -1.0558625, 0.023528123,..."
...,...,...,...,...,...
claim-1504,climate scientists say that aspects of the cas...,SUPPORTS,"[evidence-1055682, evidence-1047356, evidence-...","[climate, scientists, say, that, aspects, of, ...","[-0.558967, -1.5743817, 0.8791474, -0.35244426..."
claim-243,"in its 5th assessment report in 2013, the ipcc...",SUPPORTS,[evidence-916755],"[in, its, 5th, assessment, report, in, 2013, t...","[1.1283017, -0.38888454, 0.5755045, 0.06229574..."
claim-2302,"since the mid 1970s, global temperatures have ...",NOT_ENOUGH_INFO,"[evidence-403673, evidence-889933, evidence-11...","[since, the, mid, 1970s, global, temperatures,...","[1.0324384, -1.2101113, -0.018202715, 0.642456..."
claim-502,but abnormal temperature spikes in february an...,NOT_ENOUGH_INFO,"[evidence-97375, evidence-562427, evidence-521...","[but, abnormal, temperature, spikes, in, febru...","[3.0434976, -1.0373944, 2.1006508, -0.4101867,..."


In [29]:
claims_df.to_pickle('claims_df.pkl')
#claims_df = pd.read_pickle('claims_df.pkl')  

### Implement BM25 Retrieval

In [30]:
# Initialize BM25 model
bm25 = BM25Okapi(evidences_df['tokens'].tolist())

In [31]:
# Calculate BM25 scores for each claim
def calculate_bm25_scores(query_tokens):
    return bm25.get_scores(query_tokens)

In [None]:
print("Generating the BM25 scores")
# Compute BM25 scores
bm25_scores = claims_df['tokens'].apply(calculate_bm25_scores)

In [None]:
bm25_scores.to_pickle('bm25_scores.pkl')
#bm25_scores = pd.read_pickle('bm25_scores.pkl')  

In [None]:
# Extract lists
claim_vectors = claims_df['inferred'].to_list()
evidence_vectors = evidences_df['inferred'].to_list()

In [None]:
print("Generating the similarities")
# Calculate Doc2Vec similarities
doc2vec_similarities = cosine_similarity(claim_vectors, evidence_vectors)

In [None]:
with open('doc2vec_similarities.pkl','wb') as f: pickle.dump(doc2vec_similarities, f)
#with open('doc2vec_similarities.pkl','rb') as f: doc2vec_similarities = pickle.load(f)

In [None]:
# Normalize the lengths
def normalize(scores):
    return (scores - np.min(scores)) / (np.max(scores) - np.min(scores))

In [None]:
document_lengths = [len(doc) for doc in evidences_df['tokens']]
normalized_lengths = normalize(document_lengths)
p_normalized_lengths = np.array(0.2 * normalized_lengths)

In [None]:
n_bm25_scores = [normalize(doc) for doc in bm25_scores]

In [None]:
n_doc2vec_similarities = normalize(doc2vec_similarities)

In [None]:
p_bm25_scores = np.array([0.4 * doc for doc in n_bm25_scores])

In [None]:
p_doc2vec_similarities = np.array([0.4 * doc for doc in n_doc2vec_similarities])

In [None]:
del document_lengths, normalized_lengths
del bm25_scores, n_bm25_scores
del doc2vec_similarities, n_doc2vec_similarities
gc.collect()

In [None]:
def get_combined_scores(bm25,similarities,lengths):
    # Initialize an array to store the sum results
    scores = []

    # Perform the element-wise addition
    for i in range(len(similarities)):
        sum_result = bm25[i] + similarities[i] + lengths
        scores.append(sum_result)
    
    return scores

### Dev claims

In [None]:
# Initialize an array to store the sum results
combined_scores = get_combined_scores(p_bm25_scores, p_doc2vec_similarities, p_normalized_lengths)

In [None]:
del p_bm25_scores, p_doc2vec_similarities
gc.collect()

In [None]:
# Rank evidences
ranked_index = np.argsort(combined_scores, axis=1)[:, ::-1]

In [None]:
with open('ranked_index.pkl','wb') as f: pickle.dump(ranked_index, f)
#with open('ranked_index.pkl','rb') as f: ranked_index = pickle.load(f)

In [None]:
dev_df = pd.DataFrame.from_dict(dev_claims, orient='index')
dev_df = tokenize_text(dev_df,'claim_text')
dev_df['inferred'] = dev_df['tokens'].apply(lambda x: model.infer_vector(x))
dev_df

In [None]:
print("Generating the dev BM25 scores")
# Compute dev BM25 scores
dev_bm25_scores = dev_df['tokens'].apply(calculate_bm25_scores)
n_dev_bm25_scores = [normalize(doc) for doc in dev_bm25_scores]
p_dev_bm25_scores = np.array([0.4 * doc for doc in n_dev_bm25_scores])

In [None]:
dev_bm25_scores.to_pickle('dev_bm25_scores.pkl')
#dev_bm25_scores = pd.read_pickle('dev_bm25_scores.pkl')  

In [None]:
# Extract lists
claim_dev_vectors = dev_df['inferred'].to_list()

In [None]:
print("Generating the dev similarities")
# Calculate Doc2Vec similarities
dev_doc2vec_similarities = cosine_similarity(claim_dev_vectors, evidence_vectors)
n_dev_doc2vec_similarities = normalize(dev_doc2vec_similarities)
p_dev_doc2vec_similarities = np.array([0.4 * doc for doc in n_dev_doc2vec_similarities])

In [None]:
with open('dev_doc2vec_similarities.pkl','wb') as f: pickle.dump(dev_doc2vec_similarities, f)
#with open('dev_doc2vec_similarities.pkl','rb') as f: dev_doc2vec_similarities = pickle.load(f)

In [None]:
# Initialize an array to store the sum results
dev_combined_scores = get_combined_scores(p_dev_bm25_scores, p_dev_doc2vec_similarities, p_normalized_lengths)

In [None]:
del p_dev_bm25_scores, p_dev_doc2vec_similarities
gc.collect()

In [None]:
# Rank evidences
dev_ranked_index = np.argsort(dev_combined_scores, axis=1)[:, ::-1]

## Classification

In [33]:
# Convert the list of documents into a pandas DataFrame
df = pd.DataFrame.from_dict(corpus, orient='index', columns=['text','label'])
df = tokenize_text(df,'text')
df['tagged'] = df.apply(lambda row: process_row(row, 'label'), axis=1)
df = df[~df['label'].str.startswith('claim')]

# Filter rows where the label does not start with 'claim'
train_corpus = df.tagged.values

In [34]:
# Collect all texts from dev claims
dev_df = pd.DataFrame.from_dict(dv_corpus, orient='index', columns=['text','label'])
dev_df = tokenize_text(dev_df,'text')
dev_df['tagged'] = dev_df.apply(lambda row: process_row(row, 'label'), axis=1)
dev_df = dev_df[~dev_df['label'].str.startswith('claim')]

# Filter rows where the label does not start with 'claim'
dev_corpus = dev_df.tagged.values

In [None]:
# Label Distribution
fig, axs = plt.subplots(1, 2, figsize=(16,6))

axs[0].set_title("Train")
axs[1].set_title("Validation")
tlabel = axs[0].hist(sorted([l for l in df['label']]))
vlabel = axs[1].hist(sorted([l for l in dev_df['label']]))

In [35]:
def vec_for_learning(model, sents):
    targets, regressors = zip(*[(doc.tags[0], model.infer_vector(doc.words)) for doc in sents])
    return targets, regressors

In [36]:
logreg = LogisticRegression(n_jobs=cores, C=1e5, max_iter=300)

In [None]:
y_train, X_train = vec_for_learning(model, train_corpus)

In [None]:
y_dev, X_dev = vec_for_learning(model, dev_corpus)

In [None]:
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_dev)
print('Testing accuracy %s' % accuracy_score(y_dev, y_pred))
print('Testing F1 score: {}'.format(f1_score(y_dev, y_pred, average='weighted')))
report = classification_report(y_dev, y_pred)
print(f"Classification Report:\n{report}")

In [None]:
y_train_pred = logreg.predict(X_train)
print('Testing accuracy %s' % accuracy_score(y_train, y_train_pred))
print('Testing F1 score: {}'.format(f1_score(y_train, y_train_pred, average='weighted')))
report = classification_report(y_train, y_train_pred)
print(f"Classification Report:\n{report}")

In [None]:
predictions = {}
for i in range(len(claims_df)):
    ev_list = ['evidence-'+ str(num) for num in ranked_index[i][:5] ]
    predictions[claims_df.index[i]] = {}
    predictions[claims_df.index[i]]["claim_text"] = claims_df.claim_text[i]
    predictions[claims_df.index[i]]["claim_label"] = y_train_pred[i]
    predictions[claims_df.index[i]]["evidences"] = ev_list

In [None]:
# Export the DataFrame to a JSON file
train_df_doc2vec = pd.DataFrame.from_dict(predictions, orient='index') 
train_df_doc2vec.to_json('../data/train_claims_doc2vec.json', orient='index')

In [None]:
dev_predictions = {}
for i in range(len(dev_df)):
    ev_list = ['evidence-'+ str(num) for num in ranked_index[i][:5] ]
    dev_predictions[dev_df.index[i]] = {}
    dev_predictions[dev_df.index[i]]["claim_text"] = dev_df.text[i]
    dev_predictions[dev_df.index[i]]["claim_label"] = y_pred[i]
    dev_predictions[dev_df.index[i]]["evidences"] = ev_list

In [None]:
# Export the DataFrame to a JSON file
dev_df_doc2vec = pd.DataFrame.from_dict(dev_predictions, orient='index') 
dev_df_doc2vec.to_json('../data/dev_claims_doc2vec.json', orient='index')

In [None]:
gc.collect()

Information Retrieval Prediction

In [None]:
# Load information retrieval
ir_dev_df = pd.read_pickle('dev-trained.pkl')

In [None]:
# Collect all texts from dev claims
ir_dev_corpus = {}

for i in range(len(ir_dev_df)):
    text2 = ir_dev_df.iloc[i].claim_text
    # Create pairs claim + evidence
    for evidence in ir_dev_df.iloc[i].evidences:
        text2 = text2 + " " + evidences[evidence]
    # Create pairs claim + all_evidence
    ir_dev_corpus[ir_dev_df.iloc[i].name] = (str.strip(text2),ir_dev_df.iloc[i].claim_label)

In [None]:
# Collect all texts from dev claims
ir_dev_df = pd.DataFrame.from_dict(ir_dev_corpus, orient='index', columns=['text','label'])
ir_dev_df = tokenize_text(ir_dev_df,'text')
ir_dev_df['tagged'] = ir_dev_df.apply(lambda row: process_row(row, 'label'), axis=1)

ir_dev_corpus = ir_dev_df.tagged.values

In [None]:
y_ir_dev, X_ir_dev = vec_for_learning(model, ir_dev_corpus)

In [None]:
y_pred = logreg.predict(X_ir_dev)
print('Testing accuracy %s' % accuracy_score(y_ir_dev, y_pred))
print('Testing F1 score: {}'.format(f1_score(y_ir_dev, y_pred, average='weighted')))
report = classification_report(y_ir_dev, y_pred)
print(f"Classification Report:\n{report}")