In [2]:
#imports
import numpy as np
import pandas as pd
import os
from tqdm.notebook import tqdm
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from datasets import Dataset, load_metric
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer

In [3]:
#Parameters
NUM_LABELS=2
keep_cols=['text','labels']
metric_list = ['accuracy', 'f1', 'precision', 'recall']
model_name = 'distilBERT'
training_data='ISOT'
eval_data_1 ='FakeNewsNet'
eval_data_2 ='LIAR'
text_col='title'
max_length = 512
batch_size = 8 
num_train_epochs=3 
seed=101
experiment = 3
input_dir_1_2 = os.path.join(os.getcwd(), f'models/{model_name}/EXP_1/2000') 
input_dir_1_5 = os.path.join(os.getcwd(), f'models/{model_name}/EXP_1/5000') 
input_dir_2 = os.path.join(os.getcwd(), f'models/{model_name}/EXP_2') 
#output_dir = os.path.join(os.getcwd(), f'models/{model_name}/EXP_{experiment}')

In [4]:
#FakeNewsNet
gossip_fake = pd.read_csv(os.path.join(os.getcwd(), f'data/{eval_data_1}/gossipcop_fake.csv'))
gossip_true = pd.read_csv(os.path.join(os.getcwd(), f'data/{eval_data_1}/gossipcop_real.csv'))
politic_fake = pd.read_csv(os.path.join(os.getcwd(),f'data/{eval_data_1}/politifact_fake.csv'))
politic_true = pd.read_csv(os.path.join(os.getcwd(),f'data/{eval_data_1}/politifact_real.csv'))

print(f'Number of True examples in PolitiFact: {politic_true.shape[0]}')
print(f'Number of True examples in GossipCop:  {gossip_true.shape[0]}')
print(f'Number of Fake examples in PolitiFact: {politic_fake.shape[0]}')
print(f'Number of Fake examples in GossipCop:  {gossip_fake.shape[0]}')

print('PoliticFact dataset:')
print(f'Average number of words in titles:\n\
      True:  {round(politic_true.title.apply(lambda x: len(x)).mean())}\n\
      Fake:  {round(politic_fake.title.apply(lambda x: len(x)).mean())}')
print('GossipCop dataset:')
print(f'Average number of words in titles:\n\
      True:  {round(gossip_true.title.apply(lambda x: len(x)).mean())}\n\
      Fake:  {round(gossip_fake.title.apply(lambda x: len(x)).mean())}')

Number of True examples in PolitiFact: 624
Number of True examples in GossipCop:  16817
Number of Fake examples in PolitiFact: 432
Number of Fake examples in GossipCop:  5323
PoliticFact dataset:
Average number of words in titles:
      True:  51
      Fake:  73
GossipCop dataset:
Average number of words in titles:
      True:  69
      Fake:  69


In [5]:
#LIAR
column_names = ['ID', 'labels', 'title', 'subject', 'speaker', 'speaker_job', 'state', 'political_party', 
                'barely true counts', 'false_counts', 'half true counts', 'mostly true counts', 'pants on fire counts',
                 'context']
liar_test = pd.read_table(os.path.join(os.getcwd(), f'data/{eval_data_2}/test.tsv'), header=None, names=column_names)
liar_train = pd.read_table(os.path.join(os.getcwd(), f'data/{eval_data_2}/train.tsv'), header=None, names=column_names)
liar_valid = pd.read_table(os.path.join(os.getcwd(),f'data/{eval_data_2}/valid.tsv'), header=None, names=column_names)
liar = pd.concat([liar_test, liar_train, liar_valid], axis=0).reset_index(drop=True)
liar_fake = liar[liar.labels=='false']
liar_true = liar[liar.labels=='true']

print(f'Number of True examples in Liar: {liar_true.shape[0]}')
print(f'Number of Fake examples in Liar: {liar_fake.shape[0]}')

print(f'Average number of words in statement:\n\
      True:  {round(liar_true.title.apply(lambda x: len(x)).mean())}\n\
      Fake:  {round(liar_fake.title.apply(lambda x: len(x)).mean())}')

print(f'Most repeated subjects:\n \
     True:  {liar_true.subject.value_counts().index.to_list()[:15]}\n\
     False: {liar_fake.subject.value_counts().index.to_list()[:15]}')

Number of True examples in Liar: 2053
Number of Fake examples in Liar: 2507
Average number of words in statement:
      True:  106
      Fake:  101
Most repeated subjects:
      True:  ['elections', 'health-care', 'taxes', 'education', 'candidates-biography', 'economy', 'guns', 'economy,jobs', 'federal-budget', 'immigration', 'abortion', 'iraq', 'energy', 'crime', 'jobs']
     False: ['health-care', 'immigration', 'elections', 'taxes', 'education', 'candidates-biography', 'state-budget', 'economy', 'abortion', 'guns', 'jobs', 'federal-budget', 'foreign-policy', 'energy', 'economy,jobs']


In [6]:
def preprocess_data(fake, true, text_col=text_col, random_state=seed):
    #Añadimos la columna label
    true['labels'] = 0
    fake['labels'] = 1
    data = pd.concat([true, fake], axis=0).sample(frac=1, random_state=random_state).reset_index(drop=True) #shuffle
    #Pasamos los textos a minúscula:
    data[text_col] = data[text_col].apply(lambda x: x.lower() if isinstance(x, str) else x)
    data = data[[text_col,'labels']].rename(columns={text_col:'text'})
    return data

gossip_data = preprocess_data(gossip_fake, gossip_true)
politic_data = preprocess_data(politic_fake, politic_true)
liar_data = preprocess_data(liar_fake, liar_true)

print(f'PolitiFact Data data label count:\n\
0: {politic_data[politic_data.labels==0].shape[0]}\n\
1: {politic_data[politic_data.labels==1].shape[0]}')

print(f'GossipCop Data data label count:\n\
0: {gossip_data[gossip_data.labels==0].shape[0]}\n\
1: {gossip_data[gossip_data.labels==1].shape[0]}')

print(f'LIAR Data data label count:\n\
0: {liar_data[liar_data.labels==0].shape[0]}\n\
1: {liar_data[liar_data.labels==1].shape[0]}')

PolitiFact Data data label count:
0: 624
1: 432
GossipCop Data data label count:
0: 16817
1: 5323
LIAR Data data label count:
0: 2053
1: 2507


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  true['labels'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fake['labels'] = 1


In [7]:
class Model:
    
    def __init__(self, checkpoint, num_labels):
        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        self.model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=num_labels)
        self.trainer = None
        self.training_args = None
    
    def tokenize(self, data):
        return self.tokenizer(data['text'], truncation=True)
    
    def compute_metrics(self, eval_pred):
        metric = load_metric("accuracy")
        predictions, labels = eval_pred
        predictions = np.argmax(predictions, axis=1)
        return metric.compute(predictions=predictions, references=labels)
    
    def train(self):
        print('Empezando entrenamiento:')
        self.trainer.train()
        print('Fin del Entrenamiento')
        
    def save(self, output_dir):
        print('Guardando modelo...')
        self.trainer.save_model(output_dir)
        print(f'Modelo guardado en {output_dir}')
    
    def get_predictions(self, encoded_texts):
        predictions=[]
        print('Getting predictions...')
        for _,x in tqdm(enumerate(encoded_texts), total=len(encoded_texts)):
            outputs = self.model(**x)
            logits = outputs['logits']
            y_pred = torch.argmax(logits, dim=-1)
            predictions.append(y_pred[0].item()) 
        return predictions

def tokenize_data(data, model):
    dataset = Dataset.from_pandas(data, preserve_index=False)
    return dataset.map(model.tokenize, batched=True, remove_columns='text')

def tokenize_docs(texts, tokenizer):
    return [tokenizer(str(text), truncation=True, return_tensors="pt") for _, text in tqdm(enumerate(texts), total=len(texts))]

#Evaluation
def get_metrics(eval_data, model, metric_list=metric_list):
    y_real = eval_data.labels.values.tolist()
    print('Tokenizing docs...')
    enc_eval_data = tokenize_docs(eval_data.text.values, model.tokenizer)
    y_pred = model.get_predictions(enc_eval_data)
    results={}
    print('Getting metrics:')
    for metric in metric_list:
        if metric=='accuracy':
            m = load_metric(metric)
        else:
            m = load_metric(metric, 'macro')
        results[metric] = m.compute(predictions=y_pred, references=y_real)
        print(f'{metric}: {results[metric]}')
    print(f'Confusion matrix:\n {confusion_matrix(y_real, y_pred)}')
    return results

### DistilBERT Model

In [8]:
#Load models
model1_2 = Model(input_dir_1_2, NUM_LABELS) #EXP1
model1_5 = Model(input_dir_1_5, NUM_LABELS) 
model2 = Model(input_dir_2, NUM_LABELS) #EXP2

print('Evaluating PoliticFact dataset:')
print('\nWith model trained with 2000 from Experiment 1')
m11p = get_metrics(politic_data, model1_2)
print('\nWith model trained with 5000 from Experiment 1')
m12p = get_metrics(politic_data, model1_5)
print('\nWith model from Experiment 2')
m13p = get_metrics(politic_data, model2)

print('Evaluating GossipCop dataset:')
print('\nWith model trained with 2000 from Experiment 1')
m11g = get_metrics(gossip_data, model1_2)
print('\nWith model trained with 5000 from Experiment 1')
m12g = get_metrics(gossip_data, model1_5)
print('\nWith model from Experiment 2')
m13g = get_metrics(gossip_data, model2)

print('Evaluating LIAR dataset:')
print('\nWith model trained with 2000 from Experiment 1')
m11l = get_metrics(liar_data, model1_2)
print('\nWith model trained with 5000 from Experiment 1')
m12l = get_metrics(liar_data, model1_5)
print('\nWith model from Experiment 2')
m13l = get_metrics(liar_data, model2)

Evaluating PoliticFact dataset:

With model trained with 2000 from Experiment 1
Tokenizing docs...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting metrics:


  m = load_metric(metric)


accuracy: {'accuracy': 0.7518939393939394}
f1: {'f1': 0.7095343680709535}
precision: {'precision': 0.6808510638297872}
recall: {'recall': 0.7407407407407407}
Confusion matrix:
 [[474 150]
 [112 320]]

With model trained with 5000 from Experiment 1
Tokenizing docs...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.7585227272727273}
f1: {'f1': 0.7098976109215017}
precision: {'precision': 0.697986577181208}
recall: {'recall': 0.7222222222222222}
Confusion matrix:
 [[489 135]
 [120 312]]

With model from Experiment 2
Tokenizing docs...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.4090909090909091}
f1: {'f1': 0.5806451612903226}
precision: {'precision': 0.4090909090909091}
recall: {'recall': 1.0}
Confusion matrix:
 [[  0 624]
 [  0 432]]
Evaluating GossipCop dataset:

With model trained with 2000 from Experiment 1
Tokenizing docs...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.3614724480578139}
f1: {'f1': 0.3856950419328206}
precision: {'precision': 0.25087620124364046}
recall: {'recall': 0.8337403719706932}
Confusion matrix:
 [[ 3565 13252]
 [  885  4438]]

With model trained with 5000 from Experiment 1
Tokenizing docs...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.4322041553748871}
f1: {'f1': 0.3852511125238398}
precision: {'precision': 0.26041253470844905}
recall: {'recall': 0.7399962427202705}
Confusion matrix:
 [[ 5630 11187]
 [ 1384  3939]]

With model from Experiment 2
Tokenizing docs...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.24046973803071364}
f1: {'f1': 0.3876629524433763}
precision: {'precision': 0.24043543068792628}
recall: {'recall': 1.0}
Confusion matrix:
 [[    1 16816]
 [    0  5323]]
Evaluating LIAR dataset:

With model trained with 2000 from Experiment 1
Tokenizing docs...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.5013157894736842}
f1: {'f1': 0.5028421512898994}
precision: {'precision': 0.5563618771165941}
recall: {'recall': 0.45871559633027525}
Confusion matrix:
 [[1136  917]
 [1357 1150]]

With model trained with 5000 from Experiment 1
Tokenizing docs...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.5070175438596491}
f1: {'f1': 0.5978533094812165}
precision: {'precision': 0.5420045410314629}
recall: {'recall': 0.6665337056242521}
Confusion matrix:
 [[ 641 1412]
 [ 836 1671]]

With model from Experiment 2
Tokenizing docs...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.55}
f1: {'f1': 0.7094307561597281}
precision: {'precision': 0.5499451152579583}
recall: {'recall': 0.9992022337455125}
Confusion matrix:
 [[   3 2050]
 [   2 2505]]


### BERT Model


In [9]:
model_name = 'BERT'
input_dir_1_2 = os.path.join(os.getcwd(), f'models/{model_name}/EXP_1/2000') 
input_dir_1_5 = os.path.join(os.getcwd(), f'models/{model_name}/EXP_1/5000') 
input_dir_2 = os.path.join(os.getcwd(), f'models/{model_name}/EXP_2') 
output_dir = os.path.join(os.getcwd(), f'models/{model_name}/EXP_{experiment}')

model1_2 = Model(input_dir_1_2, NUM_LABELS) #EXP1
model1_5 = Model(input_dir_1_5, NUM_LABELS) 
model2 = Model(input_dir_2, NUM_LABELS) #EXP2

print('Evaluating PoliticFact dataset:')
print('\nWith model trained with 2000 from Experiment 1')
m21p = get_metrics(politic_data, model1_2)
print('\nWith model trained with 5000 from Experiment 1')
m22p = get_metrics(politic_data, model1_5)
print('\nWith model from Experiment 2')
m23p = get_metrics(politic_data, model2)

print('Evaluating GossipCop dataset:')
print('\nWith model trained with 2000 from Experiment 1')
m21g = get_metrics(gossip_data, model1_2)
print('\nWith model trained with 5000 from Experiment 1')
m22g = get_metrics(gossip_data, model1_5)
print('\nWith model from Experiment 2')
m23g = get_metrics(gossip_data, model2)

print('Evaluating LIAR dataset:')
print('\nWith model trained with 2000 from Experiment 1')
m11l = get_metrics(liar_data, model1_2)
print('\nWith model trained with 5000 from Experiment 1')
m12l = get_metrics(liar_data, model1_5)
print('\nWith model from Experiment 2')
m13l = get_metrics(liar_data, model2)

Evaluating PoliticFact dataset:

With model trained with 2000 from Experiment 1
Tokenizing docs...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.7433712121212122}
f1: {'f1': 0.6972067039106145}
precision: {'precision': 0.673866090712743}
recall: {'recall': 0.7222222222222222}
Confusion matrix:
 [[473 151]
 [120 312]]

With model trained with 5000 from Experiment 1
Tokenizing docs...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.7339015151515151}
f1: {'f1': 0.6895027624309392}
precision: {'precision': 0.6596194503171248}
recall: {'recall': 0.7222222222222222}
Confusion matrix:
 [[463 161]
 [120 312]]

With model from Experiment 2
Tokenizing docs...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.4109848484848485}
f1: {'f1': 0.5814266487213997}
precision: {'precision': 0.4098671726755218}
recall: {'recall': 1.0}
Confusion matrix:
 [[  2 622]
 [  0 432]]
Evaluating GossipCop dataset:

With model trained with 2000 from Experiment 1
Tokenizing docs...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.35}
f1: {'f1': 0.37817050512033873}
precision: {'precision': 0.24556677890011222}
recall: {'recall': 0.822092804809318}
Confusion matrix:
 [[ 3373 13444]
 [  947  4376]]

With model trained with 5000 from Experiment 1
Tokenizing docs...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.394579945799458}
f1: {'f1': 0.37574515648286144}
precision: {'precision': 0.2497987491485541}
recall: {'recall': 0.7578433214352809}
Confusion matrix:
 [[ 4702 12115]
 [ 1289  4034]]

With model from Experiment 2
Tokenizing docs...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.2406955736224029}
f1: {'f1': 0.38768894554725913}
precision: {'precision': 0.24046629315018977}
recall: {'recall': 0.9998121360135263}
Confusion matrix:
 [[    7 16810]
 [    1  5322]]
Evaluating LIAR dataset:

With model trained with 2000 from Experiment 1
Tokenizing docs...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.5131578947368421}
f1: {'f1': 0.580973952434881}
precision: {'precision': 0.5514152633464708}
recall: {'recall': 0.6138811328280813}
Confusion matrix:
 [[ 801 1252]
 [ 968 1539]]

With model trained with 5000 from Experiment 1
Tokenizing docs...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.49890350877192985}
f1: {'f1': 0.6156433978132885}
precision: {'precision': 0.5322862129144852}
recall: {'recall': 0.7299561228560032}
Confusion matrix:
 [[ 445 1608]
 [ 677 1830]]

With model from Experiment 2
Tokenizing docs...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.5486842105263158}
f1: {'f1': 0.7077534791252483}
precision: {'precision': 0.5495038588754134}
recall: {'recall': 0.9940167530913442}
Confusion matrix:
 [[  10 2043]
 [  15 2492]]


### RoBERTa Model


In [10]:
model_name = 'RoBERTa'
input_dir_1_2 = os.path.join(os.getcwd(), f'models/{model_name}/EXP_1/2000') 
input_dir_1_5 = os.path.join(os.getcwd(), f'models/{model_name}/EXP_1/5000') 
input_dir_2 = os.path.join(os.getcwd(), f'models/{model_name}/EXP_2') 
output_dir = os.path.join(os.getcwd(), f'models/{model_name}/EXP_{experiment}')

model1_2 = Model(input_dir_1_2, NUM_LABELS) #EXP1
model1_5 = Model(input_dir_1_5, NUM_LABELS) 
model2 = Model(input_dir_2, NUM_LABELS) #EXP2

print('Evaluating PoliticFact dataset:')
print('\nWith model trained with 2000 from Experiment 1')
m21p = get_metrics(politic_data, model1_2)
print('\nWith model trained with 5000 from Experiment 1')
m22p = get_metrics(politic_data, model1_5)
print('\nWith model from Experiment 2')
m23p = get_metrics(politic_data, model2)

print('Evaluating GossipCop dataset:')
print('\nWith model trained with 2000 from Experiment 1')
m21g = get_metrics(gossip_data, model1_2)
print('\nWith model trained with 5000 from Experiment 1')
m22g = get_metrics(gossip_data, model1_5)
print('\nWith model from Experiment 2')
m23g = get_metrics(gossip_data, model2)

print('Evaluating LIAR dataset:')
print('\nWith model trained with 2000 from Experiment 1')
m11l = get_metrics(liar_data, model1_2)
print('\nWith model trained with 5000 from Experiment 1')
m12l = get_metrics(liar_data, model1_5)
print('\nWith model from Experiment 2')
m13l = get_metrics(liar_data, model2)

Evaluating PoliticFact dataset:

With model trained with 2000 from Experiment 1
Tokenizing docs...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.6950757575757576}
f1: {'f1': 0.6652806652806652}
precision: {'precision': 0.6037735849056604}
recall: {'recall': 0.7407407407407407}
Confusion matrix:
 [[414 210]
 [112 320]]

With model trained with 5000 from Experiment 1
Tokenizing docs...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.7054924242424242}
f1: {'f1': 0.6702014846235418}
precision: {'precision': 0.6183953033268101}
recall: {'recall': 0.7314814814814815}
Confusion matrix:
 [[429 195]
 [116 316]]

With model from Experiment 2
Tokenizing docs...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/1056 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.4100378787878788}
f1: {'f1': 0.5810356422326832}
precision: {'precision': 0.409478672985782}
recall: {'recall': 1.0}
Confusion matrix:
 [[  1 623]
 [  0 432]]
Evaluating GossipCop dataset:

With model trained with 2000 from Experiment 1
Tokenizing docs...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.3775519421860885}
f1: {'f1': 0.3732775478648416}
precision: {'precision': 0.24624984999399976}
recall: {'recall': 0.7709938004884463}
Confusion matrix:
 [[ 4255 12562]
 [ 1219  4104]]

With model trained with 5000 from Experiment 1
Tokenizing docs...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.4399728997289973}
f1: {'f1': 0.37051327613342133}
precision: {'precision': 0.2538611381661333}
recall: {'recall': 0.6855156866428705}
Confusion matrix:
 [[ 6092 10725]
 [ 1674  3649]]

With model from Experiment 2
Tokenizing docs...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/22140 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.24056007226738935}
f1: {'f1': 0.3876465875154782}
precision: {'precision': 0.24043370228145472}
recall: {'recall': 0.9998121360135263}
Confusion matrix:
 [[    4 16813]
 [    1  5322]]
Evaluating LIAR dataset:

With model trained with 2000 from Experiment 1
Tokenizing docs...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.5287280701754385}
f1: {'f1': 0.6329632792485055}
precision: {'precision': 0.5534647550776584}
recall: {'recall': 0.7391304347826086}
Confusion matrix:
 [[ 558 1495]
 [ 654 1853]]

With model trained with 5000 from Experiment 1
Tokenizing docs...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.5252192982456141}
f1: {'f1': 0.6732075471698112}
precision: {'precision': 0.5415250121418164}
recall: {'recall': 0.8895093737534903}
Confusion matrix:
 [[ 165 1888]
 [ 277 2230]]

With model from Experiment 2
Tokenizing docs...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting predictions...


  0%|          | 0/4560 [00:00<?, ?it/s]

Getting metrics:
accuracy: {'accuracy': 0.5510964912280701}
f1: {'f1': 0.7099333994615276}
precision: {'precision': 0.5505494505494506}
recall: {'recall': 0.9992022337455125}
Confusion matrix:
 [[   8 2045]
 [   2 2505]]
