## Configuration

In [None]:
# Mount Google Drive: the dataset and model paths configured below live under /content/drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Install needed libraries
# NOTE(review): no versions are pinned, so a fresh runtime may resolve newer releases
# than the ones shown in the recorded output of this cell (transformers 4.20.1,
# datasets 2.3.2). Pin them if the results must be reproduced exactly.
!pip install transformers datasets
!pip install sentencepiece
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.20.1-py3-none-any.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 5.0 MB/s 
[?25hCollecting datasets
  Downloading datasets-2.3.2-py3-none-any.whl (362 kB)
[K     |████████████████████████████████| 362 kB 53.8 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 38.1 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 61.4 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 7.7 MB/s 
Collecting aiohttp
  Downloa

In [None]:
# Import all the needed libraries
import numpy as np
import pandas as pd
import torch
import functools
import wandb
import random
import os

from datasets import Dataset, DatasetDict, load_metric

from sklearn.metrics import classification_report, confusion_matrix

from keras.callbacks import EarlyStopping

from transformers import AutoTokenizer, AutoModelForSequenceClassification, \
 TrainingArguments, Trainer, pipeline, EarlyStoppingCallback, \
 EncoderDecoderModel, RobertaTokenizerFast

In [None]:
# Fail fast when PyTorch sees no CUDA device; otherwise report which GPU is in use
if torch.cuda.device_count() <= 0:
  raise Exception('Currently using CPU, change the type of the runtime in the \'runtime\' tab')

print(f'GPU detected. Currently using: "{torch.cuda.get_device_name(0)}"')

GPU detected. Currently using: "Tesla T4"


# Variables and Parameters

Change the hyperparameters and some other options here.

In [None]:
# DIFFERENT MODELS THAT I USED
# (exactly one MODEL_CHECKPOINT line should be left uncommented; it is the
#  checkpoint id used to load both the tokenizer and the classification model)
#MODEL_CHECKPOINT = "PlanTL-GOB-ES/roberta-large-bne"
MODEL_CHECKPOINT = "bertin-project/bertin-roberta-base-spanish"
#MODEL_CHECKPOINT = "bert-base-uncased"
#MODEL_CHECKPOINT = "mrm8488/RuPERTa-base"
#MODEL_CHECKPOINT = "dccuchile/bert-base-spanish-wwm-cased"
#MODEL_CHECKPOINT = "dccuchile/bert-base-spanish-wwm-uncased"

# HYPERPARAMETERS OF THE MODEL
# ----------------------------------------------
TRUNCATION_LEN = 256  # max_length passed to the tokenizer
EPOCHS = 10  # num_train_epochs
BATCH_SIZE = 6  # per-device batch size, used for both training and evaluation
LEARNING_RATE = 2e-05
WEIGHT_DECAY = 0.01
ES_PATIENCE = 3  # early_stopping_patience of the EarlyStoppingCallback
RANDOM_SEED = 42  # seed used when shuffling the training split
TENSORS_SEED = 42  # seed for the torch CPU and CUDA random generators
METRIC_FOR_BEST_MODEL = "eval_loss"  # metric_for_best_model of the trainer
# ----------------------------------------------

# PATHS
# ----------------------------------------------
# Where the DatasetDict built from the CSV files is saved and loaded from
DATASET_PATH = '/content/drive/MyDrive/Colab Notebooks/TFG/datasets/fakeNews_spanish/dataset/'
# Where the fine-tuned model is written when SAVE_MODEL is enabled
SAVE_PATH = '/content/drive/MyDrive/Colab Notebooks/TFG/models/btin_v.2.6/'
# Checkpoint id of the tokenizer/model used by the summarization cells
SUMMARIZER_NAME = 'Narrativa/bsc_roberta2roberta_shared-spanish-finetuned-mlsum-summarization'
# Output locations of the summarization cells
SUMMARIZED_DATASET_PATH = '/content/drive/MyDrive/Colab Notebooks/TFG/datasets/fakeNews_spanish/sum_dataset/'
SUMMARIZED_TEST_DATASET_PATH = '/content/drive/MyDrive/Colab Notebooks/TFG/datasets/fakeNews_spanish/sum_test_dataset/'
# ----------------------------------------------


#  MAKE THE ENVIRONMENT DETERMINISTIC
# ----------------------------------------------
# Seed every random generator this notebook imports, not only torch: the Python
# and NumPy generators would otherwise start from a different state on each run.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(TENSORS_SEED)
torch.cuda.manual_seed_all(TENSORS_SEED)
# Turn off the cuDNN auto-tuner and ask cuDNN for deterministic kernels; disabling
# the benchmark alone does not stop cuDNN from picking non-deterministic algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
os.environ["TF_DETERMINISTIC_OPS"] = "1"
# ----------------------------------------------

# CONFIGURATION OF THE PREPROCESSING AND TRAINING
# ----------------------------------------------
SUMMARIZE_DATA = False  # run the (slow) summarization cells and save their output
USE_SUMMARIZED_DATA = False  # load from SUMMARIZED_DATASET_PATH instead of DATASET_PATH
CREATE_DATASETS = True  # rebuild the DatasetDict from the CSV files and save it to DATASET_PATH
SAVE_MODEL = False  # write the fine-tuned model to SAVE_PATH after training
GENERATE_GRAPHIC = True  # report the training run to Weights & Biases ('wandb')
PREPROCESS = 1  # selects which columns concat_data joins into the 'Data' field (1 to 4)
# ----------------------------------------------

In [None]:
# Pick the on-disk dataset that will be loaded: the summarized variant or the original one
dataset_path = SUMMARIZED_DATASET_PATH if USE_SUMMARIZED_DATA else DATASET_PATH

# Prepare the data

## Creation of dataset objects

This task is already done; set the `CREATE_DATASETS` variable to `True` to execute it again.

In [None]:
if CREATE_DATASETS:
  # Load the three ';'-separated CSV splits into dataframes, using the first
  # column of each file as the index.
  train_data_path = '/content/drive/MyDrive/Colab Notebooks/TFG/datasets/fakeNews_spanish/train.csv'
  valid_data_path = '/content/drive/MyDrive/Colab Notebooks/TFG/datasets/fakeNews_spanish/development.csv'
  test_data_path = '/content/drive/MyDrive/Colab Notebooks/TFG/datasets/fakeNews_spanish/test.csv'

  train_df = pd.read_csv(train_data_path, encoding = 'UTF-8', sep=';', index_col=0)
  valid_df = pd.read_csv(valid_data_path, encoding = 'UTF-8', sep=';', index_col=0)
  test_df = pd.read_csv(test_data_path, encoding = 'UTF-8', sep=';', index_col=0)

  # Since the test dataset was taken under different conditions, we have to change
  # it a bit so it looks like the other two:
  # ------------------------------------------------------------------------------

  # Same index name and column names as the train/valid files
  test_df.index.names = ['Id']
  test_df = test_df.rename(columns={'CATEGORY':'Category',
                                    'TOPICS': 'Topic',
                                    'SOURCE': 'Source',
                                    'HEADLINE': 'Headline',
                                    'TEXT': 'Text',
                                    'LINK': 'Link',
                                    })

  # Same category names as the train/valid files. The result is assigned back
  # instead of calling replace(..., inplace=True) on the selected column: that
  # form mutates an intermediate object and does not update `test_df` when
  # pandas copy-on-write is active.
  test_df["Category"] = test_df["Category"].replace({"FALSO": "Fake", "VERDADERO": "True"})

  # ------------------------------------------------------------------------------

  # Convert dataframes to datasets objects
  train_dataset = Dataset.from_pandas(train_df, split='train')
  valid_dataset = Dataset.from_pandas(valid_df, split='valid')
  test_dataset = Dataset.from_pandas(test_df, split='test')

  # Create a DatasetDict object to store our dataset
  dataset = DatasetDict({'train': train_dataset, 'valid': valid_dataset, 'test': test_dataset})

In [None]:
if CREATE_DATASETS:
  # Save into disk
  # (writes the DatasetDict built above so it can be read back with load_from_disk)
  dataset.save_to_disk(DATASET_PATH)

## Summarization preprocessing

These cells are only for summarizing the text. This task is already done: you can load the summarized dataset by setting `USE_SUMMARIZED_DATA` to `True`, or run the summarization again by setting `SUMMARIZE_DATA` to `True`.

**WARNING**: *THIS TASK TAKES A LOT OF TIME, DO NOT EXECUTE IT UNLESS YOU ARE SURE YOU WANT TO DO IT. (MINIMUM ESTIMATED TIME: 1 HOUR)*

In [None]:
if SUMMARIZE_DATA:
  # Tokenizer and model used to summarize the text
  # (both are loaded from the same SUMMARIZER_NAME checkpoint)
  sum_tokenizer = RobertaTokenizerFast.from_pretrained(SUMMARIZER_NAME)
  sum_model = EncoderDecoderModel.from_pretrained(SUMMARIZER_NAME)

In [None]:
if SUMMARIZE_DATA:
  def summarize_data(records):
    """Summarize the 'Text' field of a record with the summarization model.

    The text is tokenized (truncated, returned as PyTorch tensors), passed to
    `sum_model.generate`, and the first generated sequence is decoded without
    special tokens.

    Args:
      records: mapping that provides the 'Text' entry to summarize.

    Returns:
      dict with a single key, 'Sum_text', holding the decoded summary.
    """
    encoded = sum_tokenizer(records['Text'], truncation=True, padding="longest", return_tensors="pt")

    generated = sum_model.generate(encoded.input_ids, attention_mask=encoded.attention_mask)

    # Only the first generated sequence is decoded
    return {"Sum_text": sum_tokenizer.decode(generated[0], skip_special_tokens=True)}

In [None]:
if SUMMARIZE_DATA:

  # Summarize the text of each row. `dataset` is a DatasetDict, so this covers
  # every split it contains, including 'test'.
  dataset = dataset.map(summarize_data)

  # The 'test' split of `dataset` was built from `test_dataset`, so reuse the
  # already summarized split instead of running the slow summarization on the
  # same rows a second time.
  test_dataset = dataset['test']

  # Save dataset to disk
  dataset.save_to_disk(SUMMARIZED_DATASET_PATH)
  test_dataset.save_to_disk(SUMMARIZED_TEST_DATASET_PATH)

## Load the data

In [None]:
# Load the dataset object from disk
# (dataset_path is either the original or the summarized dataset location)
dataset = DatasetDict.load_from_disk(dataset_path)

dataset

DatasetDict({
    train: Dataset({
        features: ['Category', 'Topic', 'Source', 'Headline', 'Link', 'Id', 'labels', 'Text'],
        num_rows: 676
    })
    valid: Dataset({
        features: ['Category', 'Topic', 'Source', 'Headline', 'Link', 'Id', 'labels', 'Text'],
        num_rows: 295
    })
    test: Dataset({
        features: ['Category', 'Topic', 'Source', 'Headline', 'Link', 'Id', 'Text', 'labels'],
        num_rows: 572
    })
})

In [None]:
# Switch the output format to pandas while each split is copied out for inspection
dataset.set_format('pandas')

train_df = dataset['train'][:]
valid_df = dataset['valid'][:]
test_df = dataset['test'][:]

# Go back to the default output format for the rest of the notebook
dataset.reset_format()

In [None]:
train_df.head()

Unnamed: 0,Category,Topic,Source,Headline,Link,Id,labels,Text
0,Fake,Education,El Ruinaversal,"RAE INCLUIRÁ LA PALABRA ""LADY"" EN EL DICCIONAR...",http://www.elruinaversal.com/2017/06/10/rae-in...,1,1,"La RAE incluirá en el diccionario el término ""..."
1,Fake,Education,Hay noticia,"La palabra ""haiga"", aceptada por la RAE",https://haynoticia.es/la-palabra-haiga-aceptad...,2,1,La Academia de la Lengua ha aceptado el uso de...
2,Fake,Education,El Ruinaversal,YORDI ROSADO ESCRIBIRÁ Y DISEÑARÁ LOS NUEVOS L...,http://www.elruinaversal.com/2018/05/06/yordi-...,3,1,El director de la Secretaría de Educación de M...
3,True,Education,EL UNIVERSAL,UNAM capacitará a maestros para aprobar prueba...,http://www.eluniversal.com.mx/articulo/nacion/...,4,0,Te ofrecemos una selección de artículos de EL ...
4,Fake,Education,Lamula,pretenden aprobar libros escolares con conteni...,https://redaccion.lamula.pe/2018/06/19/memoria...,5,1,"Una selección de artículos de EL PAÍS de hoy, ..."


In [None]:
valid_df.head()

In [None]:
test_df.head()

In [None]:
# Delete the dataframes since we are not going to use them anymore
del train_df, valid_df, test_df

## Preprocess the data

In [None]:
# FUNCTIONS FOR THE PREPROCESSING

# Columns joined, in this order, for each supported PREPROCESS mode
_PREPROCESS_FIELDS = {
  1: ('Source', 'Headline', 'Text'),
  2: ('Source', 'Topic', 'Link', 'Text'),
  3: ('Source', 'Link', 'Text'),
  4: ('Source', 'Link', 'Headline', 'Text'),
}

# Concatenate the selected columns, this will be the data to be tokenized
def concat_data(records, preprocess=None):
  """Build the 'Data' text of a record by joining some of its columns.

  The selected columns are converted with `str()` and joined with '. ', so a
  missing value (None) ends up in the text as the literal 'None'.

  Args:
    records: mapping with the columns required by the selected mode
      ('Source', 'Headline', 'Text', 'Topic', 'Link').
    preprocess: mode to use (1 to 4). When omitted, the global PREPROCESS
      setting is used, which keeps `dataset.map(concat_data)` working as before.

  Returns:
    dict with a single key, 'Data', holding the concatenated text.

  Raises:
    ValueError: if the mode is not one of the supported values. Previously an
      unsupported mode failed with an UnboundLocalError on the return statement.
  """
  mode = PREPROCESS if preprocess is None else preprocess

  fields = _PREPROCESS_FIELDS.get(mode)
  if fields is None:
    raise ValueError(
        f'Unsupported PREPROCESS value {mode!r}; expected one of {sorted(_PREPROCESS_FIELDS)}')

  return {'Data': '. '.join(str(records[field]) for field in fields)}

# Turn the Category of a record into a numeric label
#   Category == 'True'  --> labels = 0
#   anything else       --> labels = 1 (Fake)
def set_labels(records):
  """Return {'labels': 0} for the 'True' category and {'labels': 1} otherwise."""
  if records['Category'] == 'True':
    return {'labels': 0}
  return {'labels': 1}

In [None]:
# Map the functions to the dataset
# (the map is called on the DatasetDict, so every split gets the 'Data' and 'labels' columns)
dataset = dataset.map(concat_data)
dataset = dataset.map(set_labels)

print(dataset)



  0%|          | 0/676 [00:00<?, ?ex/s]

  0%|          | 0/295 [00:00<?, ?ex/s]

  0%|          | 0/572 [00:00<?, ?ex/s]

  0%|          | 0/676 [00:00<?, ?ex/s]

  0%|          | 0/295 [00:00<?, ?ex/s]

  0%|          | 0/572 [00:00<?, ?ex/s]

DatasetDict({
    train: Dataset({
        features: ['Category', 'Topic', 'Source', 'Headline', 'Text', 'Link', 'Id', 'Data', 'labels'],
        num_rows: 676
    })
    valid: Dataset({
        features: ['Category', 'Topic', 'Source', 'Headline', 'Text', 'Link', 'Id', 'Data', 'labels'],
        num_rows: 295
    })
    test: Dataset({
        features: ['Category', 'Topic', 'Source', 'Headline', 'Text', 'Link', 'Id', 'Data', 'labels'],
        num_rows: 572
    })
})


In [None]:
dataset['train']['Data'][0]

'El Ruinaversal. RAE INCLUIRÁ LA PALABRA "LADY" EN EL DICCIONARIO DEL IDIOMA ESPAÑOL COMO DEFINICIÓN DE "MUJER PROBLEMÁTICA". RAE INCLUIRÁ LA PALABRA "LADY" EN EL DICCIONARIO DEL IDIOMA ESPAÑOL COMO DEFINICIÓN DE "MUJER PROBLEMÁTICA"\r\nEspaña.- El presidente de la Real Academia Española (RAE), Darío Villanueva, informó en conferencia de prensa que a partir del próximo mes se incluirá el término "Lady" como una nueva palabra en el diccionario del idioma español.\r\nDarío señaló que "Lady" servirá para definir a una "mujer problemática" o a una "mujer que causa problemas", y mencionó que esta palabra será una de las pocas que también se utilizan en el idioma inglés pero que en castellano tiene un significado diferente:\r\n"Son contadas las palabras del idioma inglés que se utilizan en el español pero que tienen otro significado. Con la globalización las personas han comenzado a adoptar términos anglosajones pero los utilizan con su significado real, sin embargo en este caso la expresión

In [None]:
# Convert dataset to dataframe
# (the pandas output format stays active until dataset.reset_format() is called)
dataset.set_format('pandas')
df = dataset['train'][:]
df

Unnamed: 0,Category,Topic,Source,Headline,Text,Link,Id,Data,labels
0,Fake,Education,El Ruinaversal,"RAE INCLUIRÁ LA PALABRA ""LADY"" EN EL DICCIONAR...","RAE INCLUIRÁ LA PALABRA ""LADY"" EN EL DICCIONAR...",http://www.elruinaversal.com/2017/06/10/rae-in...,1,"El Ruinaversal. RAE INCLUIRÁ LA PALABRA ""LADY""...",1
1,Fake,Education,Hay noticia,"La palabra ""haiga"", aceptada por la RAE","La palabra ""haiga"", aceptada por la RAE La Rea...",https://haynoticia.es/la-palabra-haiga-aceptad...,2,"Hay noticia. La palabra ""haiga"", aceptada por ...",1
2,Fake,Education,El Ruinaversal,YORDI ROSADO ESCRIBIRÁ Y DISEÑARÁ LOS NUEVOS L...,YORDI ROSADO ESCRIBIRÁ Y DISEÑARÁ LOS NUEVOS L...,http://www.elruinaversal.com/2018/05/06/yordi-...,3,El Ruinaversal. YORDI ROSADO ESCRIBIRÁ Y DISEÑ...,1
3,True,Education,EL UNIVERSAL,UNAM capacitará a maestros para aprobar prueba...,UNAM capacitará a maestros para aprobar prueba...,http://www.eluniversal.com.mx/articulo/nacion/...,4,EL UNIVERSAL. UNAM capacitará a maestros para ...,0
4,Fake,Education,Lamula,pretenden aprobar libros escolares con conteni...,Alerta: pretenden aprobar libros escolares con...,https://redaccion.lamula.pe/2018/06/19/memoria...,5,Lamula. pretenden aprobar libros escolares con...,1
...,...,...,...,...,...,...,...,...,...
671,Fake,Entertainment,El Dizque,Madonna será la nueva imagen del Chocolate Abu...,Madonna será la nueva imagen del Chocolate Abu...,https://www.eldizque.com/madonna-sera-la-nueva...,672,El Dizque. Madonna será la nueva imagen del Ch...,1
672,True,Entertainment,Radio Formula,"Filtran material íntimo de Zelina Vega, luchad...","Filtran material íntimo de Zelina Vega, luchad...",http://www.radioformula.com.mx/notas.asp?Idn=7...,673,Radio Formula. Filtran material íntimo de Zeli...,0
673,Fake,Entertainment,La Voz Popular,CBS PLANEA GRAN HERMANO EN LA CASA BLANCA CON ...,CBS PLANEA GRAN HERMANO EN LA CASA BLANCA CON ...,http://lavozpopular.com/cbs-planea-gran-herman...,674,La Voz Popular. CBS PLANEA GRAN HERMANO EN LA ...,1
674,Fake,Entertainment,El Dizque,TV Azteca anuncia que cambiará su nombre por u...,TV Azteca anuncia que cambiará su nombre por u...,https://www.eldizque.com/tv-azteca-anuncia-que...,675,El Dizque. TV Azteca anuncia que cambiará su n...,1


In [None]:
df.value_counts('Category')

Category
Fake    338
True    338
dtype: int64

In [None]:
dataset.reset_format()

## Tokenize the data

In [None]:
# Load the tokenizer that belongs to the selected checkpoint
model_checkpoint = MODEL_CHECKPOINT
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

Downloading:   0%|          | 0.00/310 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/650 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/242k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/475k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/134 [00:00<?, ?B/s]

In [None]:
# Vocabulary size of the tokenizer (the size of our one-hot-encoded vectors)
tokenizer.vocab_size

31002

In [None]:
# Tokenization function, meant to be mapped over the dataset
def tokenize_data(records):
  """Tokenize the 'Data' field of the given records.

  The text is padded and truncated, with TRUNCATION_LEN as the maximum length.

  Args:
    records: mapping that provides the 'Data' entry to tokenize.

  Returns:
    The tokenizer output for records['Data'].
  """
  encoded = tokenizer(
      records['Data'],
      max_length=TRUNCATION_LEN,
      truncation=True,
      padding=True,
  )
  return encoded

In [None]:
# Every column of the train split except 'labels' is dropped during tokenization
columns = dataset['train'].column_names
columns.remove('labels')

# Map the function, removing at the same time those columns we don't need.
# The map is called on the DatasetDict, so it is applied to every split it contains.
dataset = dataset.map(tokenize_data, batched=True, remove_columns=columns)

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [None]:
len(dataset['train'][0]['input_ids'])

256

# Train the model

## Load the model

In [None]:
n_labels = 2  # two classes: 0 (True) and 1 (Fake)

# Load the checkpoint as a sequence-classification model with n_labels outputs
# (model_checkpoint IS ASSIGNED IN: PREPARE DATA -> TOKENIZE DATA)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=n_labels)

Downloading:   0%|          | 0.00/419M [00:00<?, ?B/s]

Some weights of the model checkpoint at dccuchile/bert-base-spanish-wwm-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuc

## Define metrics

In [None]:
# Metric objects used by compute_metric
# NOTE(review): `load_metric` is imported from `datasets`; newer releases appear to
# have moved metrics to the separate `evaluate` package - confirm before upgrading.
accuracy = load_metric('accuracy')
f1 = load_metric('f1')

Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

In [None]:
def compute_metric(eval_pred):
  """Compute accuracy and F1 for one evaluation pass.

  Args:
    eval_pred: pair (predictions, labels); the predicted class of each example
      is the argmax of `predictions` along axis 1.

  Returns:
    dict with the keys 'accuracy' and 'f1-score'.
  """
  scores, references = eval_pred

  # Highest score along axis 1 -> predicted class id
  predicted = np.argmax(scores, axis=1)

  acc_result = accuracy.compute(predictions=predicted, references=references)
  f1_result = f1.compute(predictions=predicted, references=references)

  return {'accuracy': acc_result['accuracy'], 'f1-score': f1_result['f1']}

## Fine-tuning

In [None]:
# Short model name: the part of the checkpoint id after the last '/'
model_name = model_checkpoint.split("/")[-1]
model_name

'bert-base-spanish-wwm-uncased'

In [None]:
# Define the training parameters
num_train_samples = dataset['train'].num_rows
# Shuffle the training split with a fixed seed (all rows are kept)
train_dataset = dataset['train'].shuffle(seed=RANDOM_SEED).select(range(num_train_samples))
# Logging interval in optimisation steps. The integer division yields 0 when the
# training split is small compared to BATCH_SIZE * EPOCHS, and a zero interval
# is not a valid value for step-based logging, so it is clamped to at least 1.
logging_steps = max(1, len(train_dataset) // (2 * BATCH_SIZE * EPOCHS))

# Loss metrics have to be minimised, every other metric is maximised.
# Both 'loss' and 'eval_loss' refer to the evaluation loss.
metric_condition = METRIC_FOR_BEST_MODEL not in ('loss', 'eval_loss')

# Tell the trainer whether it needs to generate the graphic or not.
# Reporting is switched off with the string 'none': passing None leaves the
# trainer on its default integrations, so wandb logging would stay enabled.
report_option = 'wandb' if GENERATE_GRAPHIC else 'none'

training_args = TrainingArguments(
    output_dir='results',
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    # Evaluation and checkpointing both happen once per epoch, so the best
    # checkpoint can be restored when training ends
    load_best_model_at_end=True,
    metric_for_best_model=METRIC_FOR_BEST_MODEL,
    greater_is_better=metric_condition,
    weight_decay=WEIGHT_DECAY,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    logging_steps=logging_steps,
    save_total_limit=3,
    # Make the seed used by the trainer follow the notebook configuration
    seed=RANDOM_SEED,
    report_to=report_option,
    push_to_hub=False
)

In [None]:
# Create a Trainer object that will do the work for us:
# it trains on the shuffled training split, evaluates on the 'valid' split with
# compute_metric, and uses early stopping with a patience of ES_PATIENCE.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metric,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=ES_PATIENCE)],
    train_dataset=train_dataset,
    eval_dataset=dataset['valid'],
    tokenizer=tokenizer
)

In [None]:
print(torch.cuda.memory_summary())

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |  488175 KB |  488175 KB |  488175 KB |       0 B  |
|       from large pool |  487680 KB |  487680 KB |  487680 KB |       0 B  |
|       from small pool |     495 KB |     495 KB |     495 KB |       0 B  |
|---------------------------------------------------------------------------|
| Active memory         |  488175 KB |  488175 KB |  488175 KB |       0 B  |
|       from large pool |  487680 KB |  487680 KB |  487680 KB |       0 B  |
|       from small pool |     495 KB |     495 KB |     495 KB |       0 B  |
|---------------------------------------------------------------

In [None]:
next(model.parameters()).is_cuda

True

In [None]:
trainer.train()

***** Running training *****
  Num examples = 676
  Num Epochs = 10
  Instantaneous batch size per device = 6
  Total train batch size (w. parallel, distributed & accumulation) = 6
  Gradient Accumulation steps = 1
  Total optimization steps = 1130
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,0.6004,0.4888,0.776271,0.713043
2,0.2007,0.150144,0.962712,0.96
3,0.0028,0.368981,0.922034,0.912548
4,0.0005,0.413705,0.928814,0.921348
5,0.0003,0.336695,0.952542,0.949275


***** Running Evaluation *****
  Num examples = 295
  Batch size = 6
Saving model checkpoint to results/checkpoint-113
Configuration saved in results/checkpoint-113/config.json
Model weights saved in results/checkpoint-113/pytorch_model.bin
tokenizer config file saved in results/checkpoint-113/tokenizer_config.json
Special tokens file saved in results/checkpoint-113/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 295
  Batch size = 6
Saving model checkpoint to results/checkpoint-226
Configuration saved in results/checkpoint-226/config.json
Model weights saved in results/checkpoint-226/pytorch_model.bin
tokenizer config file saved in results/checkpoint-226/tokenizer_config.json
Special tokens file saved in results/checkpoint-226/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 295
  Batch size = 6
Saving model checkpoint to results/checkpoint-339
Configuration saved in results/checkpoint-339/config.json
Model weights saved in results/checkp

TrainOutput(global_step=565, training_loss=0.1959496760588403, metrics={'train_runtime': 242.8255, 'train_samples_per_second': 27.839, 'train_steps_per_second': 4.654, 'total_flos': 444657683558400.0, 'train_loss': 0.1959496760588403, 'epoch': 5.0})

In [None]:
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/accuracy,▁█▆▇█
eval/f1-score,▁█▇▇█
eval/loss,█▁▆▆▅
eval/runtime,▁▃▄▅█
eval/samples_per_second,█▆▅▄▁
eval/steps_per_second,█▆▅▄▁
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/learning_rate,████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
train/loss,▇█▇▅▇▆▅▆▄▄▆▅▂▂▁█▁▁▁▂▃▃▅▁▂▁▁▁▃▁▁▁▂▁▁▁▁▁▂▁

0,1
eval/accuracy,0.95254
eval/f1-score,0.94928
eval/loss,0.33669
eval/runtime,4.9922
eval/samples_per_second,59.093
eval/steps_per_second,10.016
train/epoch,5.0
train/global_step,565.0
train/learning_rate,1e-05
train/loss,0.0003


## Save the model

In [None]:
if SAVE_MODEL:
  # Store the fine-tuned weights and configuration...
  model.save_pretrained(SAVE_PATH)
  # ...together with the tokenizer files, so that SAVE_PATH can be loaded on its
  # own later without having to remember which checkpoint the tokenizer came from.
  tokenizer.save_pretrained(SAVE_PATH)

Configuration saved in /content/drive/MyDrive/Colab Notebooks/TFG/models/btin_v.2.6/config.json
Model weights saved in /content/drive/MyDrive/Colab Notebooks/TFG/models/btin_v.2.6/pytorch_model.bin


# Evaluate the model

In [None]:
# Run the trained model on the 'test' split
predictions = trainer.predict(test_dataset=dataset['test'])

***** Running Prediction *****
  Num examples = 572
  Batch size = 6


In [None]:
predictions.metrics

{'test_accuracy': 0.833916083916084,
 'test_f1-score': 0.8263254113345521,
 'test_loss': 0.6992812752723694,
 'test_runtime': 9.2811,
 'test_samples_per_second': 61.631,
 'test_steps_per_second': 10.344}

In [None]:
# Predicted class id of each test example: index of the highest score along axis 1
predicted_labels = np.argmax(predictions.predictions, axis=1)

In [None]:
# Confusion matrix of the test split; the first argument holds the true labels,
# the second the predicted ones (label 0 = True, label 1 = Fake)
print(confusion_matrix(predictions.label_ids, predicted_labels))

[[251  35]
 [ 60 226]]


In [None]:
# Per-class precision, recall and F1 on the test split (label 0 = True, label 1 = Fake)
print(classification_report(predictions.label_ids, predicted_labels))

              precision    recall  f1-score   support

           0       0.81      0.88      0.84       286
           1       0.87      0.79      0.83       286

    accuracy                           0.83       572
   macro avg       0.84      0.83      0.83       572
weighted avg       0.84      0.83      0.83       572

