## Librerías, datos y procesamiento inicial

In [1]:
#Importamos las librerías
import pandas as pd
import numpy as np
import evaluate
from imblearn.datasets import make_imbalance
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline, TrainingArguments, Trainer, BertModel
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from datasets import Dataset,load_dataset, load_from_disk, DatasetDict
import mlflow, os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# MLflow tracking setup. The three environment variables below are the ones the
# Hugging Face Trainer's MLflow integration looks at (see the transformers
# MLflowCallback documentation); the tracking server is expected on localhost:5000.
os.environ.update(
    {
        "MLFLOW_EXPERIMENT_NAME": "topic",
        "MLFLOW_FLATTEN_PARAMS": "1",
        "HF_MLFLOW_LOG_ARTIFACTS": "0",
    }
)
mlflow.set_tracking_uri('http://localhost:5000')
mlflow.set_experiment(experiment_name='topic')

<Experiment: artifact_location='mlflow-artifacts:/433727646153651320', creation_time=1679618966216, experiment_id='433727646153651320', last_update_time=1679618966216, lifecycle_stage='active', name='topic', tags={}>

In [3]:
# Load the labelled posts.
# The workbook location can be overridden with the TOPIC_DATA_PATH environment
# variable so the notebook is runnable on other machines; when the variable is
# unset the original local path is used, so existing runs behave as before.
DATA_PATH = os.environ.get(
    "TOPIC_DATA_PATH",
    r"G:\Mi unidad\Universidad\EAFIT\2 semestre\PDG\modelos\TOPIC\data_depurada.xlsx",
)
df = pd.read_excel(DATA_PATH)

In [4]:
# Class distribution of the raw string labels (the `label` column is replaced by
# the integer ids from `idx_label` in the preprocessing cell that follows).
df["label"].value_counts()

Post de interacción      2130
Dudas                    1062
Reclutamiento             489
Menciones                 436
Felicitaciones            400
Crítica                   393
Noticias                  194
Condiciones laborales     146
Alcance                   126
Oferta terceros           116
Pagos                      64
Otros                      48
Name: label, dtype: int64

In [5]:
# Preprocessing: force every post to a Python string and overwrite the string
# `label` column with the integer id stored in `idx_label`, which is what the
# model is trained against.
df["text"] = df["text"].map(str)
df["label"] = df["idx_label"]
data = df.loc[:, ["text", "label"]]
# Label name <-> integer id lookup tables passed to the model config.
# "otros" (id 11) is kept as the first key so the insertion order of both tables
# is the same as before; ids 0-10 follow in ascending order.
# NOTE(review): presumably these ids match the `idx_label` column of the data — confirm.
LABEL2ID = {
    "otros": 11,
    **{
        topic: idx
        for idx, topic in enumerate(
            [
                "alcance",
                "condiciones laborales",
                "critica",
                "dudas",
                "felicitaciones",
                "menciones",
                "noticias",
                "ofertas terceros",
                "pagos",
                "post interaccion",
                "reclutamiento",
            ]
        )
    },
}
ID2LABEL = dict(zip(LABEL2ID.values(), LABEL2ID.keys()))

In [6]:
# Run configuration
# check_point: model identifier passed to every `from_pretrained` call below.
# nombre_base: prefix used to build the MLflow / Trainer run names.
check_point = "cardiffnlp/twitter-roberta-base-dec2021"
nombre_base = "roberta_"

## Modelo Base

In [7]:
# Baseline: score the pretrained encoder with a freshly initialised (untrained)
# classification head on the whole dataset, to get a floor for later runs.
tokenizer = AutoTokenizer.from_pretrained(check_point)
model = AutoModelForSequenceClassification.from_pretrained(
    check_point,
    num_labels=len(LABEL2ID),
    id2label=ID2LABEL,
    label2id=LABEL2ID,
    ignore_mismatched_sizes=True,
)
classification_task = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    padding="max_length",
    truncation=True,
    max_length=128,
    device=0,
)
# For a single text the pipeline returns a one-element list
# [{"label": ..., "score": ...}]; keep only the predicted label name.
pred = df["text"].apply(lambda text: classification_task(str(text))[0]["label"])
# Translate label names to ids through the shared LABEL2ID table instead of a
# second hand-written copy of it, so the two can never drift apart. `map` also
# yields a proper integer Series rather than an object-dtype one.
pred = pred.map(LABEL2ID)
print(classification_report(df["label"], pred))

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-dec2021 were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-dec2021 and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.ou

              precision    recall  f1-score   support

           0       0.02      1.00      0.04       126
           1       0.00      0.00      0.00       146
           2       0.00      0.00      0.00       393
           3       0.00      0.00      0.00      1062
           4       0.00      0.00      0.00       400
           5       0.00      0.00      0.00       436
           6       0.00      0.00      0.00       194
           7       0.00      0.00      0.00       116
           8       0.00      0.00      0.00        64
           9       0.00      0.00      0.00      2130
          10       0.00      0.00      0.00       489
          11       0.00      0.00      0.00        48

    accuracy                           0.02      5604
   macro avg       0.00      0.08      0.00      5604
weighted avg       0.00      0.02      0.00      5604



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [8]:
# Store the baseline accuracy in its own MLflow run ("roberta_BASELINE").
with mlflow.start_run(run_name=f"{nombre_base}BASELINE"):
    acc = accuracy_score(y_true=df["label"], y_pred=pred)
    mlflow.log_metric(key="accuracy", value=acc)

## Modelo con todos los datos

In [9]:
# Model: reload the tokenizer and an untouched copy of the pretrained checkpoint
# (with a new 12-way classification head) for the run on the full dataset.
tokenizer = AutoTokenizer.from_pretrained(check_point)
model = AutoModelForSequenceClassification.from_pretrained(
    check_point,
    num_labels=12,
    id2label=ID2LABEL,
    label2id=LABEL2ID,
    ignore_mismatched_sizes=True,
)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-dec2021 were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-dec2021 and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.ou

In [10]:
# Convert the frame to a Hugging Face Dataset and carve out the splits:
# 70% train, then the remaining 30% halved into dev and test (15% each).
dataset = Dataset.from_pandas(df, preserve_index=False)
train_devtest = dataset.train_test_split(test_size=0.3, shuffle=True, seed=200)
posts_dev_test = train_devtest["test"].train_test_split(
    test_size=0.50, shuffle=True, seed=200
)
posts_train_dev_test_dataset = DatasetDict(
    {
        "train": train_devtest["train"],
        "test": posts_dev_test["test"],
        "dev": posts_dev_test["train"],
    }
)

In [11]:
# Tokenise every split of the dataset.
def tokenize_function(examples):
    """Tokenise the ``text`` field of a batch with the notebook-level ``tokenizer``.

    Every sequence is padded or truncated to exactly 128 tokens.
    """
    texts = examples["text"]
    return tokenizer(texts, padding="max_length", truncation=True, max_length=128)


tokenized_datasets = posts_train_dev_test_dataset.map(tokenize_function, batched=True)

100%|██████████| 4/4 [00:00<00:00, 15.15ba/s]
100%|██████████| 1/1 [00:00<00:00, 21.27ba/s]
100%|██████████| 1/1 [00:00<00:00, 21.27ba/s]


In [12]:
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Compute accuracy for the Trainer's evaluation loop.

    ``eval_pred`` unpacks into ``(logits, labels)``; the predicted class of each
    row is the index of its largest logit along the last axis.
    """
    logits, labels = eval_pred
    return metric.compute(
        predictions=np.argmax(logits, axis=-1),
        references=labels,
    )

In [13]:
# Training configuration for the run on the full (imbalanced) dataset:
# log, evaluate and checkpoint once per epoch, and reload the best checkpoint
# when training finishes.
#
# SECURITY: a Hugging Face access token used to be hard-coded in this cell.
# It is now read from the HF_TOKEN environment variable (None when unset, which
# is harmless here because push_to_hub=False). The token that was committed with
# the notebook must be considered leaked and should be revoked.
training_args = TrainingArguments(
    output_dir="arj9719/bertweet_sentiment_analysis",
    logging_strategy="epoch",
    evaluation_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=False,
    hub_token=os.environ.get("HF_TOKEN"),
    run_name=nombre_base + "1",
    learning_rate=1e-6,
)

In [14]:
# Fine-tune on the train split; the dev split is evaluated after every epoch
# (accuracy via compute_metrics). The cell's last expression displays the
# TrainOutput summary.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["dev"],
)
trainer.train()

The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 3922
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 2455
  Number of trainable parameters = 124654860
 20%|██        | 491/2455 [01:02<04:05,  7.98it/s]The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 841
  Batch size = 8


{'loss': 1.9842, 'learning_rate': 8e-07, 'epoch': 1.0}


                                                  
 20%|██        | 491/2455 [01:07<04:05,  7.98it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-491
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-491\config.json


{'eval_loss': 1.4356000423431396, 'eval_accuracy': 0.5552913198573127, 'eval_runtime': 4.7114, 'eval_samples_per_second': 178.502, 'eval_steps_per_second': 22.498, 'epoch': 1.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-491\pytorch_model.bin
 40%|████      | 982/2455 [02:14<03:09,  7.75it/s]  The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 841
  Batch size = 8


{'loss': 1.4228, 'learning_rate': 6e-07, 'epoch': 2.0}


                                                  
 40%|████      | 982/2455 [02:19<03:09,  7.75it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-982
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-982\config.json


{'eval_loss': 1.2158514261245728, 'eval_accuracy': 0.5992865636147443, 'eval_runtime': 4.7267, 'eval_samples_per_second': 177.925, 'eval_steps_per_second': 22.426, 'epoch': 2.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-982\pytorch_model.bin
 60%|██████    | 1473/2455 [03:27<02:05,  7.82it/s]The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 841
  Batch size = 8


{'loss': 1.2507, 'learning_rate': 4e-07, 'epoch': 3.0}


                                                   
 60%|██████    | 1473/2455 [03:32<02:05,  7.82it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-1473
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-1473\config.json


{'eval_loss': 1.1141380071640015, 'eval_accuracy': 0.6706302021403091, 'eval_runtime': 4.9207, 'eval_samples_per_second': 170.91, 'eval_steps_per_second': 21.542, 'epoch': 3.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-1473\pytorch_model.bin
 80%|████████  | 1964/2455 [04:51<01:01,  8.00it/s]  The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 841
  Batch size = 8


{'loss': 1.1683, 'learning_rate': 2e-07, 'epoch': 4.0}


                                                   
 80%|████████  | 1964/2455 [04:56<01:01,  8.00it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-1964
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-1964\config.json


{'eval_loss': 1.0561912059783936, 'eval_accuracy': 0.6932223543400713, 'eval_runtime': 4.7839, 'eval_samples_per_second': 175.799, 'eval_steps_per_second': 22.158, 'epoch': 4.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-1964\pytorch_model.bin
100%|██████████| 2455/2455 [06:05<00:00,  7.72it/s]The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 841
  Batch size = 8


{'loss': 1.132, 'learning_rate': 0.0, 'epoch': 5.0}


                                                   
100%|██████████| 2455/2455 [06:10<00:00,  7.72it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-2455
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-2455\config.json


{'eval_loss': 1.040603518486023, 'eval_accuracy': 0.6944114149821641, 'eval_runtime': 4.8876, 'eval_samples_per_second': 172.068, 'eval_steps_per_second': 21.688, 'epoch': 5.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-2455\pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from arj9719/bertweet_sentiment_analysis\checkpoint-2455 (score: 1.040603518486023).
100%|██████████| 2455/2455 [06:14<00:00,  6.56it/s]

{'train_runtime': 374.4175, 'train_samples_per_second': 52.375, 'train_steps_per_second': 6.557, 'train_loss': 1.3915919162105397, 'epoch': 5.0}





TrainOutput(global_step=2455, training_loss=1.3915919162105397, metrics={'train_runtime': 374.4175, 'train_samples_per_second': 52.375, 'train_steps_per_second': 6.557, 'train_loss': 1.3915919162105397, 'epoch': 5.0})

In [15]:
# Evaluate the fine-tuned model on the held-out test split and log its accuracy.
test_df = pd.DataFrame(posts_train_dev_test_dataset["test"])

# Ask the Trainer which checkpoint it selected as best instead of hard-coding a
# Windows-style "...\checkpoint-<step>" path: the literal only worked on Windows,
# contained an invalid "\c" escape, and silently went stale whenever the number
# of training steps changed.
best_checkpoint = trainer.state.best_model_checkpoint
if best_checkpoint is None:
    raise RuntimeError(
        "The Trainer did not record a best checkpoint; run trainer.train() first."
    )
classification_task = pipeline(
    "text-classification",
    model=best_checkpoint,
    tokenizer=tokenizer,
    padding="max_length",
    truncation=True,
    max_length=128,
    device=0,
)
# For a single text the pipeline returns a one-element list
# [{"label": ..., "score": ...}]; keep only the predicted label name.
pred = test_df["text"].apply(lambda text: classification_task(str(text))[0]["label"])
# Convert label names to ids with the shared LABEL2ID table (no duplicated dict).
pred = pred.map(LABEL2ID)
print(classification_report(test_df["label"], pred))
with mlflow.start_run(run_name=nombre_base + '1'):
    acc = accuracy_score(test_df["label"], pred)
    mlflow.log_metric("accuracy", acc)

loading configuration file arj9719/bertweet_sentiment_analysis\checkpoint-2455\config.json
Model config RobertaConfig {
  "_name_or_path": "arj9719/bertweet_sentiment_analysis\\checkpoint-2455",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "alcance",
    "1": "condiciones laborales",
    "2": "critica",
    "3": "dudas",
    "4": "felicitaciones",
    "5": "menciones",
    "6": "noticias",
    "7": "ofertas terceros",
    "8": "pagos",
    "9": "post interaccion",
    "10": "reclutamiento",
    "11": "otros"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "alcance": 0,
    "condiciones laborales": 1,
    "critica": 2,
    "dudas": 3,
    "felicitaciones": 4,
    "menciones": 5,
    "notici

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        22
           1       0.00      0.00      0.00        15
           2       0.56      0.17      0.26        58
           3       0.75      1.00      0.86       154
           4       0.49      0.83      0.62        63
           5       0.52      0.43      0.47        82
           6       0.00      0.00      0.00        22
           7       0.00      0.00      0.00        17
           8       0.00      0.00      0.00         9
           9       0.87      0.96      0.91       318
          10       0.45      0.54      0.49        78
          11       0.00      0.00      0.00         3

    accuracy                           0.71       841
   macro avg       0.30      0.33      0.30       841
weighted avg       0.63      0.71      0.66       841



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Balanceo de clases

In [17]:
# Reload the raw data and undersample every class to 48 rows, so that all twelve
# topics are equally represented (48 is the size of the rarest class, "Otros",
# in the label counts shown earlier in the notebook).
# The workbook location can be overridden with the TOPIC_DATA_PATH environment
# variable; when it is unset the original local path is used.
df = pd.read_excel(
    os.environ.get(
        "TOPIC_DATA_PATH",
        r"G:\Mi unidad\Universidad\EAFIT\2 semestre\PDG\modelos\TOPIC\data_depurada.xlsx",
    )
)
x = df[["text", "idx_label"]]
y = df["label"]
x_bal, y_bal = make_imbalance(
    x,
    y,
    # Same target count for every class name found in the raw `label` column.
    sampling_strategy=dict.fromkeys(
        [
            "Post de interacción",
            "Dudas",
            "Reclutamiento",
            "Menciones",
            "Crítica",
            "Felicitaciones",
            "Noticias",
            "Condiciones laborales",
            "Alcance",
            "Oferta terceros",
            "Pagos",
            "Otros",
        ],
        48,
    ),
    random_state=5,
)

In [18]:
# Put the balanced features and labels back into one frame, then apply the same
# preprocessing as for the full dataset: text as str, `label` replaced by the id.
df = pd.concat([x_bal, y_bal], axis=1)
df["text"] = df["text"].map(str)
df["label"] = df["idx_label"]

In [19]:
# Sanity check: after balancing, each label id should appear the same number of times.
df.label.value_counts()

0     48
1     48
2     48
3     48
4     48
5     48
6     48
7     48
11    48
8     48
9     48
10    48
Name: label, dtype: int64

## Modelo con clases balanceadas

In [20]:
# Model: reload the tokenizer and an untouched copy of the pretrained checkpoint
# (with a new 12-way classification head) for the run on the balanced dataset.
tokenizer = AutoTokenizer.from_pretrained(check_point)
model = AutoModelForSequenceClassification.from_pretrained(
    check_point,
    num_labels=12,
    id2label=ID2LABEL,
    label2id=LABEL2ID,
    ignore_mismatched_sizes=True,
)

loading file vocab.json from cache at C:\Users\ALEJANDRO RENDON/.cache\huggingface\hub\models--cardiffnlp--twitter-roberta-base-dec2021\snapshots\679c13112a1410244721e95c56dbae9b1a27a782\vocab.json
loading file merges.txt from cache at C:\Users\ALEJANDRO RENDON/.cache\huggingface\hub\models--cardiffnlp--twitter-roberta-base-dec2021\snapshots\679c13112a1410244721e95c56dbae9b1a27a782\merges.txt
loading file tokenizer.json from cache at C:\Users\ALEJANDRO RENDON/.cache\huggingface\hub\models--cardiffnlp--twitter-roberta-base-dec2021\snapshots\679c13112a1410244721e95c56dbae9b1a27a782\tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at C:\Users\ALEJANDRO RENDON/.cache\huggingface\hub\models--cardiffnlp--twitter-roberta-base-dec2021\snapshots\679c13112a1410244721e95c56dbae9b1a27a782\special_tokens_map.json
loading file tokenizer_config.json from cache at C:\Users\ALEJANDRO RENDON/.cache\huggingface\hub\models--cardiffnlp--twitte

In [21]:
# Convert the balanced frame to a Hugging Face Dataset and carve out the splits:
# 70% train, then the remaining 30% halved into dev and test (15% each).
dataset = Dataset.from_pandas(df, preserve_index=False)
train_devtest = dataset.train_test_split(test_size=0.3, shuffle=True, seed=200)
posts_dev_test = train_devtest["test"].train_test_split(
    test_size=0.50, shuffle=True, seed=200
)
posts_train_dev_test_dataset = DatasetDict(
    {
        "train": train_devtest["train"],
        "test": posts_dev_test["test"],
        "dev": posts_dev_test["train"],
    }
)

In [22]:
# Tokenise every split of the balanced dataset.
def tokenize_function(examples):
    """Tokenise the ``text`` field of a batch with the notebook-level ``tokenizer``.

    Every sequence is padded or truncated to exactly 128 tokens.
    """
    texts = examples["text"]
    return tokenizer(texts, padding="max_length", truncation=True, max_length=128)


tokenized_datasets = posts_train_dev_test_dataset.map(tokenize_function, batched=True)

100%|██████████| 1/1 [00:00<00:00, 33.32ba/s]
100%|██████████| 1/1 [00:00<00:00, 124.95ba/s]
100%|██████████| 1/1 [00:00<00:00, 111.07ba/s]


In [23]:
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Compute accuracy for the Trainer's evaluation loop.

    ``eval_pred`` unpacks into ``(logits, labels)``; the predicted class of each
    row is the index of its largest logit along the last axis.
    """
    logits, labels = eval_pred
    return metric.compute(
        predictions=np.argmax(logits, axis=-1),
        references=labels,
    )

In [25]:
# Training configuration for the run on the class-balanced dataset. It is the
# same setup as the full-data run, including the same output_dir, so checkpoints
# of both runs are written to the same folder.
#
# SECURITY: a Hugging Face access token used to be hard-coded in this cell.
# It is now read from the HF_TOKEN environment variable (None when unset, which
# is harmless here because push_to_hub=False). The token that was committed with
# the notebook must be considered leaked and should be revoked.
#
# NOTE(review): in the recorded run the training loss only moved from 2.4933 to
# 2.4733 and dev accuracy stayed at 0.1047 for all five epochs, so
# learning_rate=1e-6 over 255 steps looks too small for this much smaller
# dataset. Worth revisiting; left unchanged here.
training_args = TrainingArguments(
    output_dir="arj9719/bertweet_sentiment_analysis",
    logging_strategy="epoch",
    evaluation_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=False,
    hub_token=os.environ.get("HF_TOKEN"),
    run_name=nombre_base + "2",
    learning_rate=1e-6,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [26]:
# Fine-tune on the balanced train split; the dev split is evaluated after every
# epoch (accuracy via compute_metrics). The cell's last expression displays the
# TrainOutput summary.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["dev"],
)
trainer.train()

The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 403
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 255
  Number of trainable parameters = 124654860
 20%|██        | 51/255 [00:08<00:28,  7.24it/s]The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 86
  Batch size = 8


{'loss': 2.4933, 'learning_rate': 8e-07, 'epoch': 1.0}



 20%|██        | 51/255 [00:09<00:28,  7.24it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-51
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-51\config.json


{'eval_loss': 2.5016677379608154, 'eval_accuracy': 0.10465116279069768, 'eval_runtime': 0.4442, 'eval_samples_per_second': 193.624, 'eval_steps_per_second': 24.766, 'epoch': 1.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-51\pytorch_model.bin
 40%|████      | 102/255 [00:23<00:22,  6.91it/s]The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 86
  Batch size = 8


{'loss': 2.4901, 'learning_rate': 6e-07, 'epoch': 2.0}



 40%|████      | 102/255 [00:23<00:22,  6.91it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-102
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-102\config.json


{'eval_loss': 2.4937820434570312, 'eval_accuracy': 0.10465116279069768, 'eval_runtime': 0.4477, 'eval_samples_per_second': 192.088, 'eval_steps_per_second': 24.569, 'epoch': 2.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-102\pytorch_model.bin
 60%|██████    | 153/255 [00:35<00:14,  7.20it/s]The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 86
  Batch size = 8


{'loss': 2.4747, 'learning_rate': 4e-07, 'epoch': 3.0}



 60%|██████    | 153/255 [00:35<00:14,  7.20it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-153
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-153\config.json


{'eval_loss': 2.4872195720672607, 'eval_accuracy': 0.10465116279069768, 'eval_runtime': 0.4252, 'eval_samples_per_second': 202.28, 'eval_steps_per_second': 25.873, 'epoch': 3.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-153\pytorch_model.bin
 80%|████████  | 204/255 [01:01<00:07,  6.94it/s]The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 86
  Batch size = 8


{'loss': 2.4708, 'learning_rate': 2e-07, 'epoch': 4.0}


                                                 
 80%|████████  | 204/255 [01:01<00:07,  6.94it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-204
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-204\config.json


{'eval_loss': 2.4840636253356934, 'eval_accuracy': 0.10465116279069768, 'eval_runtime': 0.4402, 'eval_samples_per_second': 195.384, 'eval_steps_per_second': 24.991, 'epoch': 4.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-204\pytorch_model.bin
100%|██████████| 255/255 [01:27<00:00,  6.97it/s]The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 86
  Batch size = 8


{'loss': 2.4733, 'learning_rate': 0.0, 'epoch': 5.0}


                                                 
100%|██████████| 255/255 [01:27<00:00,  6.97it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-255
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-255\config.json


{'eval_loss': 2.4828546047210693, 'eval_accuracy': 0.10465116279069768, 'eval_runtime': 0.4402, 'eval_samples_per_second': 195.384, 'eval_steps_per_second': 24.991, 'epoch': 5.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-255\pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from arj9719/bertweet_sentiment_analysis\checkpoint-255 (score: 2.4828546047210693).
100%|██████████| 255/255 [02:01<00:00,  2.11it/s]

{'train_runtime': 123.366, 'train_samples_per_second': 16.334, 'train_steps_per_second': 2.067, 'train_loss': 2.480463663736979, 'epoch': 5.0}





TrainOutput(global_step=255, training_loss=2.480463663736979, metrics={'train_runtime': 123.366, 'train_samples_per_second': 16.334, 'train_steps_per_second': 2.067, 'train_loss': 2.480463663736979, 'epoch': 5.0})

In [27]:
# Evaluate the model trained on balanced data on its held-out test split and log
# the accuracy.
test_df = pd.DataFrame(posts_train_dev_test_dataset["test"])

# Ask the Trainer which checkpoint it selected as best instead of hard-coding a
# Windows-style "...\checkpoint-<step>" path: the literal only worked on Windows,
# contained an invalid "\c" escape, and silently went stale whenever the number
# of training steps changed.
best_checkpoint = trainer.state.best_model_checkpoint
if best_checkpoint is None:
    raise RuntimeError(
        "The Trainer did not record a best checkpoint; run trainer.train() first."
    )
classification_task = pipeline(
    "text-classification",
    model=best_checkpoint,
    tokenizer=tokenizer,
    padding="max_length",
    truncation=True,
    max_length=128,
    device=0,
)
# For a single text the pipeline returns a one-element list
# [{"label": ..., "score": ...}]; keep only the predicted label name.
pred = test_df["text"].apply(lambda text: classification_task(str(text))[0]["label"])
# Convert label names to ids with the shared LABEL2ID table (no duplicated dict).
pred = pred.map(LABEL2ID)
print(classification_report(test_df["label"], pred))
with mlflow.start_run(run_name=nombre_base + '2'):
    acc = accuracy_score(test_df["label"], pred)
    mlflow.log_metric("accuracy", acc)

loading configuration file arj9719/bertweet_sentiment_analysis\checkpoint-255\config.json
Model config RobertaConfig {
  "_name_or_path": "arj9719/bertweet_sentiment_analysis\\checkpoint-255",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "alcance",
    "1": "condiciones laborales",
    "2": "critica",
    "3": "dudas",
    "4": "felicitaciones",
    "5": "menciones",
    "6": "noticias",
    "7": "ofertas terceros",
    "8": "pagos",
    "9": "post interaccion",
    "10": "reclutamiento",
    "11": "otros"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "alcance": 0,
    "condiciones laborales": 1,
    "critica": 2,
    "dudas": 3,
    "felicitaciones": 4,
    "menciones": 5,
    "noticias

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         7
           1       0.07      1.00      0.13         6
           2       0.00      0.00      0.00         5
           3       0.00      0.00      0.00         9
           4       0.00      0.00      0.00         9
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00        10
           7       0.00      0.00      0.00         5
           8       0.00      0.00      0.00         6
           9       0.00      0.00      0.00         7
          10       0.00      0.00      0.00         7
          11       0.00      0.00      0.00         9

    accuracy                           0.07        87
   macro avg       0.01      0.08      0.01        87
weighted avg       0.00      0.07      0.01        87



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
