## Librerías, datos y procesamiento inicial

In [1]:
#Importamos las librerías
import pandas as pd
import numpy as np
import evaluate
from imblearn.datasets import make_imbalance
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline, TrainingArguments, Trainer, BertModel
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from datasets import Dataset,load_dataset, load_from_disk, DatasetDict
import mlflow, os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Environment settings for MLflow logging during training
# (presumably read by the transformers MLflow integration - confirm against the installed version).
os.environ.update(
    {
        "MLFLOW_EXPERIMENT_NAME": "sentiment",
        "MLFLOW_FLATTEN_PARAMS": "1",
        "HF_MLFLOW_LOG_ARTIFACTS": "0",
    }
)
# Point the MLflow client at the local tracking server and select the experiment
# that the manually started runs in this notebook are logged under.
mlflow.set_tracking_uri('http://localhost:5000')
mlflow.set_experiment(experiment_name='sentiment')

<Experiment: artifact_location='mlflow-artifacts:/978528365283489439', creation_time=1679586216353, experiment_id='978528365283489439', last_update_time=1679586216353, lifecycle_stage='active', name='sentiment', tags={}>

In [3]:
# Load the labelled posts.
# The workbook location can be overridden with the SENTIMENT_DATA_PATH environment
# variable; the fallback is the original machine-specific path, so existing setups
# keep working unchanged.
data_path = os.environ.get(
    "SENTIMENT_DATA_PATH",
    r"G:\Mi unidad\Universidad\EAFIT\2 semestre\PDG\modelos\sentiment_data.xlsx",
)
# Drop rows whose post text is repeated. Re-assigning (instead of inplace=True)
# keeps this cell safe to re-run on its own.
df = pd.read_excel(data_path).drop_duplicates(subset=["Full Text"])

In [4]:
# Class distribution of the manually annotated sentiment labels.
manual_sentiment = df["Sentimiento Manual"]
manual_sentiment.value_counts()

POS    44467
NEU     3622
NEG      999
Name: Sentimiento Manual, dtype: int64

In [5]:
# Clean columns: derive the two model-facing columns from the raw spreadsheet columns.
label_ids = {"NEU":1,"NEG":0,"POS":2}
# Integer class id for each manual sentiment tag.
df["label"] = df["Sentimiento Manual"].replace(label_ids)
# Force every post to a plain string so the tokenizer never receives a non-text value.
df["text"] = df["Full Text"].map(str)
# Two-column view used by the baseline and by the first fine-tuning run.
data = df.loc[:, ["text", "label"]]

In [6]:
# Shared settings:
#   nombre_base - prefix used to build the run names
#   check_point - name of the pretrained checkpoint passed to from_pretrained
nombre_base = "ROBERTUITO_"
check_point = "pysentimiento/robertuito-sentiment-analysis"

## Modelo Base

In [7]:
# Baseline: classify every post with the checkpoint exactly as published (no fine-tuning).
tokenizer = AutoTokenizer.from_pretrained(check_point)
model = AutoModelForSequenceClassification.from_pretrained(check_point, num_labels=3)
sentiment_task = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    padding="max_length",
    truncation=True,
    max_length=128,
    device=0,
)
# The pipeline is called one text at a time; each call yields a one-element list
# whose single dict carries the predicted label name.
predicted_names = data["text"].map(lambda text: sentiment_task(str(text))[0]["label"])
# Translate label names into the same integer ids used for the ground truth.
pred = predicted_names.replace({"NEU":1,"NEG":0,"POS":2})
print(classification_report(data["label"], pred))



              precision    recall  f1-score   support

           0       0.46      0.67      0.54       999
           1       0.07      0.84      0.13      3622
           2       0.86      0.05      0.09     44467

    accuracy                           0.12     49088
   macro avg       0.46      0.52      0.25     49088
weighted avg       0.79      0.12      0.10     49088



In [8]:
# Log the baseline accuracy to MLflow under its own run name.
baseline_run = nombre_base + 'BASELINE'
with mlflow.start_run(run_name=baseline_run):
    acc = accuracy_score(y_true=data["label"], y_pred=pred)
    mlflow.log_metric(key="accuracy", value=acc)

## Modelo con todos los datos

In [9]:
# Model: start the fine-tuning run from a freshly loaded tokenizer and 3-label classifier.
tokenizer = AutoTokenizer.from_pretrained(check_point)
model = AutoModelForSequenceClassification.from_pretrained(
    check_point,
    num_labels=3,
)

In [10]:
# Convert the frame to a Hugging Face Dataset and split it 70/15/15 into train/dev/test.
dataset = Dataset.from_pandas(data, preserve_index=False)
split_options = {"shuffle": True, "seed": 200}
# First cut: 70% train, 30% held out.
train_devtest = dataset.train_test_split(test_size=0.3, **split_options)
# Second cut: the held-out part is halved into dev and test.
posts_dev_test = train_devtest["test"].train_test_split(test_size=0.50, **split_options)
posts_train_dev_test_dataset = DatasetDict(
    {
        "train": train_devtest["train"],
        "test": posts_dev_test["test"],
        "dev": posts_dev_test["train"],
    }
)

In [11]:
# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize the ``text`` field of a batch, padded and truncated to 128 tokens."""
    return tokenizer(
        examples["text"],
        max_length=128,
        truncation=True,
        padding="max_length",
    )


# Apply the tokenizer to every split, one batch of rows at a time.
tokenized_datasets = posts_train_dev_test_dataset.map(tokenize_function, batched=True)

100%|██████████| 35/35 [00:02<00:00, 14.00ba/s]
100%|██████████| 8/8 [00:00<00:00, 15.53ba/s]
100%|██████████| 8/8 [00:00<00:00, 12.60ba/s]


In [12]:
# Accuracy metric object, used by compute_metrics below.
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Score one evaluation pass.

    ``eval_pred`` unpacks into ``(logits, labels)``. The predicted class of each
    example is the index of its largest logit along the last axis; the result is
    whatever ``metric.compute`` returns for those predictions and labels.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=-1)
    return metric.compute(references=references, predictions=predicted_ids)

In [13]:
# Fine-tuning configuration for the run on the full dataset.
# Logging, evaluation and checkpointing all happen once per epoch, and the best
# checkpoint is reloaded when training ends.
training_args = TrainingArguments(output_dir="arj9719/bertweet_sentiment_analysis",
                                  logging_strategy="epoch",
                                  evaluation_strategy="epoch",
                                  per_device_train_batch_size=8,
                                  per_device_eval_batch_size=8,
                                  num_train_epochs=5,
                                  save_strategy = "epoch",
                                  load_best_model_at_end=True,
                                  push_to_hub=False,
                                  # Never commit access tokens: read the token from the environment.
                                  # It is None when HF_TOKEN is unset, which is harmless while
                                  # push_to_hub is False. The token that used to be hard-coded
                                  # here must be treated as leaked and revoked.
                                  hub_token=os.environ.get("HF_TOKEN"),
                                  run_name=nombre_base+"1",
                                  learning_rate=1e-6
                                  )

In [14]:
# Fine-tune on the train split; the dev split is used for the per-epoch evaluation.
train_split = tokenized_datasets["train"]
dev_split = tokenized_datasets["dev"]
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_split,
    eval_dataset=dev_split,
)
# Left as the last expression so the returned TrainOutput is displayed.
trainer.train()

The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 34361
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 21480
  Number of trainable parameters = 108791043
 20%|██        | 4296/21480 [08:55<35:30,  8.07it/s]The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 7363
  Batch size = 8


{'loss': 0.2003, 'learning_rate': 8e-07, 'epoch': 1.0}



 20%|██        | 4296/21480 [09:58<35:30,  8.07it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-4296
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-4296\config.json


{'eval_loss': 0.18893218040466309, 'eval_accuracy': 0.9513785141925846, 'eval_runtime': 62.8159, 'eval_samples_per_second': 117.215, 'eval_steps_per_second': 14.662, 'epoch': 1.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-4296\pytorch_model.bin
 40%|████      | 8592/21480 [18:59<26:17,  8.17it/s]   The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 7363
  Batch size = 8


{'loss': 0.1665, 'learning_rate': 6e-07, 'epoch': 2.0}


                                                    
 40%|████      | 8592/21480 [19:52<26:17,  8.17it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-8592
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-8592\config.json


{'eval_loss': 0.18173792958259583, 'eval_accuracy': 0.9545022409344017, 'eval_runtime': 53.2615, 'eval_samples_per_second': 138.242, 'eval_steps_per_second': 17.292, 'epoch': 2.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-8592\pytorch_model.bin
 60%|██████    | 12888/21480 [29:02<17:40,  8.10it/s]  The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 7363
  Batch size = 8


{'loss': 0.1539, 'learning_rate': 4e-07, 'epoch': 3.0}


                                                     
 60%|██████    | 12888/21480 [30:04<17:40,  8.10it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-12888
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-12888\config.json


{'eval_loss': 0.17504063248634338, 'eval_accuracy': 0.9554529403775635, 'eval_runtime': 62.3327, 'eval_samples_per_second': 118.124, 'eval_steps_per_second': 14.776, 'epoch': 3.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-12888\pytorch_model.bin
 80%|████████  | 17184/21480 [39:05<09:22,  7.63it/s]   The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 7363
  Batch size = 8


{'loss': 0.1464, 'learning_rate': 2e-07, 'epoch': 4.0}


                                                     
 80%|████████  | 17184/21480 [39:58<09:22,  7.63it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-17184
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-17184\config.json


{'eval_loss': 0.17338623106479645, 'eval_accuracy': 0.957082710851555, 'eval_runtime': 53.6802, 'eval_samples_per_second': 137.164, 'eval_steps_per_second': 17.157, 'epoch': 4.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-17184\pytorch_model.bin
100%|██████████| 21480/21480 [49:01<00:00,  7.83it/s]   The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 7363
  Batch size = 8


{'loss': 0.1447, 'learning_rate': 0.0, 'epoch': 5.0}


                                                     
100%|██████████| 21480/21480 [50:03<00:00,  7.83it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-21480
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-21480\config.json


{'eval_loss': 0.17310276627540588, 'eval_accuracy': 0.9569468966453891, 'eval_runtime': 61.6861, 'eval_samples_per_second': 119.362, 'eval_steps_per_second': 14.93, 'epoch': 5.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-21480\pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from arj9719/bertweet_sentiment_analysis\checkpoint-21480 (score: 0.17310276627540588).
100%|██████████| 21480/21480 [50:06<00:00,  7.14it/s]

{'train_runtime': 3006.7925, 'train_samples_per_second': 57.139, 'train_steps_per_second': 7.144, 'train_loss': 0.1623584086668558, 'epoch': 5.0}





TrainOutput(global_step=21480, training_loss=0.1623584086668558, metrics={'train_runtime': 3006.7925, 'train_samples_per_second': 57.139, 'train_steps_per_second': 7.144, 'train_loss': 0.1623584086668558, 'epoch': 5.0})

In [15]:
# Evaluate a saved checkpoint of the full-data run on the held-out test split.
test_df = pd.DataFrame(posts_train_dev_test_dataset["test"])
# Build the checkpoint path with os.path.join instead of embedding a backslash in the
# literal: "\c" is an invalid escape sequence in a regular string and the separator
# only worked on Windows.
# NOTE(review): the training log reports checkpoint-21480 as the best model
# (lowest eval_loss) - confirm that evaluating checkpoint-8592 is intentional.
checkpoint_dir = os.path.join(training_args.output_dir, "checkpoint-8592")
sentiment_task = pipeline(
    "sentiment-analysis",
    model=checkpoint_dir,
    tokenizer=tokenizer,
    padding="max_length",
    truncation=True,
    max_length=128,
    device=0,
)
# Each single-text call returns a one-element list; take the label of that prediction.
pred = test_df["text"].apply(lambda x: sentiment_task(str(x))[0]["label"])
# Map label names to the integer ids used for the ground truth.
pred = pred.replace({"NEU":1,"NEG":0,"POS":2})
print(classification_report(test_df["label"],pred))
# Log the test accuracy of this run to MLflow.
with mlflow.start_run(run_name=nombre_base+'1'):
    acc = accuracy_score(test_df["label"],pred)
    mlflow.log_metric("accuracy", acc)

loading configuration file arj9719/bertweet_sentiment_analysis\checkpoint-8592\config.json
Model config RobertaConfig {
  "_name_or_path": "arj9719/bertweet_sentiment_analysis\\checkpoint-8592",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEG",
    "1": "NEU",
    "2": "POS"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEG": 0,
    "NEU": 1,
    "POS": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.26.0",
  "type_voc

              precision    recall  f1-score   support

           0       0.71      0.74      0.72       159
           1       0.78      0.57      0.65       522
           2       0.97      0.99      0.98      6683

    accuracy                           0.95      7364
   macro avg       0.82      0.76      0.78      7364
weighted avg       0.95      0.95      0.95      7364



## Balanceo de clases

In [16]:
# Balance the classes: undersample every class down to the size of the rarest one.
# `x` carries the extra 'Date' column next to 'text' - presumably so the sampler
# receives a 2-D feature table (TODO confirm); it is dropped again right after.
x = df[['text','Date']]
y = df["label"]
# Derive the per-class target from the data instead of hard-coding it; with the
# current data the rarest class has 999 rows, so the result is unchanged.
minority_size = int(y.value_counts().min())
balanced_counts = {label: minority_size for label in sorted(y.unique())}
x_bal, y_bal = make_imbalance(x, y, sampling_strategy=balanced_counts, random_state=5)
x_bal = x_bal['text']

In [17]:
# Re-assemble the balanced sample (texts + labels) into a single frame.
# NOTE: this rebinds `df`, so the full dataset is no longer reachable under that name;
# the balancing cell reads a 'Date' column that this frame does not have, so the
# data must be reloaded before that cell can be re-run.
df = pd.concat([x_bal, y_bal], axis="columns")
df["text"] = df["text"].map(str)

In [18]:
# Check the class distribution of the balanced frame.
df["label"].value_counts()

0    999
1    999
2    999
Name: label, dtype: int64

## Modelo con clases balanceadas

In [19]:
# Model: reload tokenizer and 3-label classifier from the pretrained checkpoint so the
# balanced run does not continue from the weights fine-tuned in the previous run.
tokenizer = AutoTokenizer.from_pretrained(check_point)
model = AutoModelForSequenceClassification.from_pretrained(
    check_point,
    num_labels=3,
)

loading file tokenizer.json from cache at C:\Users\ALEJANDRO RENDON/.cache\huggingface\hub\models--pysentimiento--robertuito-sentiment-analysis\snapshots\12e030859ce19539e24b486ac84ffebb9b68ecf1\tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at C:\Users\ALEJANDRO RENDON/.cache\huggingface\hub\models--pysentimiento--robertuito-sentiment-analysis\snapshots\12e030859ce19539e24b486ac84ffebb9b68ecf1\special_tokens_map.json
loading file tokenizer_config.json from cache at C:\Users\ALEJANDRO RENDON/.cache\huggingface\hub\models--pysentimiento--robertuito-sentiment-analysis\snapshots\12e030859ce19539e24b486ac84ffebb9b68ecf1\tokenizer_config.json
loading configuration file config.json from cache at C:\Users\ALEJANDRO RENDON/.cache\huggingface\hub\models--pysentimiento--robertuito-sentiment-analysis\snapshots\12e030859ce19539e24b486ac84ffebb9b68ecf1\config.json
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertu

In [20]:
# Convert the balanced frame to a Hugging Face Dataset and split it 70/15/15
# into train/dev/test.
dataset = Dataset.from_pandas(df, preserve_index=False)
split_options = {"shuffle": True, "seed": 200}
# First cut: 70% train, 30% held out.
train_devtest = dataset.train_test_split(test_size=0.3, **split_options)
# Second cut: the held-out part is halved into dev and test.
posts_dev_test = train_devtest["test"].train_test_split(test_size=0.50, **split_options)
posts_train_dev_test_dataset = DatasetDict(
    {
        "train": train_devtest["train"],
        "test": posts_dev_test["test"],
        "dev": posts_dev_test["train"],
    }
)

In [21]:
# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize the ``text`` field of a batch, padded and truncated to 128 tokens."""
    return tokenizer(
        examples["text"],
        max_length=128,
        truncation=True,
        padding="max_length",
    )


# Apply the tokenizer to every split of the balanced dataset, one batch of rows at a time.
tokenized_datasets = posts_train_dev_test_dataset.map(tokenize_function, batched=True)

100%|██████████| 3/3 [00:00<00:00, 17.44ba/s]
100%|██████████| 1/1 [00:00<00:00, 31.24ba/s]
100%|██████████| 1/1 [00:00<00:00, 32.25ba/s]


In [22]:
# Accuracy metric object, used by compute_metrics below.
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Score one evaluation pass.

    ``eval_pred`` unpacks into ``(logits, labels)``. The predicted class of each
    example is the index of its largest logit along the last axis; the result is
    whatever ``metric.compute`` returns for those predictions and labels.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=-1)
    return metric.compute(references=references, predictions=predicted_ids)


In [23]:
# Fine-tuning configuration for the run on the class-balanced dataset.
# Logging, evaluation and checkpointing all happen once per epoch, and the best
# checkpoint is reloaded when training ends.
training_args = TrainingArguments(output_dir="arj9719/bertweet_sentiment_analysis",
                                  logging_strategy="epoch",
                                  evaluation_strategy="epoch",
                                  per_device_train_batch_size=8,
                                  per_device_eval_batch_size=8,
                                  num_train_epochs=5,
                                  save_strategy = "epoch",
                                  load_best_model_at_end=True,
                                  push_to_hub=False,
                                  # Never commit access tokens: read the token from the environment.
                                  # It is None when HF_TOKEN is unset, which is harmless while
                                  # push_to_hub is False. The token that used to be hard-coded
                                  # here must be treated as leaked and revoked.
                                  hub_token=os.environ.get("HF_TOKEN"),
                                  run_name=nombre_base+"2",
                                  learning_rate=1e-6
                                  )

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [24]:
# Fine-tune on the balanced train split; the dev split is used for the per-epoch evaluation.
train_split = tokenized_datasets["train"]
dev_split = tokenized_datasets["dev"]
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_split,
    eval_dataset=dev_split,
)
# Left as the last expression so the returned TrainOutput is displayed.
trainer.train()

The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 2097
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1315
  Number of trainable parameters = 108791043
 20%|██        | 263/1315 [00:35<02:11,  8.02it/s]The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 450
  Batch size = 8


{'loss': 0.6187, 'learning_rate': 8e-07, 'epoch': 1.0}



 20%|██        | 263/1315 [00:37<02:11,  8.02it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-263
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-263\config.json


{'eval_loss': 0.40353095531463623, 'eval_accuracy': 0.8555555555555555, 'eval_runtime': 2.4007, 'eval_samples_per_second': 187.449, 'eval_steps_per_second': 23.744, 'epoch': 1.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-263\pytorch_model.bin
 40%|████      | 526/1315 [01:14<01:36,  8.18it/s]The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 450
  Batch size = 8


{'loss': 0.4893, 'learning_rate': 6e-07, 'epoch': 2.0}



 40%|████      | 526/1315 [01:17<01:36,  8.18it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-526
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-526\config.json


{'eval_loss': 0.3643520772457123, 'eval_accuracy': 0.86, 'eval_runtime': 2.332, 'eval_samples_per_second': 192.971, 'eval_steps_per_second': 24.443, 'epoch': 2.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-526\pytorch_model.bin
 60%|██████    | 789/1315 [02:07<01:06,  7.91it/s]The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 450
  Batch size = 8


{'loss': 0.4629, 'learning_rate': 4e-07, 'epoch': 3.0}



 60%|██████    | 789/1315 [02:09<01:06,  7.91it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-789
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-789\config.json


{'eval_loss': 0.35212230682373047, 'eval_accuracy': 0.8622222222222222, 'eval_runtime': 2.4197, 'eval_samples_per_second': 185.973, 'eval_steps_per_second': 23.557, 'epoch': 3.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-789\pytorch_model.bin
 80%|████████  | 1052/1315 [02:47<00:33,  7.90it/s]The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 450
  Batch size = 8


{'loss': 0.4471, 'learning_rate': 2e-07, 'epoch': 4.0}


                                                   
 80%|████████  | 1052/1315 [02:49<00:33,  7.90it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-1052
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-1052\config.json


{'eval_loss': 0.3459126651287079, 'eval_accuracy': 0.8666666666666667, 'eval_runtime': 2.4176, 'eval_samples_per_second': 186.136, 'eval_steps_per_second': 23.577, 'epoch': 4.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-1052\pytorch_model.bin
100%|██████████| 1315/1315 [03:38<00:00,  7.94it/s]The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 450
  Batch size = 8


{'loss': 0.4357, 'learning_rate': 0.0, 'epoch': 5.0}


                                                   
100%|██████████| 1315/1315 [03:41<00:00,  7.94it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-1315
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-1315\config.json


{'eval_loss': 0.34457194805145264, 'eval_accuracy': 0.8666666666666667, 'eval_runtime': 2.4596, 'eval_samples_per_second': 182.96, 'eval_steps_per_second': 23.175, 'epoch': 5.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-1315\pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from arj9719/bertweet_sentiment_analysis\checkpoint-1315 (score: 0.34457194805145264).
100%|██████████| 1315/1315 [03:57<00:00,  5.53it/s]

{'train_runtime': 237.7466, 'train_samples_per_second': 44.102, 'train_steps_per_second': 5.531, 'train_loss': 0.49076203320869477, 'epoch': 5.0}





TrainOutput(global_step=1315, training_loss=0.49076203320869477, metrics={'train_runtime': 237.7466, 'train_samples_per_second': 44.102, 'train_steps_per_second': 5.531, 'train_loss': 0.49076203320869477, 'epoch': 5.0})

In [25]:
# Evaluate the final checkpoint of the balanced run on the held-out test split.
test_df = pd.DataFrame(posts_train_dev_test_dataset["test"])
# Build the checkpoint path with os.path.join instead of embedding a backslash in the
# literal: "\c" is an invalid escape sequence in a regular string and the separator
# only worked on Windows. checkpoint-1315 is the one the training log reports as best.
checkpoint_dir = os.path.join(training_args.output_dir, "checkpoint-1315")
sentiment_task = pipeline(
    "sentiment-analysis",
    model=checkpoint_dir,
    tokenizer=tokenizer,
    padding="max_length",
    truncation=True,
    max_length=128,
    device=0,
)
# Each single-text call returns a one-element list; take the label of that prediction.
pred = test_df["text"].apply(lambda x: sentiment_task(str(x))[0]["label"])
# Map label names to the integer ids used for the ground truth.
pred = pred.replace({"NEU":1,"NEG":0,"POS":2})
print(classification_report(test_df["label"],pred))
# Log the test accuracy of this run to MLflow.
with mlflow.start_run(run_name=nombre_base+'2'):
    acc = accuracy_score(test_df["label"],pred)
    mlflow.log_metric("accuracy", acc)

loading configuration file arj9719/bertweet_sentiment_analysis\checkpoint-1315\config.json
Model config RobertaConfig {
  "_name_or_path": "arj9719/bertweet_sentiment_analysis\\checkpoint-1315",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEG",
    "1": "NEU",
    "2": "POS"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEG": 0,
    "NEU": 1,
    "POS": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.26.0",
  "type_voc

              precision    recall  f1-score   support

           0       0.87      0.85      0.86       153
           1       0.76      0.74      0.75       156
           2       0.82      0.87      0.84       141

    accuracy                           0.82       450
   macro avg       0.82      0.82      0.82       450
weighted avg       0.82      0.82      0.82       450

