## Librerías, datos y procesamiento inicial

In [1]:
#Importamos las librerías
import pandas as pd
import numpy as np
import evaluate
from imblearn.datasets import make_imbalance
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline, TrainingArguments, Trainer, BertModel
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from datasets import Dataset,load_dataset, load_from_disk, DatasetDict
import mlflow, os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# MLflow setup: export the experiment settings as environment variables
# (presumably read by the transformers MLflow integration -- confirm against its docs),
# then point the client at the local tracking server and select the experiment.
os.environ.update(
    {
        "MLFLOW_EXPERIMENT_NAME": "topic",
        "MLFLOW_FLATTEN_PARAMS": "1",
        "HF_MLFLOW_LOG_ARTIFACTS": "0",
    }
)
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment(experiment_name="topic")

<Experiment: artifact_location='mlflow-artifacts:/433727646153651320', creation_time=1679618966216, experiment_id='433727646153651320', last_update_time=1679618966216, lifecycle_stage='active', name='topic', tags={}>

In [3]:
# Load the cleaned dataset.
# The location can be overridden through the TOPIC_DATA_PATH environment variable so the
# notebook is not tied to a single machine; the default is the original local path.
df = pd.read_excel(
    os.environ.get(
        "TOPIC_DATA_PATH",
        r"G:\Mi unidad\Universidad\EAFIT\2 semestre\PDG\modelos\TOPIC\data_depurada.xlsx",
    )
)

In [4]:
# Number of rows per value of the `label` column.
df.label.value_counts()

Post de interacción      2130
Dudas                    1062
Reclutamiento             489
Menciones                 436
Felicitaciones            400
Crítica                   393
Noticias                  194
Condiciones laborales     146
Alcance                   126
Oferta terceros           116
Pagos                      64
Otros                      48
Name: label, dtype: int64

In [5]:
# Preprocessing: coerce every text to str and overwrite `label` with the integer ids
# stored in `idx_label`; `data` keeps only the two columns of interest.
df["text"] = df["text"].map(str)
df["label"] = df["idx_label"]
data = df[["text", "label"]]
# Topic name -> integer class id (insertion order kept as originally written).
LABEL2ID = dict(
    [
        ("otros", 11),
        ("alcance", 0),
        ("condiciones laborales", 1),
        ("critica", 2),
        ("dudas", 3),
        ("felicitaciones", 4),
        ("menciones", 5),
        ("noticias", 6),
        ("ofertas terceros", 7),
        ("pagos", 8),
        ("post interaccion", 9),
        ("reclutamiento", 10),
    ]
)
# Inverse mapping: integer class id -> topic name.
ID2LABEL = dict(zip(LABEL2ID.values(), LABEL2ID.keys()))

In [6]:
# Run-wide settings: prefix used to build MLflow run names, and the pretrained
# checkpoint passed to `from_pretrained`.
nombre_base = "beto_"
check_point = "finiteautomata/beto-sentiment-analysis"

## Modelo Base

In [7]:
# Baseline: score the checkpoint before any fine-tuning. The classification head is
# re-created for the topic labels (ignore_mismatched_sizes=True), so these numbers
# serve as a floor for the fine-tuned runs.
tokenizer = AutoTokenizer.from_pretrained(check_point)
model = AutoModelForSequenceClassification.from_pretrained(
    check_point,
    num_labels=len(LABEL2ID),  # derived from the mapping instead of a hard-coded 12
    id2label=ID2LABEL,
    label2id=LABEL2ID,
    ignore_mismatched_sizes=True,
)
classification_task = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    padding="max_length",
    truncation=True,
    max_length=128,
    device=0,
)
# A single pipeline call over the whole list replaces one call per DataFrame row.
baseline_outputs = classification_task([str(text) for text in df["text"]])
# Map predicted label names to ids through the shared LABEL2ID mapping rather than a
# duplicated literal, so the two can never drift apart.
pred = pd.Series(
    [output["label"] for output in baseline_outputs], index=df.index
).map(LABEL2ID)
print(classification_report(df["label"], pred))

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at finiteautomata/beto-sentiment-analysis and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([12, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([12]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       126
           1       0.03      0.01      0.02       146
           2       0.00      0.00      0.00       393
           3       0.34      0.69      0.46      1062
           4       0.01      0.00      0.00       400
           5       0.12      0.29      0.17       436
           6       0.00      0.00      0.00       194
           7       0.01      0.14      0.02       116
           8       0.00      0.00      0.00        64
           9       0.13      0.01      0.02      2130
          10       0.08      0.01      0.02       489
          11       0.01      0.08      0.01        48

    accuracy                           0.16      5604
   macro avg       0.06      0.10      0.06      5604
weighted avg       0.13      0.16      0.11      5604



In [8]:
# Log the baseline accuracy in its own MLflow run.
with mlflow.start_run(run_name=f"{nombre_base}BASELINE"):
    acc = accuracy_score(y_true=df["label"], y_pred=pred)
    mlflow.log_metric(key="accuracy", value=acc)

## Modelo con todos los datos

In [9]:
# Model: reload tokenizer and model from the checkpoint so fine-tuning starts from a
# fresh 12-label classification head.
tokenizer = AutoTokenizer.from_pretrained(check_point)
model = AutoModelForSequenceClassification.from_pretrained(
    check_point,
    ignore_mismatched_sizes=True,
    label2id=LABEL2ID,
    id2label=ID2LABEL,
    num_labels=12,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at finiteautomata/beto-sentiment-analysis and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([12, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([12]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
# Convert the DataFrame to a Dataset and split it: 70% train, then the remaining 30%
# is halved into dev and test. Both splits shuffle with the same fixed seed.
dataset = Dataset.from_pandas(df, preserve_index=False)
train_devtest = dataset.train_test_split(test_size=0.3, shuffle=True, seed=200)
posts_dev_test = train_devtest["test"].train_test_split(
    test_size=0.50, shuffle=True, seed=200
)
posts_train_dev_test_dataset = DatasetDict(
    train=train_devtest["train"],
    test=posts_dev_test["test"],
    dev=posts_dev_test["train"],
)

In [11]:
# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize the ``text`` field of a batch, padded and truncated to 128 tokens."""
    return tokenizer(
        examples["text"],
        max_length=128,
        truncation=True,
        padding="max_length",
    )
tokenized_datasets = posts_train_dev_test_dataset.map(tokenize_function, batched=True)

100%|██████████| 4/4 [00:00<00:00, 11.94ba/s]
100%|██████████| 1/1 [00:00<00:00, 16.12ba/s]
100%|██████████| 1/1 [00:00<00:00, 15.62ba/s]


In [12]:
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Return accuracy for an evaluation step.

    ``eval_pred`` unpacks into ``(logits, labels)``; the predicted class is the
    argmax over the last axis of the logits.
    """
    logits, labels = eval_pred
    return metric.compute(
        references=labels,
        predictions=np.argmax(logits, axis=-1),
    )

In [13]:
# Training configuration for the full-data run.
# SECURITY: a Hugging Face access token used to be hard-coded in this cell. Treat that
# token as leaked and revoke it. The token is now read from the HF_TOKEN environment
# variable (None when unset); push_to_hub is False, so it is not required for training.
training_args = TrainingArguments(
    output_dir="arj9719/bertweet_sentiment_analysis",
    logging_strategy="epoch",
    evaluation_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    save_strategy="epoch",  # one checkpoint per epoch, matching the evaluation cadence
    load_best_model_at_end=True,
    push_to_hub=False,
    hub_token=os.environ.get("HF_TOKEN"),
    run_name=nombre_base + "1",
    learning_rate=1e-6,
)

In [14]:
# Fine-tune on the train split; the dev split is used for evaluation and accuracy is
# reported through compute_metrics.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=tokenized_datasets["dev"],
    train_dataset=tokenized_datasets["train"],
)
trainer.train()

The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 3922
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 2455
  Number of trainable parameters = 109863180
 20%|██        | 491/2455 [01:07<04:14,  7.70it/s]The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 841
  Batch size = 8


{'loss': 1.8086, 'learning_rate': 8e-07, 'epoch': 1.0}



 20%|██        | 491/2455 [01:13<04:14,  7.70it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-491
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-491\config.json


{'eval_loss': 1.3060322999954224, 'eval_accuracy': 0.6860879904875149, 'eval_runtime': 5.574, 'eval_samples_per_second': 150.879, 'eval_steps_per_second': 19.017, 'epoch': 1.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-491\pytorch_model.bin
 40%|████      | 982/2455 [02:21<03:10,  7.71it/s]  The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 841
  Batch size = 8


{'loss': 1.2209, 'learning_rate': 6e-07, 'epoch': 2.0}


                                                  
 40%|████      | 982/2455 [02:26<03:10,  7.71it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-982
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-982\config.json


{'eval_loss': 1.0403177738189697, 'eval_accuracy': 0.7193816884661117, 'eval_runtime': 5.3384, 'eval_samples_per_second': 157.537, 'eval_steps_per_second': 19.856, 'epoch': 2.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-982\pytorch_model.bin
 60%|██████    | 1473/2455 [03:41<02:02,  7.99it/s] The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 841
  Batch size = 8


{'loss': 1.033, 'learning_rate': 4e-07, 'epoch': 3.0}


                                                   
 60%|██████    | 1473/2455 [03:47<02:02,  7.99it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-1473
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-1473\config.json


{'eval_loss': 0.9367765784263611, 'eval_accuracy': 0.737217598097503, 'eval_runtime': 5.3119, 'eval_samples_per_second': 158.324, 'eval_steps_per_second': 19.955, 'epoch': 3.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-1473\pytorch_model.bin
 80%|████████  | 1964/2455 [05:03<01:02,  7.85it/s]  The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 841
  Batch size = 8


{'loss': 0.9505, 'learning_rate': 2e-07, 'epoch': 4.0}


                                                   
 80%|████████  | 1964/2455 [05:08<01:02,  7.85it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-1964
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-1964\config.json


{'eval_loss': 0.8870592713356018, 'eval_accuracy': 0.7491082045184304, 'eval_runtime': 5.1879, 'eval_samples_per_second': 162.109, 'eval_steps_per_second': 20.432, 'epoch': 4.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-1964\pytorch_model.bin
100%|██████████| 2455/2455 [06:20<00:00,  7.81it/s]The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 841
  Batch size = 8


{'loss': 0.8994, 'learning_rate': 0.0, 'epoch': 5.0}


                                                   
100%|██████████| 2455/2455 [06:25<00:00,  7.81it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-2455
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-2455\config.json


{'eval_loss': 0.8702495694160461, 'eval_accuracy': 0.7526753864447087, 'eval_runtime': 5.3789, 'eval_samples_per_second': 156.351, 'eval_steps_per_second': 19.707, 'epoch': 5.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-2455\pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from arj9719/bertweet_sentiment_analysis\checkpoint-2455 (score: 0.8702495694160461).
100%|██████████| 2455/2455 [06:39<00:00,  6.15it/s]

{'train_runtime': 399.3523, 'train_samples_per_second': 49.105, 'train_steps_per_second': 6.147, 'train_loss': 1.1824808963690172, 'epoch': 5.0}





TrainOutput(global_step=2455, training_loss=1.1824808963690172, metrics={'train_runtime': 399.3523, 'train_samples_per_second': 49.105, 'train_steps_per_second': 6.147, 'train_loss': 1.1824808963690172, 'epoch': 5.0})

In [15]:
# Evaluate the fine-tuned checkpoint on the held-out test split and log its accuracy.
test_df = pd.DataFrame(posts_train_dev_test_dataset["test"])
# Build the checkpoint path with os.path.join: the previous literal relied on the
# invalid escape sequence "\c" and on a Windows-only path separator.
checkpoint_dir = os.path.join("arj9719/bertweet_sentiment_analysis", "checkpoint-2455")
classification_task = pipeline(
    "text-classification",
    model=checkpoint_dir,
    tokenizer=tokenizer,
    padding="max_length",
    truncation=True,
    max_length=128,
    device=0,
)
# A single pipeline call over the whole list replaces one call per DataFrame row.
test_outputs = classification_task([str(text) for text in test_df["text"]])
# Reuse the shared LABEL2ID mapping instead of a duplicated literal.
pred = pd.Series(
    [output["label"] for output in test_outputs], index=test_df.index
).map(LABEL2ID)
# zero_division=0 keeps the reported zeros for classes that are never predicted while
# silencing the repeated undefined-metric warnings.
print(classification_report(test_df["label"], pred, zero_division=0))
with mlflow.start_run(run_name=nombre_base + "1"):
    acc = accuracy_score(test_df["label"], pred)
    mlflow.log_metric("accuracy", acc)

loading configuration file arj9719/bertweet_sentiment_analysis\checkpoint-2455\config.json
Model config BertConfig {
  "_name_or_path": "arj9719/bertweet_sentiment_analysis\\checkpoint-2455",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "alcance",
    "1": "condiciones laborales",
    "2": "critica",
    "3": "dudas",
    "4": "felicitaciones",
    "5": "menciones",
    "6": "noticias",
    "7": "ofertas terceros",
    "8": "pagos",
    "9": "post interaccion",
    "10": "reclutamiento",
    "11": "otros"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "alcance": 0,
    "condiciones laborales": 1,
    "critica": 2,
    "dudas": 3,
    "felicitaciones": 4,
    "menciones": 5,
    "noticias": 6,
    "ofertas terceros": 7,
    "otros": 

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        22
           1       0.00      0.00      0.00        15
           2       0.51      0.88      0.65        58
           3       0.76      0.99      0.86       154
           4       0.68      0.97      0.80        63
           5       0.65      0.16      0.25        82
           6       0.92      0.55      0.69        22
           7       0.00      0.00      0.00        17
           8       0.00      0.00      0.00         9
           9       0.83      0.96      0.89       318
          10       0.62      0.40      0.48        78
          11       0.00      0.00      0.00         3

    accuracy                           0.74       841
   macro avg       0.41      0.41      0.38       841
weighted avg       0.68      0.74      0.69       841



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Balanceo de clases

In [16]:
# Reload the raw data (earlier cells overwrote `label` with integer ids) and undersample
# every class to the same number of rows.
# The file location can be overridden through the TOPIC_DATA_PATH environment variable;
# the default is the original local path.
df = pd.read_excel(
    os.environ.get(
        "TOPIC_DATA_PATH",
        r"G:\Mi unidad\Universidad\EAFIT\2 semestre\PDG\modelos\TOPIC\data_depurada.xlsx",
    )
)
x = df[["text", "idx_label"]]
y = df["label"]
# Every class is reduced to 48 rows; the count is written once instead of twelve times.
samples_per_class = 48
sampling_strategy = dict.fromkeys(
    [
        "Post de interacción",
        "Dudas",
        "Reclutamiento",
        "Menciones",
        "Crítica",
        "Felicitaciones",
        "Noticias",
        "Condiciones laborales",
        "Alcance",
        "Oferta terceros",
        "Pagos",
        "Otros",
    ],
    samples_per_class,
)
x_bal, y_bal = make_imbalance(
    x, y, sampling_strategy=sampling_strategy, random_state=5
)

In [17]:
# Reassemble the balanced sample into one frame, coerce texts to str and overwrite
# `label` with the integer ids from `idx_label`.
df = pd.concat([x_bal, y_bal], axis=1).assign(
    text=lambda frame: frame["text"].map(str),
    label=lambda frame: frame["idx_label"],
)

In [18]:
# Number of rows per class id after balancing.
df["label"].value_counts()

0     48
1     48
2     48
3     48
4     48
5     48
6     48
7     48
11    48
8     48
9     48
10    48
Name: label, dtype: int64

## Modelo con clases balanceadas

In [19]:
# Model: reload tokenizer and model from the checkpoint so the balanced-data run starts
# from a fresh 12-label classification head.
tokenizer = AutoTokenizer.from_pretrained(check_point)
model = AutoModelForSequenceClassification.from_pretrained(
    check_point,
    ignore_mismatched_sizes=True,
    label2id=LABEL2ID,
    id2label=ID2LABEL,
    num_labels=12,
)

loading configuration file config.json from cache at C:\Users\ALEJANDRO RENDON/.cache\huggingface\hub\models--finiteautomata--beto-sentiment-analysis\snapshots\9384c7f339a6d62c9e2e1d686f225f92e3a3353b\config.json
Model config BertConfig {
  "_name_or_path": "finiteautomata/beto-sentiment-analysis",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEG",
    "1": "NEU",
    "2": "POS"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEG": 0,
    "NEU": 1,
    "POS": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classificati

In [20]:
# Convert the balanced DataFrame to a Dataset and split it: 70% train, then the
# remaining 30% is halved into dev and test. Both splits shuffle with the same fixed seed.
dataset = Dataset.from_pandas(df, preserve_index=False)
train_devtest = dataset.train_test_split(test_size=0.3, shuffle=True, seed=200)
posts_dev_test = train_devtest["test"].train_test_split(
    test_size=0.50, shuffle=True, seed=200
)
posts_train_dev_test_dataset = DatasetDict(
    train=train_devtest["train"],
    test=posts_dev_test["test"],
    dev=posts_dev_test["train"],
)

In [21]:
# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize the ``text`` field of a batch, padded and truncated to 128 tokens."""
    return tokenizer(
        examples["text"],
        max_length=128,
        truncation=True,
        padding="max_length",
    )
tokenized_datasets = posts_train_dev_test_dataset.map(tokenize_function, batched=True)

100%|██████████| 1/1 [00:00<00:00, 18.86ba/s]
100%|██████████| 1/1 [00:00<00:00, 124.96ba/s]
100%|██████████| 1/1 [00:00<00:00, 142.78ba/s]


In [22]:
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Return accuracy for an evaluation step.

    ``eval_pred`` unpacks into ``(logits, labels)``; the predicted class is the
    argmax over the last axis of the logits.
    """
    logits, labels = eval_pred
    return metric.compute(
        references=labels,
        predictions=np.argmax(logits, axis=-1),
    )

In [23]:
# Training configuration for the balanced-data run.
# SECURITY: a Hugging Face access token used to be hard-coded in this cell. Treat that
# token as leaked and revoke it. The token is now read from the HF_TOKEN environment
# variable (None when unset); push_to_hub is False, so it is not required for training.
training_args = TrainingArguments(
    output_dir="arj9719/bertweet_sentiment_analysis",
    logging_strategy="epoch",
    evaluation_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    save_strategy="epoch",  # one checkpoint per epoch, matching the evaluation cadence
    load_best_model_at_end=True,
    push_to_hub=False,
    hub_token=os.environ.get("HF_TOKEN"),
    run_name=nombre_base + "2",
    learning_rate=1e-6,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [24]:
# Fine-tune on the balanced train split; the dev split is used for evaluation and
# accuracy is reported through compute_metrics.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=tokenized_datasets["dev"],
    train_dataset=tokenized_datasets["train"],
)
trainer.train()

The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 403
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 255
  Number of trainable parameters = 109863180
 20%|██        | 51/255 [00:08<00:28,  7.18it/s]The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 86
  Batch size = 8


{'loss': 2.4616, 'learning_rate': 8e-07, 'epoch': 1.0}


                                                
 20%|██        | 51/255 [00:08<00:28,  7.18it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-51
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-51\config.json


{'eval_loss': 2.4664790630340576, 'eval_accuracy': 0.09302325581395349, 'eval_runtime': 0.4382, 'eval_samples_per_second': 196.276, 'eval_steps_per_second': 25.105, 'epoch': 1.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-51\pytorch_model.bin
 40%|████      | 102/255 [00:31<00:20,  7.54it/s]The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 86
  Batch size = 8


{'loss': 2.397, 'learning_rate': 6e-07, 'epoch': 2.0}


                                                 
 40%|████      | 102/255 [00:32<00:20,  7.54it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-102
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-102\config.json


{'eval_loss': 2.416167974472046, 'eval_accuracy': 0.1744186046511628, 'eval_runtime': 0.4091, 'eval_samples_per_second': 210.193, 'eval_steps_per_second': 26.885, 'epoch': 2.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-102\pytorch_model.bin
 60%|██████    | 153/255 [00:56<00:14,  7.27it/s]The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 86
  Batch size = 8


{'loss': 2.3744, 'learning_rate': 4e-07, 'epoch': 3.0}


                                                 
 60%|██████    | 153/255 [00:57<00:14,  7.27it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-153
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-153\config.json


{'eval_loss': 2.3758203983306885, 'eval_accuracy': 0.18604651162790697, 'eval_runtime': 0.4332, 'eval_samples_per_second': 198.543, 'eval_steps_per_second': 25.395, 'epoch': 3.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-153\pytorch_model.bin
 80%|████████  | 204/255 [01:22<00:07,  6.44it/s]The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 86
  Batch size = 8


{'loss': 2.3443, 'learning_rate': 2e-07, 'epoch': 4.0}


                                                 
 80%|████████  | 204/255 [01:23<00:07,  6.44it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-204
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-204\config.json


{'eval_loss': 2.3572914600372314, 'eval_accuracy': 0.19767441860465115, 'eval_runtime': 0.4952, 'eval_samples_per_second': 173.675, 'eval_steps_per_second': 22.214, 'epoch': 4.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-204\pytorch_model.bin
100%|██████████| 255/255 [01:34<00:00,  7.09it/s]The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, idx_label. If text, idx_label are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 86
  Batch size = 8


{'loss': 2.3244, 'learning_rate': 0.0, 'epoch': 5.0}


                                                 
100%|██████████| 255/255 [01:34<00:00,  7.09it/s]Saving model checkpoint to arj9719/bertweet_sentiment_analysis\checkpoint-255
Configuration saved in arj9719/bertweet_sentiment_analysis\checkpoint-255\config.json


{'eval_loss': 2.351274251937866, 'eval_accuracy': 0.19767441860465115, 'eval_runtime': 0.4522, 'eval_samples_per_second': 190.197, 'eval_steps_per_second': 24.328, 'epoch': 5.0}


Model weights saved in arj9719/bertweet_sentiment_analysis\checkpoint-255\pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from arj9719/bertweet_sentiment_analysis\checkpoint-255 (score: 2.351274251937866).
100%|██████████| 255/255 [02:06<00:00,  2.01it/s]

{'train_runtime': 126.9608, 'train_samples_per_second': 15.871, 'train_steps_per_second': 2.008, 'train_loss': 2.3803559845569087, 'epoch': 5.0}





TrainOutput(global_step=255, training_loss=2.3803559845569087, metrics={'train_runtime': 126.9608, 'train_samples_per_second': 15.871, 'train_steps_per_second': 2.008, 'train_loss': 2.3803559845569087, 'epoch': 5.0})

In [25]:
# Evaluate the balanced-data checkpoint on the held-out test split and log its accuracy.
test_df = pd.DataFrame(posts_train_dev_test_dataset["test"])
# Build the checkpoint path with os.path.join: the previous literal relied on the
# invalid escape sequence "\c" and on a Windows-only path separator.
checkpoint_dir = os.path.join("arj9719/bertweet_sentiment_analysis", "checkpoint-255")
classification_task = pipeline(
    "text-classification",
    model=checkpoint_dir,
    tokenizer=tokenizer,
    padding="max_length",
    truncation=True,
    max_length=128,
    device=0,
)
# A single pipeline call over the whole list replaces one call per DataFrame row.
test_outputs = classification_task([str(text) for text in test_df["text"]])
# Reuse the shared LABEL2ID mapping instead of a duplicated literal.
pred = pd.Series(
    [output["label"] for output in test_outputs], index=test_df.index
).map(LABEL2ID)
# zero_division=0 keeps the reported zeros for classes that are never predicted while
# silencing the repeated undefined-metric warnings.
print(classification_report(test_df["label"], pred, zero_division=0))
with mlflow.start_run(run_name=nombre_base + "2"):
    acc = accuracy_score(test_df["label"], pred)
    mlflow.log_metric("accuracy", acc)

loading configuration file arj9719/bertweet_sentiment_analysis\checkpoint-255\config.json
Model config BertConfig {
  "_name_or_path": "arj9719/bertweet_sentiment_analysis\\checkpoint-255",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "alcance",
    "1": "condiciones laborales",
    "2": "critica",
    "3": "dudas",
    "4": "felicitaciones",
    "5": "menciones",
    "6": "noticias",
    "7": "ofertas terceros",
    "8": "pagos",
    "9": "post interaccion",
    "10": "reclutamiento",
    "11": "otros"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "alcance": 0,
    "condiciones laborales": 1,
    "critica": 2,
    "dudas": 3,
    "felicitaciones": 4,
    "menciones": 5,
    "noticias": 6,
    "ofertas terceros": 7,
    "otros": 11

              precision    recall  f1-score   support

           0       0.33      0.14      0.20         7
           1       0.00      0.00      0.00         6
           2       0.18      0.80      0.30         5
           3       0.00      0.00      0.00         9
           4       0.57      0.44      0.50         9
           5       0.25      0.14      0.18         7
           6       0.17      0.20      0.18        10
           7       0.33      0.20      0.25         5
           8       0.00      0.00      0.00         6
           9       1.00      0.43      0.60         7
          10       0.16      0.43      0.23         7
          11       0.00      0.00      0.00         9

    accuracy                           0.22        87
   macro avg       0.25      0.23      0.20        87
weighted avg       0.25      0.22      0.20        87

