In [1]:
!pip install numpy
!pip install torch
import numpy #1.24
print(numpy.__version__)

!pip install transformers[torch]
!pip install transformers
!pip install scikit-learn
!pip install sentencepiece
!pip install protobuf

!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118


1.24.0
Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu118


In [26]:
import torch
if torch.cuda.is_available():
    device = torch.device("cuda")
    print('GPU disponivel')
    print('Número de GPUs disponíveis:', torch.cuda.device_count())
    print('Nome da GPU:', torch.cuda.get_device_name(0))

else:
    device = torch.device('cpu')
    print('GPU nao disponivel, utilizando CPU')

GPU disponivel
Número de GPUs disponíveis: 1
Nome da GPU: NVIDIA GeForce RTX 2070


In [27]:
import pandas as pd
from sklearn.model_selection import train_test_split

data = pd.read_csv('output_deberta.csv', sep=';')

inputs = [str(text) for text in data['texto'].tolist()]

outputs = data['sentiment'].tolist()

train_inputs, test_inputs, train_outputs, test_outputs = train_test_split(inputs, outputs, test_size=0.2, random_state=42)

In [28]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset, DataLoader


class Dataset(Dataset):
    def __init__(self, inputs, outputs, tokenizer, max_length=512):
        self.inputs = inputs
        self.outputs = outputs
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.inputs)
    
    def __getitem__(self, idx):
        input_text = self.inputs[idx]
        label = self.outputs[idx]

        input_encoding = self.tokenizer(
            input_text, padding='max_length', truncation=True, max_length=self.max_length, return_tensors='pt'
        )

        input_ids = input_encoding['input_ids'].squeeze()
        attention_mask = input_encoding['attention_mask'].squeeze()

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': torch.tensor(label)  # Certificar que os labels sejam tensores
        }
    

model_name = 'mrm8488/deberta-v3-ft-financial-news-sentiment-analysis'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained("mrm8488/deberta-v3-ft-financial-news-sentiment-analysis", num_labels=3).to(device)


train_dataset = Dataset(train_inputs, train_outputs, tokenizer)
test_dataset = Dataset(test_inputs, test_outputs, tokenizer)

In [29]:
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments
import os

training_args = TrainingArguments(
    output_dir='./results_dir',
    num_train_epochs=4,
    evaluation_strategy="steps",
    per_device_train_batch_size=16,  # Increased batch size
    per_device_eval_batch_size=16,  # Increased batch size
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    fp16=True,  # Enable mixed precision training
    gradient_accumulation_steps=2  # Accumulate gradients
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)



trainer.train()

model_save_path = "./results_model/_model"
tokenizer_save_path = "./results_model/tokenizer"


os.makedirs(model_save_path, exist_ok=True)
os.makedirs(tokenizer_save_path, exist_ok=True)

model.save_pretrained(model_save_path)
tokenizer.save_pretrained(tokenizer_save_path)

print(f"Modelo salvo em {model_save_path}")
print(f"Tokenizador salvo em {tokenizer_save_path}")

  0%|          | 0/696 [16:50<?, ?it/s]


[A[A                                         
                                                  
  4%|▍         | 30/696 [13:51<2:35:25, 14.00s/it]

{'loss': 0.5169, 'grad_norm': 30.085582733154297, 'learning_rate': 9e-07, 'epoch': 0.23}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [14:02<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.127266064286232, 'eval_runtime': 10.5039, 'eval_samples_per_second': 33.131, 'eval_steps_per_second': 2.094, 'epoch': 0.23}




[A[A                                         
                                                  
  4%|▍         | 30/696 [14:34<2:35:25, 14.00s/it]

{'loss': 0.4629, 'grad_norm': 29.735414505004883, 'learning_rate': 1.8e-06, 'epoch': 0.46}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [14:45<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.30366504192352295, 'eval_runtime': 10.5079, 'eval_samples_per_second': 33.118, 'eval_steps_per_second': 2.094, 'epoch': 0.46}




[A[A                                         
                                                  
  4%|▍         | 30/696 [15:18<2:35:25, 14.00s/it]

{'loss': 0.4512, 'grad_norm': 13.353433609008789, 'learning_rate': 2.8000000000000003e-06, 'epoch': 0.69}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [15:28<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.2310512214899063, 'eval_runtime': 10.2746, 'eval_samples_per_second': 33.87, 'eval_steps_per_second': 2.141, 'epoch': 0.69}




[A[A                                         
                                                  
  4%|▍         | 30/696 [16:01<2:35:25, 14.00s/it]

{'loss': 0.3922, 'grad_norm': 31.126903533935547, 'learning_rate': 3.8e-06, 'epoch': 0.92}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [16:13<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.11676185578107834, 'eval_runtime': 11.3242, 'eval_samples_per_second': 30.731, 'eval_steps_per_second': 1.943, 'epoch': 0.92}




[A[A                                         
                                                  
  4%|▍         | 30/696 [16:58<2:35:25, 14.00s/it]

{'loss': 0.4367, 'grad_norm': 22.858549118041992, 'learning_rate': 4.800000000000001e-06, 'epoch': 1.15}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [17:09<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.3915941119194031, 'eval_runtime': 10.8734, 'eval_samples_per_second': 32.005, 'eval_steps_per_second': 2.023, 'epoch': 1.15}




[A[A                                         
                                                  
  4%|▍         | 30/696 [18:00<2:35:25, 14.00s/it]

{'loss': 0.3809, 'grad_norm': 33.697105407714844, 'learning_rate': 5.8e-06, 'epoch': 1.38}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [18:13<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.1581508368253708, 'eval_runtime': 13.3864, 'eval_samples_per_second': 25.997, 'eval_steps_per_second': 1.643, 'epoch': 1.38}




[A[A                                         
                                                  
  4%|▍         | 30/696 [19:01<2:35:25, 14.00s/it]

{'loss': 0.3848, 'grad_norm': 36.79335403442383, 'learning_rate': 6.800000000000001e-06, 'epoch': 1.61}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [19:11<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.36907288432121277, 'eval_runtime': 9.9055, 'eval_samples_per_second': 35.132, 'eval_steps_per_second': 2.221, 'epoch': 1.61}




[A[A                                         
                                                  
  4%|▍         | 30/696 [20:10<2:35:25, 14.00s/it]

{'loss': 0.381, 'grad_norm': 21.712417602539062, 'learning_rate': 7.8e-06, 'epoch': 1.84}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [20:19<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.17739473283290863, 'eval_runtime': 9.7016, 'eval_samples_per_second': 35.87, 'eval_steps_per_second': 2.268, 'epoch': 1.84}




[A[A                                         
                                                  
  4%|▍         | 30/696 [21:09<2:35:25, 14.00s/it]

{'loss': 0.3458, 'grad_norm': 21.88872528076172, 'learning_rate': 8.8e-06, 'epoch': 2.07}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [21:19<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.25456926226615906, 'eval_runtime': 10.5484, 'eval_samples_per_second': 32.991, 'eval_steps_per_second': 2.086, 'epoch': 2.07}




[A[A                                         
                                                  
  4%|▍         | 30/696 [21:54<2:35:25, 14.00s/it]

{'loss': 0.2933, 'grad_norm': 11.948196411132812, 'learning_rate': 9.800000000000001e-06, 'epoch': 2.3}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [22:04<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.24668318033218384, 'eval_runtime': 10.3709, 'eval_samples_per_second': 33.556, 'eval_steps_per_second': 2.121, 'epoch': 2.3}




[A[A                                         
                                                  
  4%|▍         | 30/696 [22:40<2:35:25, 14.00s/it]

{'loss': 0.2837, 'grad_norm': 13.084911346435547, 'learning_rate': 1.08e-05, 'epoch': 2.53}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [22:50<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.22268085181713104, 'eval_runtime': 10.3743, 'eval_samples_per_second': 33.544, 'eval_steps_per_second': 2.121, 'epoch': 2.53}




[A[A                                         
                                                  
  4%|▍         | 30/696 [23:30<2:35:25, 14.00s/it]

{'loss': 0.3041, 'grad_norm': 35.463661193847656, 'learning_rate': 1.18e-05, 'epoch': 2.76}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [23:43<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.2935056686401367, 'eval_runtime': 12.5624, 'eval_samples_per_second': 27.702, 'eval_steps_per_second': 1.751, 'epoch': 2.76}




[A[A                                         
                                                  
  4%|▍         | 30/696 [24:21<2:35:25, 14.00s/it]

{'loss': 0.3228, 'grad_norm': 18.353818893432617, 'learning_rate': 1.2800000000000001e-05, 'epoch': 2.99}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [24:32<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.23118320107460022, 'eval_runtime': 10.6026, 'eval_samples_per_second': 32.822, 'eval_steps_per_second': 2.075, 'epoch': 2.99}




[A[A                                         
                                                  
  4%|▍         | 30/696 [25:14<2:35:25, 14.00s/it]

{'loss': 0.2828, 'grad_norm': 20.09805679321289, 'learning_rate': 1.3800000000000002e-05, 'epoch': 3.22}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [25:24<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.23601575195789337, 'eval_runtime': 10.6146, 'eval_samples_per_second': 32.785, 'eval_steps_per_second': 2.073, 'epoch': 3.22}




[A[A                                         
                                                  
  4%|▍         | 30/696 [26:22<2:35:25, 14.00s/it]

{'loss': 0.2614, 'grad_norm': 20.513288497924805, 'learning_rate': 1.48e-05, 'epoch': 3.45}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [26:32<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.4108436703681946, 'eval_runtime': 9.6809, 'eval_samples_per_second': 35.947, 'eval_steps_per_second': 2.273, 'epoch': 3.45}




[A[A                                         
                                                  
  4%|▍         | 30/696 [28:06<2:35:25, 14.00s/it]

{'loss': 0.3471, 'grad_norm': 12.250540733337402, 'learning_rate': 1.58e-05, 'epoch': 3.68}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [28:19<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.19528627395629883, 'eval_runtime': 12.7028, 'eval_samples_per_second': 27.396, 'eval_steps_per_second': 1.732, 'epoch': 3.68}




[A[A                                         
                                                  
  4%|▍         | 30/696 [29:58<2:35:25, 14.00s/it]

{'loss': 0.2681, 'grad_norm': 14.916300773620605, 'learning_rate': 1.6800000000000002e-05, 'epoch': 3.91}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A                                         
                                                  


[A[A[A                                      
  4%|▍         | 30/696 [30:10<2:35:25, 14.00s/it]

[A[A

[A[A

{'eval_loss': 0.2658576965332031, 'eval_runtime': 12.338, 'eval_samples_per_second': 28.206, 'eval_steps_per_second': 1.783, 'epoch': 3.91}




[A[A                                         
                                                  
100%|██████████| 172/172 [17:13<00:00,  6.01s/it]]


{'train_runtime': 1033.382, 'train_samples_per_second': 5.373, 'train_steps_per_second': 0.166, 'train_loss': 0.3576463450526082, 'epoch': 3.95}
Modelo salvo em ./results_model/_model
Tokenizador salvo em ./results_model/tokenizer
