## Test models

In [None]:
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import pandas as pd

# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model_name = "tupi-bert-base-portuguese-cased"
model_tokenizer_path = f"FpOliveira/{model_name}"

# Define features and target
x_name = "text"
y_name_split = "hate"   # column used to stratify the split
y_name_target = "hate"  # column used as the label to predict

# Load the dataset and rebuild the held-out split (stratified).
# NOTE: test_size, random_state and the stratification column must stay exactly
# as they are: this reproduces the partition used when the model was trained, so
# the rows evaluated below are the ones that were held out at training time.
df = pd.read_csv("https://raw.githubusercontent.com/Silly-Machine/TuPi-Portuguese-Hate-Speech-Dataset/main/data/raw/binary/tupi_binary.csv")
train_texts, val_texts, train_labels, val_labels = train_test_split(df[x_name], df[y_name_target], test_size=0.2, random_state=42, stratify=df[y_name_split])
# Kept under its own name so the raw held-out frame is not confused with the
# PyTorch dataset that is later bound to `test_dataset`.
test_df = pd.DataFrame({x_name: val_texts, y_name_target: val_labels})

# Load the tokenizer published alongside the model
tokenizer = BertTokenizer.from_pretrained(model_tokenizer_path)

# Load the pre-trained model, move it to the device and switch to inference mode
loaded_model = BertForSequenceClassification.from_pretrained(model_tokenizer_path).to(device)
loaded_model.eval()

# Tokenize the test dataset.
# The tokenizer config carries no maximum length, so `truncation=True` on its own
# is ignored (with a warning). Take the limit from the model's position-embedding
# size so over-long texts are actually truncated to what the model accepts.
test_encodings = tokenizer(
    list(test_df[x_name]),
    truncation=True,
    max_length=loaded_model.config.max_position_embeddings,
    padding=True,
    return_tensors='pt',
).to(device)
test_labels = torch.tensor(test_df[y_name_target].values).long()

# Create PyTorch dataset
class CustomDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels.

    Args:
        encodings: Mapping from field name (e.g. ``'input_ids'``) to an
            indexable collection holding one entry per example.
        labels: Indexable collection with one label per example.
        target_device: Device every returned tensor is moved to. When omitted,
            the notebook-level ``device`` is used, so existing two-argument
            calls behave as before.

    Each item is a dict with one tensor per encoding field plus a ``'labels'``
    tensor, all placed on ``target_device``.
    """

    def __init__(self, encodings, labels, target_device=None):
        self.encodings = encodings
        self.labels = labels
        # `device` is only looked up when no explicit target was given.
        self.target_device = device if target_device is None else target_device

    @staticmethod
    def _copy_as_tensor(value):
        """Return an independent tensor copy of ``value``.

        ``torch.tensor(existing_tensor)`` emits a UserWarning on every call;
        ``detach().clone()`` is the recommended way to copy a tensor. Values
        that are not tensors yet (lists, scalars) still go through
        ``torch.tensor``.
        """
        if isinstance(value, torch.Tensor):
            return value.detach().clone()
        return torch.tensor(value)

    def __getitem__(self, idx):
        """Build the example at position ``idx`` as a dict of tensors."""
        item = {
            key: self._copy_as_tensor(val[idx]).to(self.target_device)
            for key, val in self.encodings.items()
        }
        item['labels'] = self._copy_as_tensor(self.labels[idx]).to(self.target_device)
        return item

    def __len__(self):
        """Number of examples, taken from the label collection."""
        return len(self.labels)

test_dataset = CustomDataset(test_encodings, test_labels)

# Test the model on the test dataset.
# shuffle=False keeps predictions in the same order as `test_labels`.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
test_preds = []

with torch.no_grad():
    for batch in test_loader:
        # `.to(device)` is a no-op when the batch already lives on the device;
        # it keeps the loop working regardless of where the dataset puts tensors.
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = loaded_model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        # Predicted class = index of the largest logit for each example
        preds = torch.argmax(logits, dim=1)
        test_preds.extend(preds.cpu().numpy())

# Evaluate the model on the test dataset.
# Precision/recall/F1 are support-weighted averages over both classes.
array_test_labels = test_labels.cpu().numpy()
test_accuracy = accuracy_score(array_test_labels, test_preds)
test_precision = precision_score(array_test_labels, test_preds, average='weighted')
test_recall = recall_score(array_test_labels, test_preds, average='weighted')
test_f1 = f1_score(array_test_labels, test_preds, average='weighted')

# Create a pandas DataFrame with the results
results_df = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [test_accuracy, test_precision, test_recall, test_f1]
})
# The following cell writes its positive-class metrics to
# f'{model_name}_{y_name_target}.csv'; use a distinct file name here so the
# weighted results are not overwritten.
results_df.to_csv(f'{model_name}_{y_name_target}_weighted.csv', index=False)

print('\nTest Metrics:')
print(f'Accuracy: {test_accuracy}')
print(f'Precision: {test_precision}')
print(f'Recall: {test_recall}')
print(f'F1 Score: {test_f1}')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/210k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.03k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
  item = {key: torch.tensor(val[idx]).to(device) for key, val in self.encodings.items()}
  item['labels'] = torch.tensor(self.labels[idx]).to(device)
  item = {key: torch.tensor(val[idx]).to(device) for key, val in self.encodings.items()}
  item['labels'] = torch.tensor(self.labels[idx]).to(device)



Test Metrics:
Accuracy: 0.9009617586443783
Precision: 0.8969552558495383
Recall: 0.9009617586443783
F1 Score: 0.8987399701620706


In [None]:
# Re-score the same predictions with scikit-learn's default averaging
# (average='binary'): precision, recall and F1 describe the positive class
# (label 1) only, which is why they differ from the weighted figures.
array_test_labels = test_labels.cpu().numpy()
test_accuracy, test_precision, test_recall, test_f1 = (
    score_fn(array_test_labels, test_preds)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

# Collect the scores once, then reuse the mapping for both the CSV and the printout
metric_values = {
    'Accuracy': test_accuracy,
    'Precision': test_precision,
    'Recall': test_recall,
    'F1 Score': test_f1,
}
results_df = pd.DataFrame({
    'Metric': list(metric_values.keys()),
    'Value': list(metric_values.values()),
})
results_df.to_csv(f'{model_name}_{y_name_target}.csv', index=False)

print('\nTest Metrics:')
for metric_label, metric_value in metric_values.items():
    print(f'{metric_label}: {metric_value}')


Test Metrics:
Accuracy: 0.9009617586443783
Precision: 0.5981012658227848
Recall: 0.5394862036156042
F1 Score: 0.5672836418209104
