## Evaluate notebook

In [11]:
import tensorflow as tf
import pandas as pd
import evaluate
import os
dir_root = os.getcwd()

In [3]:
accuracy_ev = evaluate.load("accuracy")
print(accuracy_ev.description)


Accuracy is the proportion of correct predictions among the total number of cases processed. It can be computed with:
Accuracy = (TP + TN) / (TP + TN + FP + FN)
 Where:
TP: True positive
TN: True negative
FP: False positive
FN: False negative



In [33]:
# model_tf = tf.saved_model.load(os.path.join(os.getcwd(), 'models/tfsample/'))
# model_tf = tf.keras.models.load_model(os.path.join(os.getcwd(), 'models/modeltf/'))
from transformers import AutoModelForSequenceClassification
model_tf = AutoModelForSequenceClassification.from_pretrained('models/huggingfacemodel/', from_tf=True)


All TF 2.0 model weights were used when initializing DistilBertForSequenceClassification.

All the weights of DistilBertForSequenceClassification were initialized from the TF 2.0 model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use DistilBertForSequenceClassification for predictions without further training.


In [34]:
model_tf

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
      

In [12]:
dataset_df = pd.read_csv(os.path.join(dir_root, 'data/interim/trainset.csv'), converters={'NDD':str})
labels_set = set(dataset_df.labels.to_list())

In [14]:
from datasets import load_dataset, Features, ClassLabel, Value

In [18]:
traincsv = os.path.join(dir_root, 'data/interim/trainsethugf.csv')
testcsv = os.path.join(dir_root, 'data/interim/testsethugf.csv')
validcsv = os.path.join(dir_root, 'data/interim/validsethugf.csv')
# class_names = ["RoboADomicilio", "RoboAPersonas", "RoboAUnidadesEconomicas", "RoboDeBienesAccesoriosYAutoPartes", "RoboDeCarros", "RoboDeMotos"]
class_names = list(labels_set)
robo_features = Features({'relato': Value('string'), 'labels': ClassLabel(names=class_names)})
dataset = load_dataset("csv", data_files={'train': traincsv, 'test': testcsv, 'validation':validcsv}, features=robo_features)

Using custom data configuration default-92b60acd05f883de


Downloading and preparing dataset csv/default to C:\Users\entea\.cache\huggingface\datasets\csv\default-92b60acd05f883de\0.0.0\51cce309a08df9c4d82ffd9363bbe090bf173197fc01a71b034e8594995a1a58...


Downloading data files: 100%|██████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 2998.79it/s]
Extracting data files: 100%|████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 499.90it/s]
                                 

Dataset csv downloaded and prepared to C:\Users\entea\.cache\huggingface\datasets\csv\default-92b60acd05f883de\0.0.0\51cce309a08df9c4d82ffd9363bbe090bf173197fc01a71b034e8594995a1a58. Subsequent calls will reuse this data.


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 49.97it/s]


In [19]:
seqlen = dataset_df.relato.apply(lambda x: len(x.split()))

In [20]:
from transformers import DistilBertTokenizer, AutoTokenizer, DistilBertTokenizerFast
# model_name = 'xlm-roberta-large'
# model_name = 'bert-base-cased'
# model_name = 'bert-base-multilingual-uncased-sentiment'
model_name = 'distilbert-base-multilingual-cased'
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)

In [21]:
def tokenizer_func(examples):
  return tokenizer(examples["relato"],
                   max_length=seqlen.max(),
                   padding = "max_length",
                   truncation=True)

tokenized_dataset = dataset.map(tokenizer_func, batched=True)

100%|████████████████████████████████████████████████████████████████████████████████| 274/274 [01:04<00:00,  4.22ba/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 90/90 [00:22<00:00,  4.09ba/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 69/69 [00:16<00:00,  4.08ba/s]


In [22]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")

In [23]:
tf_test_set = tokenized_dataset["test"].shuffle(seed=42).select(range(4000)).to_tf_dataset(
    columns=["attention_mask", "input_ids", "labels"],
    shuffle=False,
    batch_size=16,
    collate_fn=data_collator,
)

In [26]:
# from transformers import create_optimizer

# batch_size = 16
# num_epochs = 50
# batches_per_epoch = len(tokenized_dataset["train"]) // batch_size
# total_train_steps = int(batches_per_epoch * num_epochs)
# optimizer, schedule = create_optimizer(init_lr=2e-5, num_warmup_steps=0, num_train_steps=total_train_steps)

In [36]:
# model_tf.compile(optimizer=optimizer, metrics = ['accuracy'])

In [38]:
# model_tf.evaluate(tf_test_set)

In [45]:
from evaluate import evaluator
metric = evaluate.load("accuracy")
eval = evaluator("text-classification")

results = eval.compute(model_tf, data=dataset["test"].shuffle(seed=42).select(range(4000)), metric=metric, input_column="relato", label_column="labels", tokenizer = tokenizer)

print(results)

Loading cached shuffled indices for dataset at C:\Users\entea\.cache\huggingface\datasets\csv\default-92b60acd05f883de\0.0.0\51cce309a08df9c4d82ffd9363bbe090bf173197fc01a71b034e8594995a1a58\cache-1dfcb5a6ce1efb95.arrow


ValueError: invalid literal for int() with base 10: 'LABEL_2'