In [1]:
import pandas as pd
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
import numpy as np
import evaluate
import datasets as ds

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train = pd.read_csv('../data/headlines_train.csv')#.sample(500, random_state=42)
test = pd.read_csv('../data/headlines_test.csv')#.sample(50, random_state=42)
eval = pd.read_csv('../data/headlines_val.csv')#.sample(50, random_state=42)

dataset = ds.DatasetDict({
    "train": ds.Dataset.from_pandas(train),
    "test": ds.Dataset.from_pandas(test),
    "eval": ds.Dataset.from_pandas(eval)
})


In [3]:
#modelname = "EMBEDDIA/crosloengual-bert"
modelname = "EMBEDDIA/sloberta"
tokenizer = AutoTokenizer.from_pretrained(modelname)
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", max_length=160, truncation=True, return_tensors="pt")

tokenized_datasets = dataset.map(tokenize_function, batched=True)

Map: 100%|██████████| 500/500 [00:00<00:00, 5554.27 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 4163.25 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 3844.81 examples/s]


In [4]:
model = AutoModelForSequenceClassification.from_pretrained(modelname, num_labels=2)
training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch", num_train_epochs=5)

metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at EMBEDDIA/sloberta and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["eval"],
    compute_metrics=compute_metrics
)

trainer.train()
predictions = trainer.predict(test_dataset=tokenized_datasets["test"])

                                                
 20%|██        | 63/315 [05:09<22:31,  5.36s/it]

{'eval_loss': 0.6880384683609009, 'eval_accuracy': 0.54, 'eval_runtime': 1.8255, 'eval_samples_per_second': 27.39, 'eval_steps_per_second': 3.835, 'epoch': 1.0}


                                                  
 40%|████      | 126/315 [13:57<18:16,  5.80s/it]

{'eval_loss': 0.6863951086997986, 'eval_accuracy': 0.54, 'eval_runtime': 1.8542, 'eval_samples_per_second': 26.966, 'eval_steps_per_second': 3.775, 'epoch': 2.0}


                                                 
 60%|██████    | 189/315 [20:55<10:18,  4.91s/it]

{'eval_loss': 0.6865469217300415, 'eval_accuracy': 0.54, 'eval_runtime': 2.4618, 'eval_samples_per_second': 20.311, 'eval_steps_per_second': 2.843, 'epoch': 3.0}


                                                 
 80%|████████  | 252/315 [28:31<05:42,  5.43s/it]

{'eval_loss': 0.6850305199623108, 'eval_accuracy': 0.54, 'eval_runtime': 2.2368, 'eval_samples_per_second': 22.353, 'eval_steps_per_second': 3.129, 'epoch': 4.0}


                                                 
100%|██████████| 315/315 [38:00<00:00,  7.24s/it]


{'eval_loss': 0.6852250099182129, 'eval_accuracy': 0.54, 'eval_runtime': 2.5456, 'eval_samples_per_second': 19.642, 'eval_steps_per_second': 2.75, 'epoch': 5.0}
{'train_runtime': 2280.1774, 'train_samples_per_second': 1.096, 'train_steps_per_second': 0.138, 'train_loss': 0.6917421371217758, 'epoch': 5.0}


100%|██████████| 7/7 [00:06<00:00,  1.09it/s]


In [6]:
print(predictions)

PredictionOutput(predictions=array([[ 0.04393283, -0.10118605],
       [ 0.03999142, -0.14097118],
       [ 0.06262582, -0.11103121],
       [ 0.03650834, -0.08542555],
       [ 0.0726263 , -0.06758032],
       [ 0.06141059, -0.07715786],
       [ 0.04434155, -0.07388615],
       [ 0.02087264, -0.11643792],
       [ 0.06041903, -0.10991348],
       [ 0.05014312, -0.09180582],
       [ 0.0156146 , -0.08504643],
       [ 0.04627168, -0.10775284],
       [ 0.04192035, -0.07549126],
       [ 0.0526369 , -0.06320458],
       [ 0.05859836, -0.09541917],
       [ 0.06622987, -0.08778339],
       [ 0.03947572, -0.11378293],
       [ 0.0795209 , -0.09122261],
       [ 0.04372286, -0.08167616],
       [ 0.04898355, -0.10725186],
       [ 0.04676703, -0.09048095],
       [ 0.07019776, -0.08320591],
       [ 0.05150445, -0.07409073],
       [ 0.03824732, -0.09872345],
       [ 0.04652142, -0.08144905],
       [ 0.06000957, -0.09035443],
       [ 0.07522237, -0.11295918],
       [ 0.02467533, -0.09

In [13]:
from sklearn import metrics

prediction = np.argmax(predictions.predictions, axis=-1)
prediction = prediction.tolist()
print(prediction)
recall = metrics.recall_score(dataset["test"]["label"],prediction)
precision = metrics.precision_score(dataset["test"]["label"],prediction)
f1_score = metrics.f1_score(dataset["test"]["label"],prediction)
accuracy = metrics.accuracy_score(dataset["test"]["label"],prediction)
loss = metrics.log_loss(dataset["test"]["label"],prediction)

print('Loss:',loss)
print('Accuracy:',accuracy)
print('Precision:',precision)
print('Recall:',recall)
print('f1 score:',f1_score)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Loss: 16.580080558993888
Accuracy: 0.54
Precision: 0.0
Recall: 0.0
f1 score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
