In [1]:
from datasets import load_dataset
from evaluate import evaluator, combine
from transformers import pipeline
from transformers.utils import logging
# Restrict transformers' own logger to error-level messages so cell output stays readable.
logging.set_verbosity_error() 

import torch

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Keep only the first five reviews of the test split for a quick qualitative look.
test_split = load_dataset("rotten_tomatoes", split="test")
sentences = test_split.select(range(5))

In [3]:
# Preview the sampled reviews, one per line. A plain loop is used instead of a
# list comprehension so the cell does not build (and echo) a list of None values.
for sentence in sentences['text']:
    print(sentence)

lovingly photographed in the manner of a golden book sprung to life , stuart little 2 manages sweetness largely without stickiness .
consistently clever and suspenseful .
it's like a " big chill " reunion of the baader-meinhof gang , only these guys are more harmless pranksters than political activists .
the story gives ample opportunity for large-scale action and suspense , which director shekhar kapur supplies with tremendous skill .
red dragon " never cuts corners .


[None, None, None, None, None]

In [4]:
# Build a sentiment-analysis pipeline around the RoBERTa checkpoint named below
# and place it on the device selected earlier in the notebook.
roberta_pipe = pipeline(
    task="sentiment-analysis",
    model="textattack/roberta-base-rotten-tomatoes",
    device=device,
)

In [5]:
# Classify all preview reviews in a single pipeline call; the result is indexed
# per review and each entry exposes a 'label' key (used by the comparison cell).
preview_texts = sentences['text']
predictions = roberta_pipe(preview_texts)

In [6]:
# Print the gold label next to the predicted label for every preview review.
# Each column is read once and the three sequences are walked in lockstep,
# rather than re-reading the whole label column on every iteration.
# The pipeline emits string labels: 'LABEL_1' is shown as 1, anything else as 0.
for _sentence, actual, prediction in zip(sentences['text'], sentences['label'], predictions):
    predicted = '1' if prediction['label'] == 'LABEL_1' else '0'
    print(f"actual:{actual} predicted:{predicted} \t {_sentence} ")

actual:1 predicted:1 	 lovingly photographed in the manner of a golden book sprung to life , stuart little 2 manages sweetness largely without stickiness . 
actual:1 predicted:1 	 consistently clever and suspenseful . 
actual:1 predicted:0 	 it's like a " big chill " reunion of the baader-meinhof gang , only these guys are more harmless pranksters than political activists . 
actual:1 predicted:1 	 the story gives ample opportunity for large-scale action and suspense , which director shekhar kapur supplies with tremendous skill . 
actual:1 predicted:1 	 red dragon " never cuts corners . 


In [7]:
# Load the complete test split for the quantitative evaluation.
# NOTE: this rebinds `sentences`, replacing the five-review preview subset, so
# earlier cells that pair `sentences` with `predictions` should not be re-run
# after this one without re-running the preview cells first.
sentences = load_dataset(path="rotten_tomatoes", split="test")

In [8]:
# Create the evaluator object for the sentiment-analysis task.
task_evaluator = evaluator(task="sentiment-analysis")

In [9]:
# Bundle the four classification metrics so they are computed in one pass.
classification_metrics = combine(["accuracy", "precision", "recall", "f1"])

# Translate the pipeline's string labels into the dataset's integer labels.
label_to_id = {"LABEL_0": 0, "LABEL_1": 1}

# Score the pipeline on the dataset currently bound to `sentences`.
eval_results = task_evaluator.compute(
    model_or_pipeline=roberta_pipe,
    data=sentences,
    metric=classification_metrics,
    label_mapping=label_to_id,
)

Downloading builder script:   0%|          | 0.00/7.55k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.36k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

In [10]:
# Show everything the evaluator returned (metric scores and timing statistics).
print(str(eval_results))

{'accuracy': 0.8874296435272045, 'precision': 0.9222903885480572, 'recall': 0.8461538461538461, 'f1': 0.8825831702544031, 'total_time_in_seconds': 25.49598717000117, 'samples_per_second': 41.81050111502511, 'latency_in_seconds': 0.023917436369607106}
