In [1]:
import torch

from typing import List, Union, Tuple
from transformers import AutoTokenizer, AutoModelForSequenceClassification

from nlptest.behavior import SequenceClassificationBehavior
from nlptest.types import BehaviorType, TaskType

  from .autonotebook import tqdm as notebook_tqdm


A `Behavior` tests a specific capability of a model by checking its predictions on a given set of inputs. This notebook shows how to use a behavior for a sentiment classification use case.

In [2]:
# Load the reference model. The tokenizer and the classifier are loaded from the
# same pretrained checkpoint, so the checkpoint id is defined once and shared by
# both calls instead of being repeated as a string literal.
MODEL_NAME = "avichr/heBERT_sentiment_analysis"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

In [3]:
# Review texts on which the behavior will be tested.
# Spelling mistakes inside the reviews are part of the data and are kept verbatim.
# The expected label of each sample is given separately (see `labels` where the
# behavior is created) — presumably matched by position; TODO confirm.
samples = [
    "This sound track was beautiful! It paints the senery in your mind so well I would recomend it even to people who hate vid. game music! I have played the game Chrono Cross but out of all of the games I have ever played it has the best music! It backs away from crude keyboarding and takes a fresher step with grate guitars and soulful orchestras. It would impress anyone who cares to listen!",
    "I thought this book was brilliant, but yet realistic. It showed me that to error is human. I loved the fact that this writer showed the loving side of God and not the revengeful side of him. I loved how it twisted and turned and I could not put it down. I also loved The glass castle.",
    "I read the reviews,made my purchase and was very disappointed. The charger is convenient by charging all four batteries at once but the charge only lasts a very short time. I now have to go and find batteries that will give me longer life than the kodak NiMH AA batteries."
]

In [4]:
# Prediction function handed to the behavior as `predict_fn`
def predict(texts: Union[List[str], str]) -> Union[List[int], List[Tuple[int, float]]]:
    """Classify `texts` with the notebook-level `tokenizer` and `model`.

    Args:
        texts: A single string or a list of strings to classify.

    Returns:
        A list with one `(label, probability)` tuple per row of the model's
        logits: `label` is the index of the highest-scoring class and
        `probability` is the softmax score of that class.
    """
    # Every sequence is truncated or padded to exactly 128 tokens.
    encoded = tokenizer(texts, truncation=True, padding="max_length", max_length=128, return_tensors="pt")
    # Forward pass only: gradient tracking is switched off.
    with torch.no_grad():
        logits = model(**encoded).logits
    class_probs = torch.softmax(logits, dim=-1)
    top_probs, top_labels = class_probs.max(dim=-1)
    return list(zip(top_labels.tolist(), top_probs.tolist()))

In [5]:
# Build the behavioral test: it bundles the samples, their expected labels and the
# prediction function to evaluate.
behavior = SequenceClassificationBehavior(
    name="Amazon polarity behavior",
    test_type=BehaviorType.minimum_functionality,
    task_type=TaskType.sequence_classification,
    samples=samples,
    predict_fn=predict,
    # One expected class index per sample — presumably matched by position. What
    # each index means depends on the model's label mapping (TODO confirm).
    labels=[2,2,0],
    description="Checking model behavior on Amazon review"
)

In [6]:
# Run the behavior, then print every entry of `behavior.outputs` with its index.
# `run()` is called before `outputs` is read.
behavior.run()

# Each output is printed through its string representation, followed by a blank line.
for i, output in enumerate(behavior.outputs):
    print(f"Output {i}: {output}\n")

Output 0: text='This sound track was beautiful! It paints the senery in your mind so well I would recomend it even to people who hate vid. game music! I have played the game Chrono Cross but out of all of the games I have ever played it has the best music! It backs away from crude keyboarding and takes a fresher step with grate guitars and soulful orchestras. It would impress anyone who cares to listen!' y_pred='2' y_pred_prob=0.9918654561042786 y='2'

Output 1: text='I thought this book was brilliant, but yet realistic. It showed me that to error is human. I loved the fact that this writer showed the loving side of God and not the revengeful side of him. I loved how it twisted and turned and I could not put it down. I also loved The glass castle.' y_pred='2' y_pred_prob=0.9981094598770142 y='2'

Output 2: text='I read the reviews,made my purchase and was very disappointed. The charger is convenient by charging all four batteries at once but the charge only lasts a very short time. I n

We can also save this behavior and run it against a new model.

In [7]:
# Save the behavior to disk so it can be reloaded and run against another model.
# Only a directory is given here. A later cell loads
# "saved_behaviors/amazon_polarity_behavior.pkl", so `to_file` presumably derives
# the file name from the behavior's name — TODO confirm.
# NOTE(review): confirm whether `to_file` creates the directory when it is missing.
behavior.to_file("saved_behaviors/")

In [8]:
# Load a second model to run the saved behavior against. The tokenizer and the
# classifier are loaded from the same pretrained checkpoint, so the checkpoint id
# is defined once and shared by both calls instead of being repeated.
NEW_MODEL_NAME = "sbcBI/sentiment_analysis"
new_tokenizer = AutoTokenizer.from_pretrained(NEW_MODEL_NAME)
new_model = AutoModelForSequenceClassification.from_pretrained(NEW_MODEL_NAME)

In [9]:
# Prediction function for the new model, handed to the reloaded behavior as `predict_fn`
def new_predict(texts: Union[List[str], str]) -> Union[List[int], List[Tuple[int, float]]]:
    """Classify `texts` with the notebook-level `new_tokenizer` and `new_model`.

    Args:
        texts: A single string or a list of strings to classify.

    Returns:
        A list with one `(label, probability)` tuple per row of the model's
        logits: `label` is the index of the highest-scoring class and
        `probability` is the softmax score of that class.
    """
    # Every sequence is truncated or padded to exactly 128 tokens.
    encoded = new_tokenizer(texts, truncation=True, padding="max_length", max_length=128, return_tensors="pt")
    # Forward pass only: gradient tracking is switched off.
    with torch.no_grad():
        logits = new_model(**encoded).logits
    class_probs = torch.softmax(logits, dim=-1)
    top_probs, top_labels = class_probs.max(dim=-1)
    return list(zip(top_labels.tolist(), top_probs.tolist()))

In [10]:
# Reload the saved behavior and attach the new model's prediction function to it.
# NOTE(review): the ".pkl" extension suggests a pickle file — only load such files
# from a trusted source (this one was written earlier in this notebook).
# NOTE(review): the file name presumably derives from the behavior's name
# ("Amazon polarity behavior") — confirm against `to_file`.
new_behavior = SequenceClassificationBehavior.from_file(
    path_to_file="saved_behaviors/amazon_polarity_behavior.pkl",
    predict_fn=new_predict
)

In [11]:
# Run the reloaded behavior with the new model, then print every entry of
# `new_behavior.outputs` with its index. `run()` is called before `outputs` is read.
new_behavior.run()

# Each output is printed through its string representation, followed by a blank line.
for i, output in enumerate(new_behavior.outputs):
    print(f"Output {i}: {output}\n")

Output 0: text='This sound track was beautiful! It paints the senery in your mind so well I would recomend it even to people who hate vid. game music! I have played the game Chrono Cross but out of all of the games I have ever played it has the best music! It backs away from crude keyboarding and takes a fresher step with grate guitars and soulful orchestras. It would impress anyone who cares to listen!' y_pred='2' y_pred_prob=0.9460005760192871 y='2'

Output 1: text='I thought this book was brilliant, but yet realistic. It showed me that to error is human. I loved the fact that this writer showed the loving side of God and not the revengeful side of him. I loved how it twisted and turned and I could not put it down. I also loved The glass castle.' y_pred='2' y_pred_prob=0.9702939987182617 y='2'

Output 2: text='I read the reviews,made my purchase and was very disappointed. The charger is convenient by charging all four batteries at once but the charge only lasts a very short time. I n