In [None]:
! pip3 install textattack

In [None]:
! pip install transformers[torch]

In [None]:
import pandas as pd

import torch
from transformers import AutoTokenizer, AutoConfig, BertForSequenceClassification

import textattack
from textattack import Attack
from textattack import datasets
from textattack import Attacker, AttackArgs
from textattack.attack_recipes import PWWSRen2019
from textattack.datasets import HuggingFaceDataset
from textattack.models.wrappers import ModelWrapper
from textattack.models.wrappers import HuggingFaceModelWrapper

from textattack.constraints.pre_transformation import (
    RepeatModification,
    StopwordModification,
)
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GreedyWordSwapWIR
from textattack.transformations import WordSwapWordNet

pd.set_option('display.max_colwidth', None)

In [None]:
# Download the three input files with the gdown CLI (must be available on PATH);
# the trailing comment on each line names the file that the ID resolves to.
! gdown "165kzfZDsRTZAAfZKedeZiUlKzMcHNgPd"  # Arabic_stop_words.txt
! gdown "1MPHZcco5Rh8VGye91qc0bxXd0iIECD3-"  # correct_classified_test.csv
! gdown "10Umn1MBzzMOnb0l-VtcrSfMk41LhhnhF"  # arabic_offensive_lang_detection_arabert.pt (ARABert model)

In [None]:
# Load the Arabic stop-word list: one word per line, UTF-8 encoded.
# Surrounding whitespace is stripped and blank lines are skipped, so neither an
# empty string nor a padded word (which would never match a token) ends up in
# the list handed to the stop-word constraint.
with open('./Arabic_stop_words.txt', encoding='utf-8') as f:
    arabic_stop_words = [line.strip() for line in f if line.strip()]

In [None]:
# Arabic clitic prefixes and suffixes. Every string is written as a \u escape so
# the source stays correct regardless of the encoding the notebook is saved or
# viewed in, and each string is listed exactly once.
PREFIX_LIST = [
    "\u0627\u0644",  # ALEF + LAM
    "\u0648",  # WAW
    "\u0641",  # FEH
    "\u0628",  # BEH
    "\u0643",  # KAF
    "\u0644",  # LAM
    "\u0644\u0644",  # LAM + LAM
    "\u0633",  # SEEN
]
SUFFIX_LIST = [
    "\u0647",  # HEH
    "\u0647\u0627",  # HEH + ALEF
    "\u0643",  # KAF
    "\u064a",  # YEH
    "\u0647\u0645\u0627",  # HEH + MEEM + ALEF
    "\u0643\u0645\u0627",  # KAF + MEEM + ALEF
    "\u0646\u0627",  # NOON + ALEF
    "\u0643\u0645",  # KAF + MEEM
    "\u0647\u0645",  # HEH + MEEM
    "\u0647\u0646",  # HEH + NOON
    "\u0643\u0646",  # KAF + NOON
    "\u0627",  # ALEF
    "\u0627\u0646",  # ALEF + NOON
    "\u064a\u0646",  # YEH + NOON
    "\u0648\u0646",  # WAW + NOON
    "\u0648\u0627",  # WAW + ALEF
    "\u0627\u062a",  # ALEF + TEH
    "\u062a",  # TEH
    "\u0646",  # NOON
    "\u0629",  # TEH MARBUTA
]


# the never_split list is used with the transformers library:
# prefixes are marked with a trailing "+", suffixes with a leading "+"
_PREFIX_SYMBOLS = [prefix + "+" for prefix in PREFIX_LIST]
_SUFFIX_SYMBOLS = ["+" + suffix for suffix in SUFFIX_LIST]
# dict.fromkeys removes duplicates while keeping a stable, reproducible order
# (list(set(...)) would yield a different order from run to run)
NEVER_SPLIT_TOKENS = list(dict.fromkeys(_PREFIX_SYMBOLS + _SUFFIX_SYMBOLS))

In [None]:
# Load the tokenizer of the "aubmindlab/bert-base-arabertv02-twitter" checkpoint.
# Lower-casing is off, basic tokenization is on, and the "+"-marked affix tokens
# are passed as never_split.
tokenizer_kwargs = {
    "do_lower_case": False,
    "do_basic_tokenize": True,
    "never_split": NEVER_SPLIT_TOKENS,
}
tokenizer = AutoTokenizer.from_pretrained(
    "aubmindlab/bert-base-arabertv02-twitter", **tokenizer_kwargs
)

Downloading (‚Ä¶)okenizer_config.json:   0%|          | 0.00/476 [00:00<?, ?B/s]

Downloading (‚Ä¶)solve/main/vocab.txt:   0%|          | 0.00/751k [00:00<?, ?B/s]

Downloading (‚Ä¶)/main/tokenizer.json:   0%|          | 0.00/1.25M [00:00<?, ?B/s]

Downloading (‚Ä¶)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [None]:
# Load the classifier built on "aubmindlab/bert-base-arabertv02-twitter".
# The .pt file holds the whole pickled model object (not just a state_dict), so
# weights_only=False is needed on PyTorch releases where torch.load defaults to
# weights_only=True.
# SECURITY: unpickling can execute arbitrary code -- only load this file if it
# comes from a source you trust.
model = torch.load(
    'arabic_offensive_lang_detection_arabert.pt',
    map_location=torch.device('cpu'),
    weights_only=False,
)
model = model.to('cpu')
# Switch to inference mode so dropout is disabled and predictions are
# deterministic during the attack; eval() returns the model, so the cell still
# displays its architecture.
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(64000, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [None]:
def data_reformat(data):
    """
    Convert a dataframe into a TextAttack dataset of (text, label) pairs.

    input: dataframe with a "tweet_clean" (text) column and a "class" (label) column
    output: ``datasets.Dataset`` built from the list of (text, label) tuples
    """
    # pair every cleaned tweet with its label, row by row
    data_tuples = list(zip(data["tweet_clean"], data["class"]))
    # wrap the tuples in a dataset object
    return datasets.Dataset(data_tuples)

In [None]:
"""
correct_classified_test.csv: contains the correctly classified samples from the test data;
                            it is used here to generate the adversarial dataset for the attack
"""
# read the test samples and renumber the rows from zero
correct_classified_test = pd.read_csv('correct_classified_test.csv').reset_index(drop=True)

# convert the dataframe into the dataset object consumed by the attacker
attack_dataset = data_reformat(correct_classified_test)

# show the label and text of every example that will be attacked
for example in attack_dataset:
    text_fields, label = example[0], example[1]
    print("label: ", label, " -- text: ", text_fields['text'])

label:  1  -- text:  Ÿàÿ¥ ÿ∞ÿß ŸàÿßŸÑŸÑŸá ŸÑŸà ŸÖŸà ÿ¥ÿßŸäŸÅÿßÿ™ ÿ±ÿ¨ÿßŸÑ ÿ≠ŸÑŸàŸäŸÜ ÿ®ÿ≠Ÿäÿßÿ™ŸáŸÖ ÿßÿ≥ÿ™ÿ∫ŸÅÿ± ÿßŸÑŸÑŸá ŸÇŸÑŸá ÿ≠Ÿäÿßÿ° Ÿàÿ™ÿÆŸÑŸÅ üëéüèªüëéüèªüëéüèªüò°üò° ÿ®ÿßŸäÿπ ÿßŸÑŸÉŸÑŸäÿ¨ÿß
label:  1  -- text:  ÿßÿ≠ŸÑŸä ÿ¥ÿπŸàÿ± ŸÑŸÖÿß ÿßŸàÿ≥ÿπ ÿÆÿ±ŸÇŸÉ ÿßŸÑÿ∂ŸäŸÇ ÿ®ÿ≤ÿ®Ÿä ÿßŸÑŸÉÿ®Ÿäÿ± ŸäÿßÿÆŸÜŸäÿ´ üòàüë†üë† ÿπŸÖŸá ŸÉÿ≥Ÿä ÿØŸäŸàÿ´ ÿØŸäÿßÿ´Ÿá ŸÖŸÖÿ≠ŸàŸÜŸá ŸÉŸÑÿ®Ÿá ŸÉŸÑÿ® ÿπŸÖÿ™ŸÉ ÿ≥ÿßÿØŸäŸá ÿ≥ÿßŸÑÿ®
label:  1  -- text:  ÿ®ÿßŸäÿπ ÿßŸÑŸÉŸÑŸäÿ¨ÿß ÿßŸäŸá ÿ®ÿπÿØŸäŸÜ ÿ™ÿπÿßŸÑŸàÿß ŸÇŸàŸÑŸàÿß ÿßŸÜÿ™ŸÖ ÿ¥ŸáŸàÿßŸÜŸäŸÜ üò°
label:  1  -- text:  ŸàÿßŸÑŸÑŸá ÿπŸäÿ® ÿßŸÑŸÑŸä ÿµÿßÿ± ÿ®ÿ∫ÿ∂ ÿßŸÑŸÜÿ∏ÿ± ÿ¨ŸÖŸäŸÑ ÿßŸà ÿ¥ŸäŸÜ ŸàŸäŸÜ ŸÉÿ±ÿßŸÖÿ™ŸÉ Ÿàÿ≠Ÿäÿßÿ°ŸÉ ŸäÿπŸÜŸä ÿ®ÿπÿ∂ ÿßŸÑÿ®ŸÜÿßÿ™ ÿßŸÑŸÑŸä ÿ≠ÿßŸàŸÑŸà ŸäÿπŸÖŸÑŸà ŸÑŸÅÿ™ ÿßŸÜÿ™ÿ®ÿßŸá Ÿàÿ¥ ÿ™ÿ™ŸàŸÇÿπŸäŸÜ ŸÖŸÜŸá ÿßÿ™ÿ¨ÿßŸáŸÉ üò∑ ÿßŸÜÿß ŸàŸÖÿßŸÑŸä ÿØÿÆŸÑ ÿ™ŸÇÿ±ŸÅÿ™ ŸÖŸÜ ÿ®ÿπÿ∂ ÿßŸÑŸáŸÖÿ¨ ÿßŸÑŸÑŸä ÿ®ÿßŸÑŸÅŸäÿØŸäŸà ÿ®ÿßŸäÿπ ÿßŸÑŸÉŸÑŸäÿ¨ÿß ÿßŸÑÿ¨ŸÜÿßÿØÿ±ŸäŸá
label:  1  -- text:  Ÿáÿ°ŸÑÿßÿ° ŸáŸÖ ŸÖÿßŸäÿ≥ŸÖŸä ÿπŸÑŸÖÿßÿ° ÿßŸÑŸàŸáÿßÿ®ŸäŸá ÿØÿßÿ

In [None]:
# Bundle the model with its tokenizer in TextAttack's HuggingFace wrapper.
model_wrapper = HuggingFaceModelWrapper(model=model, tokenizer=tokenizer)

In [None]:
"""
preparing the transformation, constraints, goal function, and search method of the attack
all of these components are used for the attack recipe
"""

# word substitutions are drawn from WordNet
transformation = WordSwapWordNet()
# Adding the arabic stopwords to the stop-word constraint
constraints = [
    RepeatModification(),
    StopwordModification(stopwords=arabic_stop_words),
]
goal_function = UntargetedClassification(model_wrapper)
# search over words based on a combination of their saliency score, and how efficient the WordSwap transform is
search_method = GreedyWordSwapWIR(wir_method="weighted-saliency")
recipe = Attack(
    goal_function=goal_function,
    constraints=constraints,
    transformation=transformation,
    search_method=search_method,
)
# setting the language to arabic
recipe.transformation.language = 'arb'

In [None]:
# number of examples in the attack dataset (also used as num_examples for the attack)
len(attack_dataset)

710

In [None]:
# Attack every example in the dataset and log each result to log.csv;
# per-example stdout logging is disabled.
attack_args = AttackArgs(
    num_examples=len(attack_dataset),
    log_to_csv="log.csv",
    # checkpoint_interval=5,
    # checkpoint_dir="checkpoints",
    disable_stdout=True,
)

attacker = Attacker(recipe, attack_dataset, attack_args)

# start the attack; the returned list is kept in a variable so the notebook does
# not echo the repr of every result object as the cell output
attack_results = attacker.attack_dataset()

textattack: Logging to CSV at path /content/drive/MyDrive/Colab Notebooks/IRI/log.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  weighted-saliency
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapWordNet
  (constraints): 
    (0): RepeatModification
    (1): StopwordModification
  (is_black_box):  True
) 



[Succeeded / Failed / Skipped / Total] 30 / 671 / 9 / 710: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 710/710 [22:37<00:00,  1.91s/it]



+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 30     |
| Number of failed attacks:     | 671    |
| Number of skipped attacks:    | 9      |
| Original accuracy:            | 98.73% |
| Accuracy under attack:        | 94.51% |
| Attack success rate:          | 4.28%  |
| Average perturbed word %:     | 9.49%  |
| Average num. words per input: | 14.95  |
| Avg num queries:              | 78.12  |
+-------------------------------+--------+


[<textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7a5ac7bed2a0>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7a5ac7bede10>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7a5abf31d270>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7a5ac2f1e680>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7a5ac2f1f4f0>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7a5ac2f1f640>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7a5ac2f1ed70>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7a5ac2f1e320>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7a5abf377bb0>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7a5abf377c40>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7a5abf377250>,
 <textattack.attack_results.fail

### Attack results:

In [None]:
# read back the attack log written to log.csv
results = pd.read_csv("log.csv")

In [None]:
# count how many logged attacks fall into each result_type
results["result_type"].value_counts()

Failed        671
Successful     30
Skipped         9
Name: result_type, dtype: int64

In [None]:
# last rows of the log whose result_type is "Successful"
results.loc[results["result_type"].eq("Successful")].tail()

Unnamed: 0,original_text,perturbed_text,original_score,perturbed_score,original_output,perturbed_output,ground_truth_output,num_queries,result_type
569,ÿ®ÿßŸäÿπ ÿßŸÑŸÉŸÑŸäÿ¨ÿß ÿßŸÜÿß ŸÖÿßÿπŸÑŸä ŸÖŸÜ ÿßŸÑŸÑŸä ŸÖÿ≥ŸàŸä ÿßŸÑŸáÿßÿ¥ÿ™ÿßŸÇ ÿßŸà ŸÖŸÜ ÿßŸÑŸÑŸä Ÿäÿ±ÿ™Ÿàÿ™ Ÿà Ÿà Ÿà ŸÖŸàÿ® ÿ¨ÿßŸÑÿ∑ŸÜŸä ÿßŸÑÿß ŸÇŸÜÿßŸá ÿßŸÑÿπÿ±ÿ®ŸäŸá ŸÖŸÜÿ≤ŸÑŸá ŸáÿßŸÑŸÖŸàÿ∂Ÿàÿπ ŸÅŸä ÿßÿ≠ÿØ ÿ®ÿ±ÿßŸÖÿ¨Ÿá ŸàŸäŸÜ Ÿäÿ®ŸàŸÜ ŸäŸàÿµŸÑŸàŸÜ ÿßŸà Ÿàÿ¥ Ÿäÿ®ŸàŸÜ ŸäŸàÿµŸÑŸàŸÜ ŸÑŸÑŸÜÿßÿ≥ [[ŸÇÿ∑ÿπ]] ÿßŸÑŸÑŸá Ÿàÿ¨ŸäŸáŸÉŸÖ üëäüèª,ÿ®ÿßŸäÿπ ÿßŸÑŸÉŸÑŸäÿ¨ÿß ÿßŸÜÿß ŸÖÿßÿπŸÑŸä ŸÖŸÜ ÿßŸÑŸÑŸä ŸÖÿ≥ŸàŸä ÿßŸÑŸáÿßÿ¥ÿ™ÿßŸÇ ÿßŸà ŸÖŸÜ ÿßŸÑŸÑŸä Ÿäÿ±ÿ™Ÿàÿ™ Ÿà Ÿà Ÿà ŸÖŸàÿ® ÿ¨ÿßŸÑÿ∑ŸÜŸä ÿßŸÑÿß ŸÇŸÜÿßŸá ÿßŸÑÿπÿ±ÿ®ŸäŸá ŸÖŸÜÿ≤ŸÑŸá ŸáÿßŸÑŸÖŸàÿ∂Ÿàÿπ ŸÅŸä ÿßÿ≠ÿØ ÿ®ÿ±ÿßŸÖÿ¨Ÿá ŸàŸäŸÜ Ÿäÿ®ŸàŸÜ ŸäŸàÿµŸÑŸàŸÜ ÿßŸà Ÿàÿ¥ Ÿäÿ®ŸàŸÜ ŸäŸàÿµŸÑŸàŸÜ ŸÑŸÑŸÜÿßÿ≥ [[ÿµÿßŸÖÿØ]] ÿßŸÑŸÑŸá Ÿàÿ¨ŸäŸáŸÉŸÖ üëäüèª,0.001482,0.975317,1,0,1,445,Successful
584,ÿ®ÿßŸäÿπ ÿßŸÑŸÉŸÑŸäÿ¨ÿß ŸÑŸäÿ™ŸáŸÖ ÿ≠ÿßÿ∑ŸäŸÜ [[ÿ≥ŸÖ]] Ÿàÿ≥ÿ∑ ÿßŸÑŸÉŸÑŸäÿ¨ÿß ÿπÿ¥ÿßŸÜ ŸÜŸÅÿ™ŸÉ ŸÖŸÜŸáŸÜ üëäüèª,ÿ®ÿßŸäÿπ ÿßŸÑŸÉŸÑŸäÿ¨ÿß ŸÑŸäÿ™ŸáŸÖ ÿ≠ÿßÿ∑ŸäŸÜ [[ŸÅÿ≥ÿØ]] Ÿàÿ≥ÿ∑ ÿßŸÑŸÉŸÑŸäÿ¨ÿß ÿπÿ¥ÿßŸÜ ŸÜŸÅÿ™ŸÉ ŸÖŸÜŸáŸÜ üëäüèª,0.088619,0.532302,1,0,1,38,Successful
594,ŸäÿßŸÑÿ±ÿ®ÿπ Ÿáÿ∞ÿß [[ŸÜŸÇÿµ]] ŸÅŸä ÿπŸÇŸÑŸá ÿ®ŸÑÿß ŸÑÿ≠ÿØ ŸäÿπŸÑŸÇ ŸàŸäÿµŸÅŸÇŸÑŸá üò°üëäüèª,ŸäÿßŸÑÿ±ÿ®ÿπ Ÿáÿ∞ÿß [[ÿµÿßŸÖÿØ]] ŸÅŸä ÿπŸÇŸÑŸá ÿ®ŸÑÿß ŸÑÿ≠ÿØ ŸäÿπŸÑŸÇ ŸàŸäÿµŸÅŸÇŸÑŸá üò°üëäüèª,0.003387,0.958332,1,0,1,270,Successful
680,ŸàÿßŸÑŸÑŸá [[ÿ¨ÿßÿ®]] ÿ±ÿßÿ≥ ÿßŸÑÿÆÿ±ŸàŸÅÿßÿ™ üëéüèª,ŸàÿßŸÑŸÑŸá [[ÿ∫ÿ®ÿßÿ±]] ÿ±ÿßÿ≥ ÿßŸÑÿÆÿ±ŸàŸÅÿßÿ™ üëéüèª,0.078227,0.897382,1,0,1,127,Successful
698,ÿßŸÑÿßŸÜÿ≥ÿßŸÜŸäŸá ÿ™ŸÜÿ™ŸáŸÉ ŸÅŸä ÿßŸÑÿ∫Ÿàÿ∑Ÿá ŸàŸäÿ∞ÿ®ÿ≠ ŸàŸäŸÇÿ™ŸÑ ÿßŸÑÿßÿ∑ÿ∫ÿßŸÑ ŸàÿßŸÑÿ¥ŸäŸàÿÆ ŸàÿßŸÑŸÜÿ≥ÿßÿ° ÿπÿ±ÿ∂ ÿßŸÑÿπÿ±ÿ® ŸÖÿ≥ÿ™ÿ®ÿßÿ≠ ŸÅŸä ÿ≥Ÿàÿ±Ÿäÿß ŸÑŸäÿßÿ™Ÿä ŸÖÿ≠ŸÑŸÑ ÿ≥Ÿäÿßÿ≥Ÿä ŸÅŸäŸÇŸàŸÑ ÿßŸÑŸÖÿØŸÜŸäŸàŸÜ ŸÅŸä ÿßŸÑÿ∫Ÿàÿ∑Ÿá ŸäŸÖÿ´ŸÑŸàŸÜ ÿ∞ÿ±ÿπ ÿ®ÿ¥ÿ±Ÿä ŸÑŸÑÿ¨ŸÖÿßÿπÿßÿ™ ÿßŸÑÿßÿ±Ÿáÿßÿ®ŸäŸá ŸàŸÖÿ´ŸÑ Ÿáÿ∞Ÿá ŸÑÿßŸÖŸàÿ± ÿ™ÿ≠ÿØÿ´ ŸÅŸä ÿßŸÑÿ≠ÿ±ÿ® üò† [[ÿ¥Ÿäÿ°]] ŸäŸÅŸàÿ± ÿßŸÑÿØŸÖ ŸàÿßŸÑŸÑŸá ŸÇÿ±Ÿàÿ® ÿ¨ÿ≤ÿßÿ°ÿ≥ÿ∑ŸäŸÜ ŸÇÿ±Ÿàÿ® ŸÅŸÑÿ≥ÿ∑ŸäŸÜŸä ÿßŸÜŸÇÿ∞Ÿàÿß ÿßŸÑÿ∫Ÿàÿ∑Ÿá,ÿßŸÑÿßŸÜÿ≥ÿßŸÜŸäŸá ÿ™ŸÜÿ™ŸáŸÉ ŸÅŸä ÿßŸÑÿ∫Ÿàÿ∑Ÿá ŸàŸäÿ∞ÿ®ÿ≠ ŸàŸäŸÇÿ™ŸÑ ÿßŸÑÿßÿ∑ÿ∫ÿßŸÑ ŸàÿßŸÑÿ¥ŸäŸàÿÆ ŸàÿßŸÑŸÜÿ≥ÿßÿ° ÿπÿ±ÿ∂ ÿßŸÑÿπÿ±ÿ® ŸÖÿ≥ÿ™ÿ®ÿßÿ≠ ŸÅŸä ÿ≥Ÿàÿ±Ÿäÿß ŸÑŸäÿßÿ™Ÿä ŸÖÿ≠ŸÑŸÑ ÿ≥Ÿäÿßÿ≥Ÿä ŸÅŸäŸÇŸàŸÑ ÿßŸÑŸÖÿØŸÜŸäŸàŸÜ ŸÅŸä ÿßŸÑÿ∫Ÿàÿ∑Ÿá ŸäŸÖÿ´ŸÑŸàŸÜ ÿ∞ÿ±ÿπ ÿ®ÿ¥ÿ±Ÿä ŸÑŸÑÿ¨ŸÖÿßÿπÿßÿ™ ÿßŸÑÿßÿ±Ÿáÿßÿ®ŸäŸá ŸàŸÖÿ´ŸÑ Ÿáÿ∞Ÿá ŸÑÿßŸÖŸàÿ± ÿ™ÿ≠ÿØÿ´ ŸÅŸä ÿßŸÑÿ≠ÿ±ÿ® üò† [[ÿ¥ÿ£ŸÜ]] ŸäŸÅŸàÿ± ÿßŸÑÿØŸÖ ŸàÿßŸÑŸÑŸá ŸÇÿ±Ÿàÿ® ÿ¨ÿ≤ÿßÿ°ÿ≥ÿ∑ŸäŸÜ ŸÇÿ±Ÿàÿ® ŸÅŸÑÿ≥ÿ∑ŸäŸÜŸä ÿßŸÜŸÇÿ∞Ÿàÿß ÿßŸÑÿ∫Ÿàÿ∑Ÿá,0.459109,0.600656,1,0,1,300,Successful


In [None]:
# last rows of the log whose result_type is "Failed"
results.loc[results["result_type"].eq("Failed")].tail()

Unnamed: 0,original_text,perturbed_text,original_score,perturbed_score,original_output,perturbed_output,ground_truth_output,num_queries,result_type
705,ÿ®ÿßŸäÿπ ÿßŸÑŸÉŸÑŸäÿ¨ÿß ÿßÿ¥ŸÉÿßŸÑ ÿßŸÑÿ®ŸÜÿßÿ™ ÿßŸÑÿÆÿßŸÇŸäŸÜ ÿπŸÑŸäŸá ü§ÆüòÇ,ÿ®ÿßŸäÿπ ÿßŸÑŸÉŸÑŸäÿ¨ÿß ÿßÿ¥ŸÉÿßŸÑ ÿßŸÑÿ®ŸÜÿßÿ™ ÿßŸÑÿÆÿßŸÇŸäŸÜ ÿπŸÑŸäŸá ü§ÆüòÇ,0.017117,0.017117,1,1,1,6,Failed
706,ŸÜÿØÿßÿ° ŸÑ ÿßŸáŸÑŸÜÿß ŸÅŸä ÿßŸÑŸäŸÖŸÜ ÿØŸàÿ≥Ÿàÿß ÿπŸÑŸä ÿ±ÿ°Ÿàÿ≥ ÿßŸÑŸÖÿ±ÿ™ÿ≤ŸÇŸá ÿßŸÑÿßŸÖÿßÿ±ÿßÿ™ŸäŸÜ ÿßŸÑŸÖÿ≠ÿ™ŸÑŸäŸÜ ÿ® üëûüëûüë¢,ŸÜÿØÿßÿ° ŸÑ ÿßŸáŸÑŸÜÿß ŸÅŸä ÿßŸÑŸäŸÖŸÜ ÿØŸàÿ≥Ÿàÿß ÿπŸÑŸä ÿ±ÿ°Ÿàÿ≥ ÿßŸÑŸÖÿ±ÿ™ÿ≤ŸÇŸá ÿßŸÑÿßŸÖÿßÿ±ÿßÿ™ŸäŸÜ ÿßŸÑŸÖÿ≠ÿ™ŸÑŸäŸÜ ÿ® üëûüëûüë¢,0.006017,0.006017,1,1,1,9,Failed
707,ŸÇŸÑŸá ÿ≠Ÿäÿß ŸàŸäŸÜ ÿßŸáŸÑŸáŸÖ ÿ∞ŸàŸÑŸä ÿßŸÑŸÑŸá ŸÑÿßŸäÿ®ŸÑÿßŸÜÿß ŸÑŸáÿßŸÑÿØÿ±ÿ¨Ÿá ÿ™ÿ®ÿπŸäŸÜ ŸÜŸÅÿ≥ŸÉ ÿπÿ¥ÿßŸÜ Ÿàÿßÿ≠ÿØ ŸÖÿßÿØÿ±Ÿä ÿπŸÜŸÉ üëû ÿ®ÿßŸäÿπ ÿßŸÑŸÉŸÑŸäÿ¨ÿß,ŸÇŸÑŸá ÿ≠Ÿäÿß ŸàŸäŸÜ ÿßŸáŸÑŸáŸÖ ÿ∞ŸàŸÑŸä ÿßŸÑŸÑŸá ŸÑÿßŸäÿ®ŸÑÿßŸÜÿß ŸÑŸáÿßŸÑÿØÿ±ÿ¨Ÿá ÿ™ÿ®ÿπŸäŸÜ ŸÜŸÅÿ≥ŸÉ ÿπÿ¥ÿßŸÜ Ÿàÿßÿ≠ÿØ ŸÖÿßÿØÿ±Ÿä ÿπŸÜŸÉ üëû ÿ®ÿßŸäÿπ ÿßŸÑŸÉŸÑŸäÿ¨ÿß,0.001305,0.001305,1,1,1,16,Failed
708,ÿ´ŸÖ ÿßŸÑÿ∑ÿ≠ŸÑÿ®Ÿá üê∏üê∏,ÿ´ŸÖ ÿßŸÑÿ∑ÿ≠ŸÑÿ®Ÿá üê∏üê∏,0.083075,0.083075,1,1,1,2,Failed
709,ŸÖÿ™ÿÆŸÑŸÅ ÿ≠ÿ™Ÿä ÿßŸÑÿ≠ŸÑÿßŸÑ ÿ≠ÿ±ŸÖÿ™ŸàŸá üò∑,ŸÖÿ™ÿÆŸÑŸÅ ÿ≠ÿ™Ÿä ÿßŸÑÿ≠ŸÑÿßŸÑ ÿ≠ÿ±ŸÖÿ™ŸàŸá üò∑,0.006062,0.006062,1,1,1,5,Failed


In [None]:
# last rows of the log whose result_type is "Skipped"
results.loc[results["result_type"].eq("Skipped")].tail()

Unnamed: 0,original_text,perturbed_text,original_score,perturbed_score,original_output,perturbed_output,ground_truth_output,num_queries,result_type
566,ÿ®ŸÑŸàŸÉ Ÿäÿßÿ¨ÿ≤ŸÖŸá üëû,ÿ®ŸÑŸàŸÉ Ÿäÿßÿ¨ÿ≤ŸÖŸá üëû,0.593629,0.593629,0,0,1,1,Skipped
580,ÿßŸÑÿ∫ÿ®ÿßÿ± ÿ¨ÿßÿ° ŸÑŸÜÿß ŸÖŸÜ ÿßŸÑÿ≥ÿπŸàÿØŸäŸá üò∑ ŸÑÿß ÿØÿ®ÿ±ŸáŸÖ,ÿßŸÑÿ∫ÿ®ÿßÿ± ÿ¨ÿßÿ° ŸÑŸÜÿß ŸÖŸÜ ÿßŸÑÿ≥ÿπŸàÿØŸäŸá üò∑ ŸÑÿß ÿØÿ®ÿ±ŸáŸÖ,0.515921,0.515921,0,0,1,1,Skipped
606,ÿßŸÖÿ≥ ÿ¥ÿßÿ±Ÿä ÿ≠ÿ≥ÿßÿ® ŸÜÿ™ŸÅŸÑŸäŸÉÿ≥ ŸàŸÇÿ®ŸÑ ÿ¥ŸàŸä ÿßŸÑŸÜÿ™ ÿßŸÜÿ™Ÿáÿß ÿ≠ÿ∏Ÿä üí©,ÿßŸÖÿ≥ ÿ¥ÿßÿ±Ÿä ÿ≠ÿ≥ÿßÿ® ŸÜÿ™ŸÅŸÑŸäŸÉÿ≥ ŸàŸÇÿ®ŸÑ ÿ¥ŸàŸä ÿßŸÑŸÜÿ™ ÿßŸÜÿ™Ÿáÿß ÿ≠ÿ∏Ÿä üí©,0.55571,0.55571,0,0,1,1,Skipped
612,ÿ®ÿ≥ ÿ±ÿÆÿßŸÖŸá Ÿäÿß ÿ±ÿÆÿßŸÖŸá üòÇüòÇüî™üî™,ÿ®ÿ≥ ÿ±ÿÆÿßŸÖŸá Ÿäÿß ÿ±ÿÆÿßŸÖŸá üòÇüòÇüî™üî™,0.505606,0.505606,0,0,1,1,Skipped
631,‚Ä¢ ÿßŸÑÿπÿßŸÖŸÑ ŸÇÿßÿπÿØ Ÿäÿ´ÿ®ÿ™ ŸÑŸÑŸÖÿπÿ≤ÿ® ÿßŸÜ ÿßŸÑÿÆÿ±ŸàŸÅ üêë ‚Ä¢ ÿµÿßÿ± ÿµÿØŸäŸÇŸá ÿ≠ÿ™Ÿä ŸÖÿß Ÿäÿ∞ÿ®ÿ≠Ÿá üòÇüòÇ,‚Ä¢ ÿßŸÑÿπÿßŸÖŸÑ ŸÇÿßÿπÿØ Ÿäÿ´ÿ®ÿ™ ŸÑŸÑŸÖÿπÿ≤ÿ® ÿßŸÜ ÿßŸÑÿÆÿ±ŸàŸÅ üêë ‚Ä¢ ÿµÿßÿ± ÿµÿØŸäŸÇŸá ÿ≠ÿ™Ÿä ŸÖÿß Ÿäÿ∞ÿ®ÿ≠Ÿá üòÇüòÇ,0.522233,0.522233,0,0,1,1,Skipped
