https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/misc/Augmentation_Control_Notebook.ipynb#scrollTo=34SjM0fp6kor

https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/test-specific-notebooks/Robustness_DEMO.ipynb#scrollTo=LzhAZEqQVkym

## datasets interactions

In [19]:
from repsim.nlp import get_dataset
import datasets

ds = get_dataset("sst2")
ds["train"]

Dataset({
    features: ['idx', 'sentence', 'label'],
    num_rows: 67349
})

In [49]:
ds["validation"].to_csv()

TypeError: Dataset.to_csv() missing 1 required positional argument: 'path_or_buf'

In [20]:
datasets.load_dataset("csv", data_files="sst2_augmented.csv")

Generating train split: 0 examples [00:00, ? examples/s]

  return pd.read_csv(xopen(filepath_or_buffer, "rb", download_config=download_config), **kwargs)


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 67349
    })
})

In [18]:
ds["train"][0]

{'idx': 0,
 'sentence': 'hide new secretions from the parental units ',
 'label': 0}

## augmentation

In [1]:
from langtest import Harness


In [18]:
# How many of the samples get affected by the test
# custom_proportions = {"add_typo": 0.3}

# Dataset to augment
data_cfg = {
    "data_source": "sst2",
    "feature_column": "sentence",
    "target_column": "label",
    "split": "train",
    "source": "huggingface",
}

model_cfg = {"model": "google/bert_uncased_L-2_H-128_A-2", "hub": "huggingface"}
# model_cfg = {"model": "google/bert_uncased_L-4_H-256_A-4", "hub": "huggingface"}
# model_cfg = {"model": "distilbert/distilroberta-base", "hub": "huggingface"}
# model_cfg = {"model": "google/multiberts-seed_0", "hub": "huggingface"}

harness = Harness(task="text-classification", model=model_cfg, data=data_cfg)

# Tests
prob = 1
harness.configure(
    {
        "tests": {
            "defaults": {"min_pass_rate": 0.65},
            "robustness": {
                "add_abbreviation": {"min_pass_rate": 0.95, "prob":prob},
                "add_contraction": {"min_pass_rate": 0.95, "prob":prob},
                "add_punctuation": {"min_pass_rate": 0.95, "prob":prob},
                "add_slangs": {"min_pass_rate": 0.95, "prob":prob},
                "add_speech_to_text_typo": {"min_pass_rate": 0.95, "prob":prob},
                "add_typo": {"min_pass_rate": 0.95, "prob":prob},
                "american_to_british": {"min_pass_rate": 0.95, "prob":prob},
                "dyslexia_word_swap": {"min_pass_rate": 0.95, "prob":prob},
                "number_to_word": {"min_pass_rate": 0.95, "prob":prob},
                "strip_all_punctuation": {"min_pass_rate": 0.95, "prob":prob},
                "titlecase": {"min_pass_rate": 0.95, "prob":prob},
                "lowercase": {"min_pass_rate": 0.95, "prob":prob},
                "uppercase": {"min_pass_rate": 0.95, "prob":prob},
                "multiple_perturbations": {
                    "min_pass_rate": 0.60,
                    "prob": prob,
                    "perturbations1": [
                        "add_abbreviation",
                        "add_contraction",
                        "add_punctuation",
                        "add_slangs",
                        "add_speech_to_text_typo",
                        "add_typo",
                        "american_to_british",
                        "dyslexia_word_swap",
                        "number_to_word",
                        "strip_all_punctuation",
                        "titlecase",
                        "lowercase",
                        "uppercase",
                    ],
                },
            },
        }
    }
)

Test Configuration : 
 {
 "tests": {
  "defaults": {
   "min_pass_rate": 1.0
  },
  "robustness": {
   "add_typo": {
    "min_pass_rate": 0.7
   },
   "american_to_british": {
    "min_pass_rate": 0.7
   }
  },
  "accuracy": {
   "min_micro_f1_score": {
    "min_score": 0.7
   }
  },
  "bias": {
   "replace_to_female_pronouns": {
    "min_pass_rate": 0.7
   },
   "replace_to_low_income_country": {
    "min_pass_rate": 0.7
   }
  },
  "fairness": {
   "min_gender_f1_score": {
    "min_score": 0.6
   }
  },
  "representation": {
   "min_label_representation_count": {
    "min_count": 50
   }
  }
 }
}


{'tests': {'defaults': {'min_pass_rate': 0.65},
  'robustness': {'add_abbreviation': {'min_pass_rate': 0.95, 'prob': 1},
   'add_contraction': {'min_pass_rate': 0.95, 'prob': 1},
   'add_punctuation': {'min_pass_rate': 0.95, 'prob': 1},
   'add_slangs': {'min_pass_rate': 0.95, 'prob': 1},
   'add_speech_to_text_typo': {'min_pass_rate': 0.95, 'prob': 1},
   'add_typo': {'min_pass_rate': 0.95, 'prob': 1},
   'american_to_british': {'min_pass_rate': 0.95, 'prob': 1},
   'dyslexia_word_swap': {'min_pass_rate': 0.95, 'prob': 1},
   'number_to_word': {'min_pass_rate': 0.95, 'prob': 1},
   'strip_all_punctuation': {'min_pass_rate': 0.95, 'prob': 1},
   'titlecase': {'min_pass_rate': 0.95, 'prob': 1},
   'lowercase': {'min_pass_rate': 0.95, 'prob': 1},
   'uppercase': {'min_pass_rate': 0.95, 'prob': 1},
   'multiple_perturbations': {'min_pass_rate': 0.6,
    'prob': 1,
    'perturbations1': ['add_abbreviation',
     'add_contraction',
     'add_punctuation',
     'add_slangs',
     'add_speech

In [19]:
harness.generate()

Generating testcases...: 100%|██████████| 1/1 [00:00<00:00, 3802.63it/s]
[W010] - Test 'add_abbreviation': 31144 samples removed out of 67349
[W010] - Test 'add_contraction': 65198 samples removed out of 67349
[W010] - Test 'add_slangs': 41975 samples removed out of 67349
[W010] - Test 'add_speech_to_text_typo': 21588 samples removed out of 67349
[W010] - Test 'add_typo': 2775 samples removed out of 67349
[W010] - Test 'american_to_british': 64304 samples removed out of 67349
[W010] - Test 'dyslexia_word_swap': 34803 samples removed out of 67349
[W010] - Test 'number_to_word': 66343 samples removed out of 67349
[W010] - Test 'strip_all_punctuation': 32321 samples removed out of 67349
[W010] - Test 'titlecase': 7 samples removed out of 67349
[W010] - Test 'lowercase': 67349 samples removed out of 67349
[W010] - Test 'uppercase': 7 samples removed out of 67349
[W010] - Test 'add_abbreviation-add_contraction-add_punctuation-add_slangs-add_speech_to_text_typo-add_typo-american_to_british-d



In [20]:
harness.save("saved_test_configurations", include_generated_results=True)
harness.testcases()

Unnamed: 0,category,test_type,original,test_case
0,robustness,add_abbreviation,hide new secretions from the parental units,hide new secretions from da parental units
1,robustness,add_abbreviation,that loves its characters and communicates som...,that loves its characters and communicates som...
2,robustness,add_abbreviation,remains utterly satisfied to remain the same t...,remains utterly satisfied 2 remain da same thr...
3,robustness,add_abbreviation,on the worst revenge-of-the-nerds clichés the ...,on da worst revenge-of-tdanerds clichés thdail...
4,robustness,add_abbreviation,that 's far too tragic to merit such superfici...,that 's far too tragic 2 merit such superficia...
...,...,...,...,...
515065,robustness,add_abbreviation-add_contraction-add_punctuati...,a delightful comedy,A DELIGHTFUL COMSDY
515066,robustness,add_abbreviation-add_contraction-add_punctuati...,"anguish , anger and frustration",ANGUISH ANGSR AND FRUSTRATION
515067,robustness,add_abbreviation-add_contraction-add_punctuati...,"at achieving the modest , crowd-pleasing goals...",AT ACHIRVING DA MODEST CROWDPLEASING GOELZ IT ...
515068,robustness,add_abbreviation-add_contraction-add_punctuati...,a patient viewer,S PATIENT VIEWER


In [21]:
harness.run()
harness.generated_results()

Running testcases... : 100%|██████████| 515070/515070 [36:04<00:00, 238.00it/s] 


Unnamed: 0,category,test_type,original,test_case,expected_result,actual_result,pass
0,robustness,add_abbreviation,hide new secretions from the parental units,hide new secretions from da parental units,LABEL_0,LABEL_0,True
1,robustness,add_abbreviation,that loves its characters and communicates som...,that loves its characters and communicates som...,LABEL_0,LABEL_0,True
2,robustness,add_abbreviation,remains utterly satisfied to remain the same t...,remains utterly satisfied 2 remain da same thr...,LABEL_0,LABEL_0,True
3,robustness,add_abbreviation,on the worst revenge-of-the-nerds clichés the ...,on da worst revenge-of-tdanerds clichés thdail...,LABEL_0,LABEL_0,True
4,robustness,add_abbreviation,that 's far too tragic to merit such superfici...,that 's far too tragic 2 merit such superficia...,LABEL_0,LABEL_0,True
...,...,...,...,...,...,...,...
515065,robustness,add_abbreviation-add_contraction-add_punctuati...,a delightful comedy,A DELIGHTFUL COMSDY,LABEL_0,LABEL_0,True
515066,robustness,add_abbreviation-add_contraction-add_punctuati...,"anguish , anger and frustration",ANGUISH ANGSR AND FRUSTRATION,LABEL_0,LABEL_0,True
515067,robustness,add_abbreviation-add_contraction-add_punctuati...,"at achieving the modest , crowd-pleasing goals...",AT ACHIRVING DA MODEST CROWDPLEASING GOELZ IT ...,LABEL_0,LABEL_0,True
515068,robustness,add_abbreviation-add_contraction-add_punctuati...,a patient viewer,S PATIENT VIEWER,LABEL_0,LABEL_0,True


In [22]:
harness.report()

Unnamed: 0,category,test_type,fail_count,pass_count,pass_rate,minimum_pass_rate,pass
0,robustness,add_abbreviation,0,36205,100%,95%,True
1,robustness,add_contraction,0,2151,100%,95%,True
2,robustness,add_punctuation,0,67349,100%,95%,True
3,robustness,add_slangs,0,25374,100%,95%,True
4,robustness,add_speech_to_text_typo,0,45761,100%,95%,True
5,robustness,add_typo,0,64574,100%,95%,True
6,robustness,american_to_british,0,3045,100%,95%,True
7,robustness,dyslexia_word_swap,0,32546,100%,95%,True
8,robustness,number_to_word,0,1006,100%,95%,True
9,robustness,strip_all_punctuation,0,35028,100%,95%,True


In [40]:
harness.augment(
    training_data=data_cfg,
    save_data_path="sst2_augmented.csv",
    # custom_proportions=custom_proportions,
    export_mode="add",
)

[W010] - Test 'add_abbreviation': 11 samples removed out of 82

[W010] - Test 'add_punctuation': 160 samples removed out of 165

[W010] - Test 'add_slangs': 35 samples removed out of 82

[W010] - Test 'add_speech_to_text_typo': 3 samples removed out of 82

[W010] - Test 'add_typo': 4 samples removed out of 82

[W010] - Test 'american_to_british': 76 samples removed out of 82

[W010] - Test 'dyslexia_word_swap': 9 samples removed out of 82

[W010] - Test 'number_to_word': 159 samples removed out of 165

[W010] - Test 'strip_all_punctuation': 2 samples removed out of 82





## try out loading

In [50]:
new_harness = Harness.load(
    save_dir="saved_test_configurations",
    model=model_cfg,
    task="text-classification",
    load_testcases=True
)

Test Configuration : 
 {
 "tests": {
  "defaults": {
   "min_pass_rate": 0.65
  },
  "robustness": {
   "add_abbreviation": {
    "min_pass_rate": 0.95
   },
   "add_contraction": {
    "min_pass_rate": 0.95
   },
   "add_punctuation": {
    "min_pass_rate": 0.95
   },
   "add_slangs": {
    "min_pass_rate": 0.95
   },
   "add_speech_to_text_typo": {
    "min_pass_rate": 0.95
   },
   "add_typo": {
    "min_pass_rate": 0.95
   },
   "american_to_british": {
    "min_pass_rate": 0.95,
    "parameters": {
     "accent_map": {
      "accessorize": "accessorise",
      "accessorized": "accessorised",
      "accessorizes": "accessorises",
      "accessorizing": "accessorising",
      "acclimatization": "acclimatisation",
      "acclimatize": "acclimatise",
      "acclimatized": "acclimatised",
      "acclimatizes": "acclimatises",
      "acclimatizing": "acclimatising",
      "accouterments": "accoutrements",
      "aerogram": "aerogramme",
      "aerograms": "aerogrammes",
      "aggrandiz

In [52]:
new_harness.run()

Running testcases... :   0%|          | 0/15114 [00:00<?, ?it/s]

Running testcases... : 100%|██████████| 15114/15114 [10:36<00:00, 23.76it/s]




In [53]:
new_harness.report()

Unnamed: 0,category,test_type,fail_count,pass_count,pass_rate,minimum_pass_rate,pass
0,robustness,add_abbreviation,190,1316,87%,95%,False
1,robustness,add_contraction,15,129,90%,95%,False
2,robustness,add_punctuation,5,47,90%,95%,False
3,robustness,add_slangs,116,999,90%,95%,False
4,robustness,add_speech_to_text_typo,159,1554,91%,95%,False
5,robustness,add_typo,196,1537,89%,95%,False
6,robustness,american_to_british,4,116,97%,95%,True
7,robustness,dyslexia_word_swap,107,1294,92%,95%,False
8,robustness,number_to_word,4,51,93%,95%,False
9,robustness,strip_all_punctuation,259,1553,86%,95%,False
