From de0df1adc9bb525fd00cba0fdc9ed33891054954 Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Thu, 20 Jul 2023 14:36:41 +0530
Subject: [PATCH 01/21] Task: support hf dataset for augmentation

---
 langtest/augmentation/__init__.py | 58 +++++++++++++++++++++++--------
 langtest/langtest.py              |  6 ++--
 2 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/langtest/augmentation/__init__.py b/langtest/augmentation/__init__.py
index 0e16c84e8..5ea0084aa 100644
--- a/langtest/augmentation/__init__.py
+++ b/langtest/augmentation/__init__.py
@@ -11,7 +11,7 @@
 
 from langtest.transform import TestFactory
 from langtest.utils.custom_types import Sample
-from langtest.datahandler.datasource import DataFactory
+from langtest.datahandler.datasource import DataFactory, HuggingFaceDataset
 from langtest.transform.utils import create_terminology
 from langtest.utils.custom_types.output import NEROutput
 from langtest.utils.custom_types.predictions import NERPrediction, SequenceLabel
@@ -93,7 +93,12 @@ def __init__(
             with open(self.config) as fread:
                 self.config = yaml.safe_load(fread)
 
-    def fix(self, input_path: str, output_path, export_mode: str = "add"):
+    def fix(
+        self,
+        input_path: Optional[Union[str, dict]],
+        output_path,
+        export_mode: str = "add",
+    ):
         """Applies perturbations to the input data based on the recommendations from harness reports.
 
         Args:
@@ -108,8 +113,17 @@ def fix(self, input_path: str, output_path, export_mode: str = "add"):
         Returns:
             List[Dict[str, Any]]: A list of augmented data samples.
         """
-        self.df = DataFactory(input_path, self.task)
-        data = self.df.load()
+        if type(input_path) == dict:
+            self.df = HuggingFaceDataset(input_path["name"], self.task)
+            data = self.df.load_data(
+                feature_column=input_path.get("feature_column", "text"),
+                target_column=input_path.get("target_column", "label"),
+                split=input_path.get("split", "test"),
+                subset=input_path.get("subset", None),
+            )
+        else:
+            self.df = DataFactory(input_path, self.task)
+            data = self.df.load()
         TestFactory.is_augment = True
         supported_tests = TestFactory.test_scenarios()
         suggest: pd.DataFrame = self.suggestions(self.h_report)
@@ -162,19 +176,33 @@ def fix(self, input_path: str, output_path, export_mode: str = "add"):
                         sample_data = random.choices(data, k=int(sample_length))
                     aug_data, _ = TestFactory.transform(self.task, sample_data, test_type)
                     final_aug_data.extend(aug_data)
+        if type(input_path) == dict:
+            if export_mode == "inplace":
+                final_aug_data = list(hash_map.values())
+                self.df.export_data(final_aug_data, output_path)
+            elif export_mode == "transformed":
+                final_aug_data = [hash_map[i] for i in hash_map if i in sample_indices]
+                self.df.export_data(final_aug_data, output_path)
+            else:
+                data.extend(final_aug_data)
+                self.df.export_data(data, output_path)
+
+            TestFactory.is_augment = False
+            return final_aug_data
 
-        if export_mode == "inplace":
-            final_aug_data = list(hash_map.values())
-            self.df.export(final_aug_data, output_path)
-        elif export_mode == "transformed":
-            final_aug_data = [hash_map[i] for i in hash_map if i in sample_indices]
-            self.df.export(final_aug_data, output_path)
         else:
-            data.extend(final_aug_data)
-            self.df.export(data, output_path)
-
-        TestFactory.is_augment = False
-        return final_aug_data
+            if export_mode == "inplace":
+                final_aug_data = list(hash_map.values())
+                self.df.export(final_aug_data, output_path)
+            elif export_mode == "transformed":
+                final_aug_data = [hash_map[i] for i in hash_map if i in sample_indices]
+                self.df.export(final_aug_data, output_path)
+            else:
+                data.extend(final_aug_data)
+                self.df.export(data, output_path)
+
+            TestFactory.is_augment = False
+            return final_aug_data
 
     def suggestions(self, report: "pd.DataFrame") -> "pd.DataFrame":
         """Calculates suggestions for improving test performance based on a given report.
diff --git a/langtest/langtest.py b/langtest/langtest.py
index cf57e482c..075666afd 100644
--- a/langtest/langtest.py
+++ b/langtest/langtest.py
@@ -717,7 +717,7 @@ def generated_results(self) -> Optional[pd.DataFrame]:
 
     def augment(
         self,
-        input_path: str,
+        input_path: Optional[Union[str, dict]],
         output_path: str,
         custom_proportions: Union[Dict, List] = None,
         export_mode: str = "add",
@@ -726,7 +726,9 @@ def augment(
         """Augments the data in the input file located at `input_path` and saves the result to `output_path`.
 
         Args:
-            input_path (str): Path to the input file.
+           input_path (Union[str, dict]): The path to the input data file or a dictionary containing the huggingface dataset directly.
+                                        If a dictionary is provided, the keys 'name', 'feature_column', 'target_column',
+                                        'split', and 'subset' can be used to specify the dataset details.
             output_path (str): Path to save the augmented data.
             custom_proportions (Union[Dict, List]):
             export_mode (str, optional): Determines how the samples are modified or exported.

From 7dcd8cb52252b8d449154c9b31afc663547b003e Mon Sep 17 00:00:00 2001
From: Rakshit Khajuria <rakshitraina1234@gmail.com>
Date: Thu, 20 Jul 2023 14:49:33 +0530
Subject: [PATCH 02/21] fix(augmentation/__init__.py): Bug fix in export_mode =
 transformed

---
 langtest/augmentation/__init__.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/langtest/augmentation/__init__.py b/langtest/augmentation/__init__.py
index 5ea0084aa..1323e3d55 100644
--- a/langtest/augmentation/__init__.py
+++ b/langtest/augmentation/__init__.py
@@ -102,16 +102,17 @@ def fix(
         """Applies perturbations to the input data based on the recommendations from harness reports.
 
         Args:
-            input_path (str): The path to the input data file.
+            input_path (Union[str, dict]): The path to the input data file or a dictionary containing the huggingface dataset directly.
+                                        If a dictionary is provided, the keys 'name', 'feature_column', 'target_column',
+                                        'split', and 'subset' can be used to specify the dataset details.
             output_path (str): The path to save the augmented data file.
             export_mode (str, optional): Determines how the samples are modified or exported.
                                         - 'inplace': Modifies the list of samples in place.
                                         - 'add': Adds new samples to the input data.
                                         - 'transformed': Exports only the transformed data, excluding untransformed samples.
                                         Defaults to 'add'.
-
-        Returns:
-            List[Dict[str, Any]]: A list of augmented data samples.
+            Returns:
+        List[Dict[str, Any]]: A list of augmented data samples.
         """
         if type(input_path) == dict:
             self.df = HuggingFaceDataset(input_path["name"], self.task)
@@ -136,7 +137,7 @@ def fix(
 
         final_aug_data = []
         hash_map = {k: v for k, v in enumerate(data)}
-
+        transformed_data = []
         for proportion in suggest.iterrows():
             cat = proportion[-1]["category"].lower()
             if cat not in ["robustness", "bias"]:
@@ -149,7 +150,7 @@ def fix(
                     * self.max_prop
                     * (proportion[-1]["proportion_increase"] / sum_propotion)
                 )
-                if export_mode in ("inplace", "transformed"):
+                if export_mode in ("inplace"):
                     sample_indices = random.sample(
                         range(0, len(data)), int(sample_length)
                     )
@@ -176,13 +177,16 @@ def fix(
                         sample_data = random.choices(data, k=int(sample_length))
                     aug_data, _ = TestFactory.transform(self.task, sample_data, test_type)
                     final_aug_data.extend(aug_data)
+
+                    if export_mode == "transformed":
+                        transformed_data.extend(aug_data)
         if type(input_path) == dict:
+
             if export_mode == "inplace":
                 final_aug_data = list(hash_map.values())
                 self.df.export_data(final_aug_data, output_path)
             elif export_mode == "transformed":
-                final_aug_data = [hash_map[i] for i in hash_map if i in sample_indices]
-                self.df.export_data(final_aug_data, output_path)
+                self.df.export_data(transformed_data, output_path)
             else:
                 data.extend(final_aug_data)
                 self.df.export_data(data, output_path)
@@ -195,8 +199,7 @@ def fix(
                 final_aug_data = list(hash_map.values())
                 self.df.export(final_aug_data, output_path)
             elif export_mode == "transformed":
-                final_aug_data = [hash_map[i] for i in hash_map if i in sample_indices]
-                self.df.export(final_aug_data, output_path)
+                self.df.export(transformed_data, output_path)
             else:
                 data.extend(final_aug_data)
                 self.df.export(data, output_path)

From 56128c994a9bf4a28e47ad98e7093c31ab21e895 Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Thu, 20 Jul 2023 19:48:32 +0530
Subject: [PATCH 03/21] Test(tests/test_augmentation.py): added test for
 coverage

---
 langtest/augmentation/__init__.py |  1 -
 tests/test_augmentation.py        | 54 +++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/langtest/augmentation/__init__.py b/langtest/augmentation/__init__.py
index 1323e3d55..030be97a3 100644
--- a/langtest/augmentation/__init__.py
+++ b/langtest/augmentation/__init__.py
@@ -181,7 +181,6 @@ def fix(
                     if export_mode == "transformed":
                         transformed_data.extend(aug_data)
         if type(input_path) == dict:
-
             if export_mode == "inplace":
                 final_aug_data = list(hash_map.values())
                 self.df.export_data(final_aug_data, output_path)
diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py
index 3727c9bd4..815e166b9 100644
--- a/tests/test_augmentation.py
+++ b/tests/test_augmentation.py
@@ -37,6 +37,20 @@ def setUp(self) -> None:
                 "config": "tests/fixtures/config_ner.yaml",
                 "hub": "huggingface",
             },
+            "spacy_textclassification_csv_dataset": {
+                "task": "text-classification",
+                "model": "textcat_imdb",
+                "data": "imdb/sample.csv",
+                "config": "tests/fixtures/config_ner.yaml",
+                "hub": "spacy",
+            },
+            "huggingface_textclassification_csv_dataset": {
+                "task": "text-classification",
+                "model": "lvwerra/distilbert-imdb",
+                "data": "imdb/sample.csv",
+                "config": "tests/fixtures/config_ner.yaml",
+                "hub": "huggingface",
+            },
         }
 
     def test_augment_robustness(self):
@@ -163,6 +177,46 @@ def test_spacy_templatic_augmentation(self):
         is_file_exist = pl.Path("tests/fixtures/augmentated_train.conll").is_file()
         self.assertTrue(is_file_exist)
 
+    def test_csv_dataset_textclassification_hf(self):
+        """
+        Test augmentation using Hugging Face NER model.
+        """
+        harness = Harness(**self.params["huggingface_textclassification_csv_dataset"])
+        self.assertIsInstance(harness, Harness)
+        harness.data = harness.data[:50]
+        report = harness.generate().run().report()
+        self.assertIsInstance(report, pd.DataFrame)
+
+        harness.augment(
+            input_path="imdb/sample.csv",
+            output_path="augmented_train_transformed.csv",
+            export_mode="transformed",
+        )
+        is_file_exist = pl.Path(
+            "tests/fixtures/augmented_train_transformed.csv"
+        ).is_file()
+        self.assertTrue(is_file_exist)
+
+    def test_csv_dataset_textclassification_spacy(self):
+        """
+        Test augmentation using Hugging Face NER model.
+        """
+        harness = Harness(**self.params["spacy_textclassification_csv_dataset"])
+        self.assertIsInstance(harness, Harness)
+        harness.data = harness.data[:50]
+        report = harness.generate().run().report()
+        self.assertIsInstance(report, pd.DataFrame)
+
+        harness.augment(
+            input_path="imdb/sample.csv",
+            output_path="augmented_train_transformed.csv",
+            export_mode="transformed",
+        )
+        is_file_exist = pl.Path(
+            "tests/fixtures/augmented_train_transformed.csv"
+        ).is_file()
+        self.assertTrue(is_file_exist)
+
 
 class TestTemplaticAugmentation(unittest.TestCase):
     """Test case for the TemplaticAugment class"""

From 20347a748ebf25529c46cc1b7489c98dd08e4d3b Mon Sep 17 00:00:00 2001
From: Rakshit Khajuria <rakshitraina1234@gmail.com>
Date: Thu, 20 Jul 2023 20:00:12 +0530
Subject: [PATCH 04/21] Test(test_augmentation.py): Added more tests

---
 tests/test_augmentation.py | 95 +++++++++++++++++++++++++++-----------
 1 file changed, 68 insertions(+), 27 deletions(-)

diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py
index 815e166b9..0f9f7aca9 100644
--- a/tests/test_augmentation.py
+++ b/tests/test_augmentation.py
@@ -9,9 +9,7 @@
 
 
 class AugmentWorkflowTestCase(unittest.TestCase):
-    """
-    Test case for the AugmentRobustness class.
-    """
+    """Test case for the AugmentRobustness class."""
 
     def setUp(self) -> None:
         """"""
@@ -51,12 +49,24 @@ def setUp(self) -> None:
                 "config": "tests/fixtures/config_ner.yaml",
                 "hub": "huggingface",
             },
+            "spacy_textclassification_hf_dataset": {
+                "task": "text-classification",
+                "model": "textcat_imdb",
+                "data": {"name": "imdb"},
+                "config": "tests/fixtures/config_ner.yaml",
+                "hub": "spacy",
+            },
+            "huggingface_textclassification_hf_dataset": {
+                "task": "text-classification",
+                "model": "lvwerra/distilbert-imdb",
+                "data": {"name": "imdb"},
+                "config": "tests/fixtures/config_ner.yaml",
+                "hub": "huggingface",
+            },
         }
 
     def test_augment_robustness(self):
-        """
-        Test augmenting data for robustness.
-        """
+        """Test augmenting data for robustness."""
         temp_df = pd.DataFrame(
             {
                 "test_type": [
@@ -90,9 +100,8 @@ def test_augment_robustness(self):
         self.assertTrue(is_file_exist)
 
     def test_hf_ner_augmentation(self):
-        """
-        Test augmentation using Hugging Face NER model.
-        """
+        """Test augmentation using Hugging Face NER model."""
+
         harness = Harness(**self.params["huggingface_ner"])
         self.assertIsInstance(harness, Harness)
         report = harness.generate().run().report()
@@ -107,9 +116,8 @@ def test_hf_ner_augmentation(self):
         self.assertTrue(is_file_exist)
 
     def test_spacy_ner_augmentation(self):
-        """
-        Test augmentation using spaCy NER model.
-        """
+        """Test augmentation using spaCy NER model."""
+
         harness = Harness(**self.params["spacy_ner"])
         self.assertIsInstance(harness, Harness)
         report = harness.generate().run().report()
@@ -124,9 +132,8 @@ def test_spacy_ner_augmentation(self):
         self.assertTrue(is_file_exist)
 
     def test_custom_proportions_augment_harness(self):
-        """
-        Test augmentation with custom proportions using Hugging Face NER model.
-        """
+        """Test augmentation with custom proportions using Hugging Face NER model."""
+
         harness = Harness(**self.params["huggingface_ner"])
         self.assertIsInstance(harness, Harness)
         report = harness.generate().run().report()
@@ -145,9 +152,8 @@ def test_custom_proportions_augment_harness(self):
         self.assertTrue(is_file_exist)
 
     def test_templatic_augmentation(self):
-        """
-        Test augmentation using templatic augmentation.
-        """
+        """Test augmentation using templatic augmentation."""
+
         generator = TemplaticAugment(
             templates=["I living in {LOC}", "you are working in {ORG}"],
             task="ner",
@@ -161,9 +167,8 @@ def test_templatic_augmentation(self):
         self.assertTrue(is_file_exist)
 
     def test_spacy_templatic_augmentation(self):
-        """
-        Test augmentation using templatic augmentation with spaCy NER model.
-        """
+        """Test augmentation using templatic augmentation with spaCy NER model."""
+
         harness = Harness(**self.params["spacy_ner"])
         self.assertIsInstance(harness, Harness)
         report = harness.generate().run().report()
@@ -178,9 +183,8 @@ def test_spacy_templatic_augmentation(self):
         self.assertTrue(is_file_exist)
 
     def test_csv_dataset_textclassification_hf(self):
-        """
-        Test augmentation using Hugging Face NER model.
-        """
+        """Test augmentation using Hugging Face text-classification model."""
+
         harness = Harness(**self.params["huggingface_textclassification_csv_dataset"])
         self.assertIsInstance(harness, Harness)
         harness.data = harness.data[:50]
@@ -198,9 +202,8 @@ def test_csv_dataset_textclassification_hf(self):
         self.assertTrue(is_file_exist)
 
     def test_csv_dataset_textclassification_spacy(self):
-        """
-        Test augmentation using Hugging Face NER model.
-        """
+        """Test augmentation using Spacy text-classification model."""
+
         harness = Harness(**self.params["spacy_textclassification_csv_dataset"])
         self.assertIsInstance(harness, Harness)
         harness.data = harness.data[:50]
@@ -217,6 +220,44 @@ def test_csv_dataset_textclassification_spacy(self):
         ).is_file()
         self.assertTrue(is_file_exist)
 
+    def test_hf_dataset_textclassification_hf(self):
+        """Test augmentation using Hugging Face text-classification model."""
+
+        harness = Harness(**self.params["huggingface_textclassification_hf_dataset"])
+        self.assertIsInstance(harness, Harness)
+        harness.data = harness.data[:50]
+        report = harness.generate().run().report()
+        self.assertIsInstance(report, pd.DataFrame)
+
+        harness.augment(
+            input_path={"name": "imdb"},
+            output_path="augmented_train_transformed.csv",
+            export_mode="transformed",
+        )
+        is_file_exist = pl.Path(
+            "tests/fixtures/augmented_train_transformed.csv"
+        ).is_file()
+        self.assertTrue(is_file_exist)
+
+    def test_hf_dataset_textclassification_spacy(self):
+        """Test augmentation using Spacy text-classification model."""
+
+        harness = Harness(**self.params["spacy_textclassification_hf_dataset"])
+        self.assertIsInstance(harness, Harness)
+        harness.data = harness.data[:50]
+        report = harness.generate().run().report()
+        self.assertIsInstance(report, pd.DataFrame)
+
+        harness.augment(
+            input_path={"name": "imdb"},
+            output_path="augmented_train_transformed.csv",
+            export_mode="transformed",
+        )
+        is_file_exist = pl.Path(
+            "tests/fixtures/augmented_train_transformed.csv"
+        ).is_file()
+        self.assertTrue(is_file_exist)
+
 
 class TestTemplaticAugmentation(unittest.TestCase):
     """Test case for the TemplaticAugment class"""

From d269f0852e2e76731c3c286c05c74f6478058a32 Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Thu, 20 Jul 2023 20:28:52 +0530
Subject: [PATCH 05/21] Test(tests/test_augmentation.py): updated path

---
 tests/test_augmentation.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py
index 0f9f7aca9..d546ea589 100644
--- a/tests/test_augmentation.py
+++ b/tests/test_augmentation.py
@@ -38,14 +38,14 @@ def setUp(self) -> None:
             "spacy_textclassification_csv_dataset": {
                 "task": "text-classification",
                 "model": "textcat_imdb",
-                "data": "imdb/sample.csv",
+                "data": "tests/fixtures/text_classification.csv",
                 "config": "tests/fixtures/config_ner.yaml",
                 "hub": "spacy",
             },
             "huggingface_textclassification_csv_dataset": {
                 "task": "text-classification",
                 "model": "lvwerra/distilbert-imdb",
-                "data": "imdb/sample.csv",
+                "data": "tests/fixtures/text_classification.csv",
                 "config": "tests/fixtures/config_ner.yaml",
                 "hub": "huggingface",
             },
@@ -192,12 +192,12 @@ def test_csv_dataset_textclassification_hf(self):
         self.assertIsInstance(report, pd.DataFrame)
 
         harness.augment(
-            input_path="imdb/sample.csv",
-            output_path="augmented_train_transformed.csv",
+            input_path="tests/fixtures/text_classification.csv",
+            output_path="tests/fixtures/augmented_text_classification.csv",
             export_mode="transformed",
         )
         is_file_exist = pl.Path(
-            "tests/fixtures/augmented_train_transformed.csv"
+            "tests/fixtures/augmented_text_classification.csv"
         ).is_file()
         self.assertTrue(is_file_exist)
 
@@ -211,12 +211,12 @@ def test_csv_dataset_textclassification_spacy(self):
         self.assertIsInstance(report, pd.DataFrame)
 
         harness.augment(
-            input_path="imdb/sample.csv",
-            output_path="augmented_train_transformed.csv",
+            input_path="tests/fixtures/text_classification.csv",
+            output_path="tests/fixtures/augmented_text_classification.csv",
             export_mode="transformed",
         )
         is_file_exist = pl.Path(
-            "tests/fixtures/augmented_train_transformed.csv"
+            "tests/fixtures/augmented_text_classification.csv"
         ).is_file()
         self.assertTrue(is_file_exist)
 
@@ -231,7 +231,7 @@ def test_hf_dataset_textclassification_hf(self):
 
         harness.augment(
             input_path={"name": "imdb"},
-            output_path="augmented_train_transformed.csv",
+            output_path="tests/fixtures/augmented_train_transformed.csv",
             export_mode="transformed",
         )
         is_file_exist = pl.Path(
@@ -250,7 +250,7 @@ def test_hf_dataset_textclassification_spacy(self):
 
         harness.augment(
             input_path={"name": "imdb"},
-            output_path="augmented_train_transformed.csv",
+            output_path="tests/fixtures/augmented_train_transformed.csv",
             export_mode="transformed",
         )
         is_file_exist = pl.Path(

From fe566c4fa67ae475f9af3b5e6bd9952238f04914 Mon Sep 17 00:00:00 2001
From: Rakshit Khajuria <rakshitraina1234@gmail.com>
Date: Thu, 20 Jul 2023 21:03:47 +0530
Subject: [PATCH 06/21] task(test_augmentation.py): Updated the config path for
 text-classification HF dataset params

---
 tests/test_augmentation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py
index d546ea589..263f33b57 100644
--- a/tests/test_augmentation.py
+++ b/tests/test_augmentation.py
@@ -53,14 +53,14 @@ def setUp(self) -> None:
                 "task": "text-classification",
                 "model": "textcat_imdb",
                 "data": {"name": "imdb"},
-                "config": "tests/fixtures/config_ner.yaml",
+                "config": "tests/fixtures/config_text_classification.yaml",
                 "hub": "spacy",
             },
             "huggingface_textclassification_hf_dataset": {
                 "task": "text-classification",
                 "model": "lvwerra/distilbert-imdb",
                 "data": {"name": "imdb"},
-                "config": "tests/fixtures/config_ner.yaml",
+                "config": "tests/fixtures/config_text_classification.yaml",
                 "hub": "huggingface",
             },
         }

From 795e6393372a8efc82af26370cc5d8efbb86f38b Mon Sep 17 00:00:00 2001
From: Rakshit Khajuria <rakshitraina1234@gmail.com>
Date: Thu, 20 Jul 2023 21:17:45 +0530
Subject: [PATCH 07/21] task(test_augmentation.py): Updated the config path for
 text-classification CSV dataset params

---
 tests/test_augmentation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py
index 263f33b57..8d0270a98 100644
--- a/tests/test_augmentation.py
+++ b/tests/test_augmentation.py
@@ -39,14 +39,14 @@ def setUp(self) -> None:
                 "task": "text-classification",
                 "model": "textcat_imdb",
                 "data": "tests/fixtures/text_classification.csv",
-                "config": "tests/fixtures/config_ner.yaml",
+                "config": "tests/fixtures/config_text_classification.yaml",
                 "hub": "spacy",
             },
             "huggingface_textclassification_csv_dataset": {
                 "task": "text-classification",
                 "model": "lvwerra/distilbert-imdb",
                 "data": "tests/fixtures/text_classification.csv",
-                "config": "tests/fixtures/config_ner.yaml",
+                "config": "tests/fixtures/config_text_classification.yaml",
                 "hub": "huggingface",
             },
             "spacy_textclassification_hf_dataset": {

From 81c9835c5f5a48e6d6c8f441cb9a1ad51e486fc6 Mon Sep 17 00:00:00 2001
From: Rakshit Khajuria <rakshitraina1234@gmail.com>
Date: Thu, 20 Jul 2023 22:56:07 +0530
Subject: [PATCH 08/21] Task(test_augmentation): Added custom proportions

---
 tests/test_augmentation.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py
index 8d0270a98..b0489de58 100644
--- a/tests/test_augmentation.py
+++ b/tests/test_augmentation.py
@@ -190,10 +190,11 @@ def test_csv_dataset_textclassification_hf(self):
         harness.data = harness.data[:50]
         report = harness.generate().run().report()
         self.assertIsInstance(report, pd.DataFrame)
-
+        custom_proportions = {"uppercase": 0.8, "lowercase": 0.8}
         harness.augment(
             input_path="tests/fixtures/text_classification.csv",
             output_path="tests/fixtures/augmented_text_classification.csv",
+            custom_proportions=custom_proportions,
             export_mode="transformed",
         )
         is_file_exist = pl.Path(
@@ -209,10 +210,11 @@ def test_csv_dataset_textclassification_spacy(self):
         harness.data = harness.data[:50]
         report = harness.generate().run().report()
         self.assertIsInstance(report, pd.DataFrame)
-
+        custom_proportions = {"uppercase": 0.8, "lowercase": 0.8}
         harness.augment(
             input_path="tests/fixtures/text_classification.csv",
             output_path="tests/fixtures/augmented_text_classification.csv",
+            custom_proportions=custom_proportions,
             export_mode="transformed",
         )
         is_file_exist = pl.Path(
@@ -228,10 +230,11 @@ def test_hf_dataset_textclassification_hf(self):
         harness.data = harness.data[:50]
         report = harness.generate().run().report()
         self.assertIsInstance(report, pd.DataFrame)
-
+        custom_proportions = {"uppercase": 0.8, "lowercase": 0.8}
         harness.augment(
             input_path={"name": "imdb"},
             output_path="tests/fixtures/augmented_train_transformed.csv",
+            custom_proportions=custom_proportions,
             export_mode="transformed",
         )
         is_file_exist = pl.Path(
@@ -247,10 +250,11 @@ def test_hf_dataset_textclassification_spacy(self):
         harness.data = harness.data[:50]
         report = harness.generate().run().report()
         self.assertIsInstance(report, pd.DataFrame)
-
+        custom_proportions = {"uppercase": 0.8, "lowercase": 0.8}
         harness.augment(
             input_path={"name": "imdb"},
             output_path="tests/fixtures/augmented_train_transformed.csv",
+            custom_proportions=custom_proportions,
             export_mode="transformed",
         )
         is_file_exist = pl.Path(

From ffeeb3fcdeecb6658efa3c7f356e9467b0903ca0 Mon Sep 17 00:00:00 2001
From: Rakshit Khajuria <rakshitraina1234@gmail.com>
Date: Fri, 21 Jul 2023 00:55:45 +0530
Subject: [PATCH 09/21] task(langtest.py): Updated Args

---
 langtest/langtest.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/langtest/langtest.py b/langtest/langtest.py
index 075666afd..250981943 100644
--- a/langtest/langtest.py
+++ b/langtest/langtest.py
@@ -717,8 +717,8 @@ def generated_results(self) -> Optional[pd.DataFrame]:
 
     def augment(
         self,
-        input_path: Optional[Union[str, dict]],
-        output_path: str,
+        training_data: dict,
+        augmented_data: str,
         custom_proportions: Union[Dict, List] = None,
         export_mode: str = "add",
         templates: Optional[Union[str, List[str]]] = None,
@@ -726,10 +726,8 @@ def augment(
         """Augments the data in the input file located at `input_path` and saves the result to `output_path`.
 
         Args:
-           input_path (Union[str, dict]): The path to the input data file or a dictionary containing the huggingface dataset directly.
-                                        If a dictionary is provided, the keys 'name', 'feature_column', 'target_column',
-                                        'split', and 'subset' can be used to specify the dataset details.
-            output_path (str): Path to save the augmented data.
+            training_data (dict): A dictionary containing the input data for augmentation.
+            augmented_data (str): Path to save the augmented data.
             custom_proportions (Union[Dict, List]):
             export_mode (str, optional): Determines how the samples are modified or exported.
                                     - 'inplace': Modifies the list of samples in place.
@@ -746,9 +744,6 @@ def augment(
         Note:
             This method uses an instance of `AugmentRobustness` to perform the augmentation.
 
-        Example:
-            >>> harness = Harness(...)
-            >>> harness.augment("train.conll", "augmented_train.conll")
         """
         dtypes = list(
             map(
@@ -788,7 +783,7 @@ def augment(
             _ = TemplaticAugment(
                 templates=templates,
                 task=self.task,
-            ).fix(input_path=input_path, output_path=output_path)
+            ).fix(training_data=training_data, output_path=augmented_data)
 
         else:
             _ = AugmentRobustness(
@@ -796,7 +791,11 @@ def augment(
                 config=self._config,
                 h_report=self.df_report,
                 custom_proportions=custom_proportions,
-            ).fix(input_path=input_path, output_path=output_path, export_mode=export_mode)
+            ).fix(
+                training_data=training_data,
+                output_path=augmented_data,
+                export_mode=export_mode,
+            )
 
         return self
 

From 631bd4a70aa5bcdeba46c242cbc0ea5ebe254e68 Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Fri, 21 Jul 2023 01:09:32 +0530
Subject: [PATCH 10/21] task(augmentation/__init__.py): Updated Args

---
 langtest/augmentation/__init__.py | 37 +++++++++++++++----------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/langtest/augmentation/__init__.py b/langtest/augmentation/__init__.py
index 030be97a3..d870ccea9 100644
--- a/langtest/augmentation/__init__.py
+++ b/langtest/augmentation/__init__.py
@@ -95,16 +95,14 @@ def __init__(
 
     def fix(
         self,
-        input_path: Optional[Union[str, dict]],
-        output_path,
+        training_data: dict,
+        output_path: str,
         export_mode: str = "add",
     ):
         """Applies perturbations to the input data based on the recommendations from harness reports.
 
         Args:
-            input_path (Union[str, dict]): The path to the input data file or a dictionary containing the huggingface dataset directly.
-                                        If a dictionary is provided, the keys 'name', 'feature_column', 'target_column',
-                                        'split', and 'subset' can be used to specify the dataset details.
+            training_data (dict): A dictionary containing the input data for augmentation.
             output_path (str): The path to save the augmented data file.
             export_mode (str, optional): Determines how the samples are modified or exported.
                                         - 'inplace': Modifies the list of samples in place.
@@ -114,16 +112,17 @@ def fix(
             Returns:
         List[Dict[str, Any]]: A list of augmented data samples.
         """
-        if type(input_path) == dict:
-            self.df = HuggingFaceDataset(input_path["name"], self.task)
-            data = self.df.load_data(
-                feature_column=input_path.get("feature_column", "text"),
-                target_column=input_path.get("target_column", "label"),
-                split=input_path.get("split", "test"),
-                subset=input_path.get("subset", None),
-            )
+        if len(training_data) > 1:
+            if "." not in training_data["data_source"]:
+                self.df = HuggingFaceDataset(training_data["data_source"], self.task)
+                data = self.df.load_data(
+                    feature_column=training_data.get("feature_column", "text"),
+                    target_column=training_data.get("target_column", "label"),
+                    split=training_data.get("split", "test"),
+                    subset=training_data.get("subset", None),
+                )
         else:
-            self.df = DataFactory(input_path, self.task)
+            self.df = DataFactory(training_data["data_source"], self.task)
             data = self.df.load()
         TestFactory.is_augment = True
         supported_tests = TestFactory.test_scenarios()
@@ -180,7 +179,7 @@ def fix(
 
                     if export_mode == "transformed":
                         transformed_data.extend(aug_data)
-        if type(input_path) == dict:
+        if len(training_data) > 1:
             if export_mode == "inplace":
                 final_aug_data = list(hash_map.values())
                 self.df.export_data(final_aug_data, output_path)
@@ -301,7 +300,7 @@ class TemplaticAugment(BaseAugmentaion):
 
     Methods:
     __init__(self, templates: Union[str, List[str]], task: str): Initializes the TemplaticAugment class.
-    fix(self, input_path: str, output_path: str, *args, **kwargs): Performs the templatic augmentation and exports the results to a specified path.
+    fix(self, training_data: dict, output_path: str, *args, **kwargs): Performs the templatic augmentation and exports the results to a specified path.
     """
 
     def __init__(self, templates: Union[str, List[str]], task: str) -> None:
@@ -322,13 +321,13 @@ def __init__(self, templates: Union[str, List[str]], task: str) -> None:
         elif isinstance(self.__templates, list) and isinstance(self.__templates[0], str):
             self.__templates = [self.str_to_sample(i) for i in self.__templates]
 
-    def fix(self, input_path: str, output_path: str, max_num=None, *args, **kwargs):
+    def fix(self, training_data: str, output_path: str, max_num=None, *args, **kwargs):
         """
         This method is used to perform the templatic augmentation.
         It takes the input data, performs the augmentation and then saves the augmented data to the output path.
 
         Parameters:
-        input_path (str): The path to the input data.
+        training_data (dict): A dictionary containing the input data for augmentation.
         output_path (str): The path where the augmented data will be saved.
         *args: Variable length argument list.
         **kwargs: Arbitrary keyword arguments.
@@ -337,7 +336,7 @@ def fix(self, input_path: str, output_path: str, max_num=None, *args, **kwargs):
         bool: Returns True upon successful completion of the method.
         """
 
-        df = DataFactory(input_path, self.__task)
+        df = DataFactory(training_data["data_source"], self.__task)
         data = df.load()
         new_data = []
         self.__search_results = self.search_sample_results(data)

From ef2bd6c7c96739d24c5fef4bd104773a3aa179f1 Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Fri, 21 Jul 2023 01:50:41 +0530
Subject: [PATCH 11/21] update: test augmentation

---
 tests/test_augmentation.py | 67 +++++++++++++-------------------------
 1 file changed, 22 insertions(+), 45 deletions(-)

diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py
index b0489de58..57f6d030c 100644
--- a/tests/test_augmentation.py
+++ b/tests/test_augmentation.py
@@ -35,13 +35,6 @@ def setUp(self) -> None:
                 "config": "tests/fixtures/config_ner.yaml",
                 "hub": "huggingface",
             },
-            "spacy_textclassification_csv_dataset": {
-                "task": "text-classification",
-                "model": "textcat_imdb",
-                "data": "tests/fixtures/text_classification.csv",
-                "config": "tests/fixtures/config_text_classification.yaml",
-                "hub": "spacy",
-            },
             "huggingface_textclassification_csv_dataset": {
                 "task": "text-classification",
                 "model": "lvwerra/distilbert-imdb",
@@ -91,7 +84,8 @@ def test_augment_robustness(self):
             config=yaml.safe_load("tests/fixtures/config_ner.yaml"),
         )
         augment.fix(
-            "tests/fixtures/train.conll", "tests/fixtures/augmentated_train.conll"
+            training_data={"data_source": "tests/fixtures/train.conll"},
+            output_path="tests/fixtures/augmentated_train.conll",
         )
         self.assertIsInstance(augment, AugmentRobustness)
         self.assertIsInstance(augment.suggestions(temp_df), pd.DataFrame)
@@ -108,8 +102,10 @@ def test_hf_ner_augmentation(self):
         self.assertIsInstance(report, pd.DataFrame)
 
         harness.augment(
-            "tests/fixtures/train.conll",
-            "tests/fixtures/augmentated_train.conll",
+            training_data={
+                "data_source": "tests/fixtures/train.conll",
+            },
+            augmented_data="tests/fixtures/augmentated_train.conll",
             export_mode="inplace",
         )
         is_file_exist = pl.Path("tests/fixtures/augmentated_train.conll").is_file()
@@ -124,8 +120,8 @@ def test_spacy_ner_augmentation(self):
         self.assertIsInstance(report, pd.DataFrame)
 
         harness.augment(
-            "tests/fixtures/train.conll",
-            "tests/fixtures/augmentated_train.conll",
+            training_data={"data_source": "tests/fixtures/train.conll"},
+            augmented_data="tests/fixtures/augmentated_train.conll",
             export_mode="inplace",
         )
         is_file_exist = pl.Path("tests/fixtures/augmentated_train.conll").is_file()
@@ -142,8 +138,8 @@ def test_custom_proportions_augment_harness(self):
         proportions = {"uppercase": 0.5, "lowercase": 0.5}
 
         harness.augment(
-            "tests/fixtures/train.conll",
-            "tests/fixtures/augmentated_train.conll",
+            training_data={"data_source": "tests/fixtures/train.conll"},
+            augmented_data="tests/fixtures/augmentated_train.conll",
             custom_proportions=proportions,
             export_mode="inplace",
         )
@@ -160,8 +156,8 @@ def test_templatic_augmentation(self):
         )
         self.assertIsInstance(generator, TemplaticAugment)
         generator.fix(
-            "tests/fixtures/train.conll",
-            "tests/fixtures/augmentated_train.conll",
+            training_data={"data_source": "tests/fixtures/train.conll"},
+            output_path="tests/fixtures/augmentated_train.conll",
         )
         is_file_exist = pl.Path("tests/fixtures/augmentated_train.conll").is_file()
         self.assertTrue(is_file_exist)
@@ -175,8 +171,8 @@ def test_spacy_templatic_augmentation(self):
         self.assertIsInstance(report, pd.DataFrame)
 
         harness.augment(
-            "tests/fixtures/train.conll",
-            "tests/fixtures/augmentated_train.conll",
+            training_data={"data_source": "tests/fixtures/train.conll"},
+            augmented_data="tests/fixtures/augmentated_train.conll",
             templates=["I living in {LOC}", "you are working in {ORG}"],
         )
         is_file_exist = pl.Path("tests/fixtures/augmentated_train.conll").is_file()
@@ -192,28 +188,8 @@ def test_csv_dataset_textclassification_hf(self):
         self.assertIsInstance(report, pd.DataFrame)
         custom_proportions = {"uppercase": 0.8, "lowercase": 0.8}
         harness.augment(
-            input_path="tests/fixtures/text_classification.csv",
-            output_path="tests/fixtures/augmented_text_classification.csv",
-            custom_proportions=custom_proportions,
-            export_mode="transformed",
-        )
-        is_file_exist = pl.Path(
-            "tests/fixtures/augmented_text_classification.csv"
-        ).is_file()
-        self.assertTrue(is_file_exist)
-
-    def test_csv_dataset_textclassification_spacy(self):
-        """Test augmentation using Spacy text-classification model."""
-
-        harness = Harness(**self.params["spacy_textclassification_csv_dataset"])
-        self.assertIsInstance(harness, Harness)
-        harness.data = harness.data[:50]
-        report = harness.generate().run().report()
-        self.assertIsInstance(report, pd.DataFrame)
-        custom_proportions = {"uppercase": 0.8, "lowercase": 0.8}
-        harness.augment(
-            input_path="tests/fixtures/text_classification.csv",
-            output_path="tests/fixtures/augmented_text_classification.csv",
+            training_data={"tests/fixtures/text_classification.csv"},
+            augmented_data="tests/fixtures/augmented_text_classification.csv",
             custom_proportions=custom_proportions,
             export_mode="transformed",
         )
@@ -232,8 +208,8 @@ def test_hf_dataset_textclassification_hf(self):
         self.assertIsInstance(report, pd.DataFrame)
         custom_proportions = {"uppercase": 0.8, "lowercase": 0.8}
         harness.augment(
-            input_path={"name": "imdb"},
-            output_path="tests/fixtures/augmented_train_transformed.csv",
+            training_data={"name": "imdb"},
+            augmented_data="tests/fixtures/augmented_train_transformed.csv",
             custom_proportions=custom_proportions,
             export_mode="transformed",
         )
@@ -252,8 +228,8 @@ def test_hf_dataset_textclassification_spacy(self):
         self.assertIsInstance(report, pd.DataFrame)
         custom_proportions = {"uppercase": 0.8, "lowercase": 0.8}
         harness.augment(
-            input_path={"name": "imdb"},
-            output_path="tests/fixtures/augmented_train_transformed.csv",
+            training_data={"name": "imdb"},
+            augmented_data="tests/fixtures/augmented_train_transformed.csv",
             custom_proportions=custom_proportions,
             export_mode="transformed",
         )
@@ -335,7 +311,8 @@ def test_fix(self):
             templates=["My name is {PER} and I am from {LOC}"], task="ner"
         )
         generator.fix(
-            input_path=self.conll_path, output_path="/tmp/augmented_conll.conll"
+            training_data={"data_source": self.conll_path},
+            output_path="/tmp/augmented_conll.conll",
         )
         with open("/tmp/augmented_conll.conll", "r") as reader:
             lines = [line.strip() for line in reader.readlines() if line.strip() != ""]

From eed87785a93a8e21c62863be62115db0880ec0c3 Mon Sep 17 00:00:00 2001
From: Rakshit Khajuria <rakshitraina1234@gmail.com>
Date: Fri, 21 Jul 2023 02:00:21 +0530
Subject: [PATCH 12/21] task(test_augmentation.py): added data_source

---
 tests/test_augmentation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py
index 57f6d030c..4b8feb993 100644
--- a/tests/test_augmentation.py
+++ b/tests/test_augmentation.py
@@ -188,7 +188,7 @@ def test_csv_dataset_textclassification_hf(self):
         self.assertIsInstance(report, pd.DataFrame)
         custom_proportions = {"uppercase": 0.8, "lowercase": 0.8}
         harness.augment(
-            training_data={"tests/fixtures/text_classification.csv"},
+            training_data={"data_source": "tests/fixtures/text_classification.csv"},
             augmented_data="tests/fixtures/augmented_text_classification.csv",
             custom_proportions=custom_proportions,
             export_mode="transformed",
@@ -208,7 +208,7 @@ def test_hf_dataset_textclassification_hf(self):
         self.assertIsInstance(report, pd.DataFrame)
         custom_proportions = {"uppercase": 0.8, "lowercase": 0.8}
         harness.augment(
-            training_data={"name": "imdb"},
+            training_data={"data_source": "imdb"},
             augmented_data="tests/fixtures/augmented_train_transformed.csv",
             custom_proportions=custom_proportions,
             export_mode="transformed",
@@ -228,7 +228,7 @@ def test_hf_dataset_textclassification_spacy(self):
         self.assertIsInstance(report, pd.DataFrame)
         custom_proportions = {"uppercase": 0.8, "lowercase": 0.8}
         harness.augment(
-            training_data={"name": "imdb"},
+            training_data={"data_source": "imdb"},
             augmented_data="tests/fixtures/augmented_train_transformed.csv",
             custom_proportions=custom_proportions,
             export_mode="transformed",

From f331a236f90b01b3f27a9f693eeafc4a47c8ff5f Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Fri, 21 Jul 2023 02:33:09 +0530
Subject: [PATCH 13/21] updated augmentation/__init__.py

---
 langtest/augmentation/__init__.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/langtest/augmentation/__init__.py b/langtest/augmentation/__init__.py
index d870ccea9..f9f835043 100644
--- a/langtest/augmentation/__init__.py
+++ b/langtest/augmentation/__init__.py
@@ -112,15 +112,14 @@ def fix(
             Returns:
         List[Dict[str, Any]]: A list of augmented data samples.
         """
-        if len(training_data) > 1:
-            if "." not in training_data["data_source"]:
-                self.df = HuggingFaceDataset(training_data["data_source"], self.task)
-                data = self.df.load_data(
-                    feature_column=training_data.get("feature_column", "text"),
-                    target_column=training_data.get("target_column", "label"),
-                    split=training_data.get("split", "test"),
-                    subset=training_data.get("subset", None),
-                )
+        if "." not in training_data["data_source"]:
+            self.df = HuggingFaceDataset(training_data["data_source"], self.task)
+            data = self.df.load_data(
+                feature_column=training_data.get("feature_column", "text"),
+                target_column=training_data.get("target_column", "label"),
+                split=training_data.get("split", "test"),
+                subset=training_data.get("subset", None),
+            )
         else:
             self.df = DataFactory(training_data["data_source"], self.task)
             data = self.df.load()
@@ -179,7 +178,7 @@ def fix(
 
                     if export_mode == "transformed":
                         transformed_data.extend(aug_data)
-        if len(training_data) > 1:
+        if "." not in training_data["data_source"]:
             if export_mode == "inplace":
                 final_aug_data = list(hash_map.values())
                 self.df.export_data(final_aug_data, output_path)

From 4e3be6c247c7634227586a834ada8726545da713 Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Tue, 1 Aug 2023 23:49:10 +0530
Subject: [PATCH 14/21] website and notebook updated

---
 .../misc/Augmentation_Control_Notebook.ipynb  | 1815 ++++++++++++++---
 docs/pages/docs/generate_augmentation.md      |   30 +-
 2 files changed, 1585 insertions(+), 260 deletions(-)

diff --git a/demo/tutorials/misc/Augmentation_Control_Notebook.ipynb b/demo/tutorials/misc/Augmentation_Control_Notebook.ipynb
index e9a5ad4c4..1e14fd913 100644
--- a/demo/tutorials/misc/Augmentation_Control_Notebook.ipynb
+++ b/demo/tutorials/misc/Augmentation_Control_Notebook.ipynb
@@ -1,7 +1,6 @@
 {
   "cells": [
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "e7PsSmy9sCoR"
@@ -11,7 +10,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "MhgkQYQiEvZt"
@@ -21,7 +19,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "WJJzt3RWhEc6"
@@ -33,7 +30,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "26qXWhCYhHAt"
@@ -54,7 +50,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "Jx4OHnOchSeC"
@@ -75,7 +70,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "yR6kjOaiheKN"
@@ -88,7 +82,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 3,
+      "execution_count": 7,
       "metadata": {
         "id": "lTzSJpMlhgq5"
       },
@@ -99,7 +93,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "sBcZjwJBhkOw"
@@ -113,10 +106,10 @@
         "\n",
         "\n",
         "| Parameter  | Description |  \n",
-        "| - | - | \n",
+        "| - | - |\n",
         "|**task**     |Task for which the model is to be evaluated (text-classification or ner)|\n",
         "|**model**     |PipelineModel or path to a saved model or pretrained pipeline/model from hub.\n",
-        "|**data**       |Path to the data that is to be used for evaluation. Can be .csv or .conll file in the CoNLL format \n",
+        "|**data**       |Path to the data that is to be used for evaluation. Can be .csv or .conll file in the CoNLL format\n",
         "|**config**     |Configuration for the tests to be performed, specified in form of a YAML file.\n",
         "|**hub**       |model hub to load from the path. Required if model param is passed as path.|\n",
         "\n",
@@ -125,7 +118,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "JFhJ9CcbsKqN"
@@ -137,7 +129,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "UtxtE6Y0r4CJ"
@@ -151,7 +142,7 @@
         "\n",
         "2. Test NER model robustness on CoNLL test set\n",
         "\n",
-        "3. Augment CoNLL training set based on test results \n",
+        "3. Augment CoNLL training set based on test results\n",
         "\n",
         "4. Train new NER model on augmented CoNLL training set\n",
         "\n",
@@ -161,7 +152,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "I21Jmq79jgC6"
@@ -186,7 +176,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "MNtH_HOUt_PL"
@@ -197,7 +186,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 5,
+      "execution_count": 9,
       "metadata": {
         "id": "jRnEmCfPhsZs"
       },
@@ -208,13 +197,13 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 6,
+      "execution_count": 10,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "bHXeP18sGp-g",
-        "outputId": "1bd2ea97-e002-451b-d60b-cae915c78fb6"
+        "outputId": "f50e09d2-8c9c-44d5-9287-be7014d1307f"
       },
       "outputs": [
         {
@@ -233,7 +222,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "kKgXC7cvuyar"
@@ -244,35 +232,85 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 17,
+      "execution_count": 11,
       "metadata": {
-        "id": "RVk9NWn7u-Lm"
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "RVk9NWn7u-Lm",
+        "outputId": "d542c0fe-78fe-40cd-ce96-a4040b9b040f"
       },
-      "outputs": [],
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Test Configuration : \n",
+            " {\n",
+            " \"tests\": {\n",
+            "  \"defaults\": {\n",
+            "   \"min_pass_rate\": 1.0\n",
+            "  },\n",
+            "  \"robustness\": {\n",
+            "   \"add_typo\": {\n",
+            "    \"min_pass_rate\": 0.7\n",
+            "   },\n",
+            "   \"american_to_british\": {\n",
+            "    \"min_pass_rate\": 0.7\n",
+            "   }\n",
+            "  },\n",
+            "  \"accuracy\": {\n",
+            "   \"min_micro_f1_score\": {\n",
+            "    \"min_score\": 0.7\n",
+            "   }\n",
+            "  },\n",
+            "  \"bias\": {\n",
+            "   \"replace_to_female_pronouns\": {\n",
+            "    \"min_pass_rate\": 0.7\n",
+            "   },\n",
+            "   \"replace_to_low_income_country\": {\n",
+            "    \"min_pass_rate\": 0.7\n",
+            "   }\n",
+            "  },\n",
+            "  \"fairness\": {\n",
+            "   \"min_gender_f1_score\": {\n",
+            "    \"min_score\": 0.6\n",
+            "   }\n",
+            "  },\n",
+            "  \"representation\": {\n",
+            "   \"min_label_representation_count\": {\n",
+            "    \"min_count\": 50\n",
+            "   }\n",
+            "  }\n",
+            " }\n",
+            "}\n"
+          ]
+        }
+      ],
       "source": [
         "harness = Harness(task=\"ner\", model=ner_model, data=\"sample.conll\", hub=\"johnsnowlabs\")"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 18,
+      "execution_count": 12,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "mynkAUwZyuFN",
-        "outputId": "a7b97865-fc75-4070-c5b4-0533617a7782"
+        "outputId": "1ad0c141-bc67-4ac1-bff7-d102a71b8693"
       },
       "outputs": [
         {
           "data": {
             "text/plain": [
               "{'tests': {'defaults': {'min_pass_rate': 0.65},\n",
-              " 'robustness': {'add_typo': {'min_pass_rate': 0.65},\n",
+              "  'robustness': {'add_typo': {'min_pass_rate': 0.65},\n",
               "   'lowercase': {'min_pass_rate': 0.65}}}}"
             ]
           },
-          "execution_count": 18,
+          "execution_count": 12,
           "metadata": {},
           "output_type": "execute_result"
         }
@@ -281,9 +319,9 @@
         "harness.configure({\n",
         "    'tests': {\n",
         "        'defaults': {'min_pass_rate': 0.65},\n",
-        "        \n",
+        "\n",
         "        'robustness': {\n",
-        "            'add_typo': {'min_pass_rate': 0.65}, \n",
+        "            'add_typo': {'min_pass_rate': 0.65},\n",
         "            'lowercase':{'min_pass_rate': 0.65},\n",
         "        }\n",
         "    }\n",
@@ -291,7 +329,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "ZPU46A7WigFr"
@@ -301,7 +338,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "MomLlmTwjpzU"
@@ -315,20 +351,27 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 19,
+      "execution_count": 13,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "UiUNzTwF89ye",
-        "outputId": "1ec7fe1f-c342-45da-b919-d48e8e082341"
+        "outputId": "f77a840d-a816-4d2c-9de6-a8a991f047b5"
       },
       "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "Generating testcases...: 100%|██████████| 1/1 [00:00<00:00, 5526.09it/s]\n"
+          ]
+        },
         {
           "data": {
             "text/plain": []
           },
-          "execution_count": 19,
+          "execution_count": 13,
           "metadata": {},
           "output_type": "execute_result"
         }
@@ -338,7 +381,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "UiMIF-o49Bg_"
@@ -349,21 +391,22 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 20,
+      "execution_count": 15,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 423
         },
         "id": "p0tTwFfc891k",
-        "outputId": "05b03712-2723-418a-936e-2cbbc818f215"
+        "outputId": "3676052a-635b-4cc3-b23d-1e44f097065b"
       },
       "outputs": [
         {
           "data": {
             "text/html": [
               "\n",
-              "  <div id=\"df-59f01293-d56b-43a3-a26e-ae2bc1a8b9a5\">\n",
+              "\n",
+              "  <div id=\"df-9e9af804-bce3-4d1d-9823-78bdbaf95161\">\n",
               "    <div class=\"colab-df-container\">\n",
               "      <div>\n",
               "<style scoped>\n",
@@ -387,7 +430,6 @@
               "      <th>test_type</th>\n",
               "      <th>original</th>\n",
               "      <th>test_case</th>\n",
-              "      <th>expected_result</th>\n",
               "    </tr>\n",
               "  </thead>\n",
               "  <tbody>\n",
@@ -396,40 +438,35 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...</td>\n",
-              "      <td>SOCCER - JAPAN GET LUCKY WIN , DHINA IN SURPRI...</td>\n",
-              "      <td>JAPAN: B-LOC, CHINA: B-PER</td>\n",
+              "      <td>SOCCER - JAPAZ GET LUCKY WIN , CHINA IN SURPRI...</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>1</th>\n",
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>Nadim Ladki</td>\n",
-              "      <td>Padim Ladki</td>\n",
-              "      <td>Nadim: B-PER, Ladki: I-PER</td>\n",
+              "      <td>Zadim Ladki</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>2</th>\n",
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>AL-AIN , United Arab Emirates 1996-12-06</td>\n",
-              "      <td>AL-AIN , United Arab Emirates1 996-12-06</td>\n",
-              "      <td>AL-AIN: B-LOC, United: B-LOC, Arab: I-LOC, Emi...</td>\n",
+              "      <td>AL-SIN , United Arab Emirates 1996-12-06</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>3</th>\n",
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>Japan began the defence of their Asian Cup tit...</td>\n",
-              "      <td>Japan began the sefence of their Asian Cup tit...</td>\n",
-              "      <td>Japan: B-LOC, Asian: B-MISC, Cup: I-MISC, Syri...</td>\n",
+              "      <td>Japan began the defence of their Asian Cup tit...</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>4</th>\n",
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>But China saw their luck desert them in the se...</td>\n",
-              "      <td>But China saw their luck desert them in the se...</td>\n",
-              "      <td>China: B-LOC, Uzbekistan: B-LOC</td>\n",
+              "      <td>But China saw their luck desert them in yhe se...</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>...</th>\n",
@@ -437,7 +474,6 @@
               "      <td>...</td>\n",
               "      <td>...</td>\n",
               "      <td>...</td>\n",
-              "      <td>...</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>447</th>\n",
@@ -445,7 +481,6 @@
               "      <td>lowercase</td>\n",
               "      <td>Portuguesa 1 Atletico Mineiro 0</td>\n",
               "      <td>portuguesa 1 atletico mineiro 0</td>\n",
-              "      <td>Portuguesa: B-ORG, Atletico: B-ORG, Mineiro: I...</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>448</th>\n",
@@ -453,7 +488,6 @@
               "      <td>lowercase</td>\n",
               "      <td>CRICKET - LARA ENDURES ANOTHER MISERABLE DAY .</td>\n",
               "      <td>cricket - lara endures another miserable day .</td>\n",
-              "      <td>LARA: B-PER</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>449</th>\n",
@@ -461,7 +495,6 @@
               "      <td>lowercase</td>\n",
               "      <td>Robert Galvin</td>\n",
               "      <td>robert galvin</td>\n",
-              "      <td>Robert: B-PER, Galvin: I-PER</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>450</th>\n",
@@ -469,7 +502,6 @@
               "      <td>lowercase</td>\n",
               "      <td>MELBOURNE 1996-12-06</td>\n",
               "      <td>melbourne 1996-12-06</td>\n",
-              "      <td>MELBOURNE: B-LOC</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>451</th>\n",
@@ -477,24 +509,90 @@
               "      <td>lowercase</td>\n",
               "      <td>Australia gave Brian Lara another reason to be...</td>\n",
               "      <td>australia gave brian lara another reason to be...</td>\n",
-              "      <td>Australia: B-LOC, Brian: B-PER, Lara: I-PER, W...</td>\n",
               "    </tr>\n",
               "  </tbody>\n",
               "</table>\n",
-              "<p>452 rows × 5 columns</p>\n",
+              "<p>452 rows × 4 columns</p>\n",
               "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-59f01293-d56b-43a3-a26e-ae2bc1a8b9a5')\"\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-9e9af804-bce3-4d1d-9823-78bdbaf95161')\"\n",
               "              title=\"Convert this dataframe to an interactive table.\"\n",
               "              style=\"display:none;\">\n",
-              "        \n",
+              "\n",
               "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
               "       width=\"24px\">\n",
               "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
               "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
               "  </svg>\n",
               "      </button>\n",
-              "      \n",
-              "  <style>\n",
+              "\n",
+              "\n",
+              "\n",
+              "    <div id=\"df-1ddf75bf-4d68-44e3-93c3-70367a2b3e07\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-1ddf75bf-4d68-44e3-93c3-70367a2b3e07')\"\n",
+              "              title=\"Suggest charts.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "      </button>\n",
+              "    </div>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: #E8F0FE;\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: #1967D2;\n",
+              "    height: 32px;\n",
+              "    padding: 0 0 0 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: #E2EBFA;\n",
+              "    box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: #174EA6;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "    background-color: #3B4455;\n",
+              "    fill: #D2E3FC;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart:hover {\n",
+              "    background-color: #434B5C;\n",
+              "    box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "    filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "    fill: #FFFFFF;\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "    <script>\n",
+              "      async function quickchart(key) {\n",
+              "        const containerElement = document.querySelector('#' + key);\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      }\n",
+              "    </script>\n",
+              "\n",
+              "      <script>\n",
+              "\n",
+              "function displayQuickchartButton(domScope) {\n",
+              "  let quickchartButtonEl =\n",
+              "    domScope.querySelector('#df-1ddf75bf-4d68-44e3-93c3-70367a2b3e07 button.colab-df-quickchart');\n",
+              "  quickchartButtonEl.style.display =\n",
+              "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "}\n",
+              "\n",
+              "        displayQuickchartButton(document);\n",
+              "      </script>\n",
+              "      <style>\n",
               "    .colab-df-container {\n",
               "      display:flex;\n",
               "      flex-wrap:wrap;\n",
@@ -534,12 +632,12 @@
               "\n",
               "      <script>\n",
               "        const buttonEl =\n",
-              "          document.querySelector('#df-59f01293-d56b-43a3-a26e-ae2bc1a8b9a5 button.colab-df-convert');\n",
+              "          document.querySelector('#df-9e9af804-bce3-4d1d-9823-78bdbaf95161 button.colab-df-convert');\n",
               "        buttonEl.style.display =\n",
               "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "\n",
               "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-59f01293-d56b-43a3-a26e-ae2bc1a8b9a5');\n",
+              "          const element = document.querySelector('#df-9e9af804-bce3-4d1d-9823-78bdbaf95161');\n",
               "          const dataTable =\n",
               "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
               "                                                     [key], {});\n",
@@ -557,8 +655,7 @@
               "        }\n",
               "      </script>\n",
               "    </div>\n",
-              "  </div>\n",
-              "  "
+              "  </div>\n"
             ],
             "text/plain": [
               "       category  test_type                                           original  \\\n",
@@ -574,36 +671,23 @@
               "450  robustness  lowercase                               MELBOURNE 1996-12-06   \n",
               "451  robustness  lowercase  Australia gave Brian Lara another reason to be...   \n",
               "\n",
-              "                                             test_case  \\\n",
-              "0    SOCCER - JAPAN GET LUCKY WIN , DHINA IN SURPRI...   \n",
-              "1                                          Padim Ladki   \n",
-              "2             AL-AIN , United Arab Emirates1 996-12-06   \n",
-              "3    Japan began the sefence of their Asian Cup tit...   \n",
-              "4    But China saw their luck desert them in the se...   \n",
-              "..                                                 ...   \n",
-              "447                    portuguesa 1 atletico mineiro 0   \n",
-              "448     cricket - lara endures another miserable day .   \n",
-              "449                                      robert galvin   \n",
-              "450                               melbourne 1996-12-06   \n",
-              "451  australia gave brian lara another reason to be...   \n",
-              "\n",
-              "                                       expected_result  \n",
-              "0                           JAPAN: B-LOC, CHINA: B-PER  \n",
-              "1                           Nadim: B-PER, Ladki: I-PER  \n",
-              "2    AL-AIN: B-LOC, United: B-LOC, Arab: I-LOC, Emi...  \n",
-              "3    Japan: B-LOC, Asian: B-MISC, Cup: I-MISC, Syri...  \n",
-              "4                      China: B-LOC, Uzbekistan: B-LOC  \n",
+              "                                             test_case  \n",
+              "0    SOCCER - JAPAZ GET LUCKY WIN , CHINA IN SURPRI...  \n",
+              "1                                          Zadim Ladki  \n",
+              "2             AL-SIN , United Arab Emirates 1996-12-06  \n",
+              "3    Japan began the defence of their Asian Cup tit...  \n",
+              "4    But China saw their luck desert them in yhe se...  \n",
               "..                                                 ...  \n",
-              "447  Portuguesa: B-ORG, Atletico: B-ORG, Mineiro: I...  \n",
-              "448                                        LARA: B-PER  \n",
-              "449                       Robert: B-PER, Galvin: I-PER  \n",
-              "450                                   MELBOURNE: B-LOC  \n",
-              "451  Australia: B-LOC, Brian: B-PER, Lara: I-PER, W...  \n",
+              "447                    portuguesa 1 atletico mineiro 0  \n",
+              "448     cricket - lara endures another miserable day .  \n",
+              "449                                      robert galvin  \n",
+              "450                               melbourne 1996-12-06  \n",
+              "451  australia gave brian lara another reason to be...  \n",
               "\n",
-              "[452 rows x 5 columns]"
+              "[452 rows x 4 columns]"
             ]
           },
-          "execution_count": 20,
+          "execution_count": 15,
           "metadata": {},
           "output_type": "execute_result"
         }
@@ -613,7 +697,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "nRgq7e-g9Gev"
@@ -623,7 +706,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "IaPBjl_R9slh"
@@ -634,7 +716,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 21,
+      "execution_count": 16,
       "metadata": {
         "id": "ba0MYutC96CN"
       },
@@ -644,7 +726,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "groBqKuD9I34"
@@ -655,27 +736,27 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 22,
+      "execution_count": 17,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "CHQHRbQb9EDi",
-        "outputId": "2af7ca3f-034c-4e3d-d1b5-6b029e23613a"
+        "outputId": "f3f4537c-3f27-4210-c52f-7732d5d45130"
       },
       "outputs": [
         {
           "name": "stderr",
           "output_type": "stream",
           "text": [
-            "Running test cases...: 100%|██████████| 452/452 [00:58<00:00,  7.66it/s]\n"
+            "Running testcases... : 100%|██████████| 452/452 [01:07<00:00,  6.68it/s]\n"
           ]
         },
         {
           "data": {
             "text/plain": []
           },
-          "execution_count": 22,
+          "execution_count": 17,
           "metadata": {},
           "output_type": "execute_result"
         }
@@ -685,7 +766,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "71zHGe2q9O6G"
@@ -696,21 +776,22 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 23,
+      "execution_count": 18,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 423
+          "height": 606
         },
         "id": "keBNodfJ894u",
-        "outputId": "4b553141-0e2f-4512-f94a-108f5c92281b"
+        "outputId": "db80c638-e0f7-4b8d-fbf7-2b54e97fdae5"
       },
       "outputs": [
         {
           "data": {
             "text/html": [
               "\n",
-              "  <div id=\"df-fc107874-5555-4990-947f-a33b0d8d1291\">\n",
+              "\n",
+              "  <div id=\"df-43e1110c-3801-49e6-b86e-e99e610a5923\">\n",
               "    <div class=\"colab-df-container\">\n",
               "      <div>\n",
               "<style scoped>\n",
@@ -745,9 +826,9 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...</td>\n",
-              "      <td>SOCCER - JAPAN GET LUCKY WIN , DHINA IN SURPRI...</td>\n",
+              "      <td>SOCCER - JAPAZ GET LUCKY WIN , CHINA IN SURPRI...</td>\n",
               "      <td>japan: LOC, china: LOC</td>\n",
-              "      <td>japan: LOC, dhina: PER</td>\n",
+              "      <td>japaz: PER, china: LOC</td>\n",
               "      <td>False</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -755,9 +836,9 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>Nadim Ladki</td>\n",
-              "      <td>Padim Ladki</td>\n",
+              "      <td>Zadim Ladki</td>\n",
               "      <td>nadim ladki: PER</td>\n",
-              "      <td>padim ladki: PER</td>\n",
+              "      <td>zadim ladki: PER</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -765,17 +846,17 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>AL-AIN , United Arab Emirates 1996-12-06</td>\n",
-              "      <td>AL-AIN , United Arab Emirates1 996-12-06</td>\n",
+              "      <td>AL-SIN , United Arab Emirates 1996-12-06</td>\n",
               "      <td>al-ain: LOC, united arab emirates: LOC</td>\n",
-              "      <td>al-ain: LOC, united arab emirates1: LOC</td>\n",
-              "      <td>False</td>\n",
+              "      <td>al-sin: LOC, united arab emirates: LOC</td>\n",
+              "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>3</th>\n",
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>Japan began the defence of their Asian Cup tit...</td>\n",
-              "      <td>Japan began the sefence of their Asian Cup tit...</td>\n",
+              "      <td>Japan began the defence of their Asian Cup tit...</td>\n",
               "      <td>japan: LOC, asian cup: MISC, syria: LOC</td>\n",
               "      <td>japan: LOC, asian cup: MISC, syria: LOC</td>\n",
               "      <td>True</td>\n",
@@ -785,10 +866,10 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>But China saw their luck desert them in the se...</td>\n",
-              "      <td>But China saw their luck desert them in the se...</td>\n",
+              "      <td>But China saw their luck desert them in yhe se...</td>\n",
               "      <td>china: LOC, uzbekistan: LOC</td>\n",
-              "      <td>china: LOC, matsh: PER, uzbekistan: LOC</td>\n",
-              "      <td>False</td>\n",
+              "      <td>china: LOC, uzbekistan: LOC</td>\n",
+              "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>...</th>\n",
@@ -846,26 +927,93 @@
               "      <td>lowercase</td>\n",
               "      <td>Australia gave Brian Lara another reason to be...</td>\n",
               "      <td>australia gave brian lara another reason to be...</td>\n",
-              "      <td>australia: LOC, brian lara: PER, west indies: LOC</td>\n",
-              "      <td>australia: LOC, brian lara: PER, west indies: LOC</td>\n",
+              "      <td>australia: LOC, brian lara: PER, west indies: ...</td>\n",
+              "      <td>australia: LOC, brian lara: PER, west indies: ...</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "  </tbody>\n",
               "</table>\n",
               "<p>452 rows × 7 columns</p>\n",
               "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-fc107874-5555-4990-947f-a33b0d8d1291')\"\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-43e1110c-3801-49e6-b86e-e99e610a5923')\"\n",
               "              title=\"Convert this dataframe to an interactive table.\"\n",
               "              style=\"display:none;\">\n",
-              "        \n",
+              "\n",
               "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
               "       width=\"24px\">\n",
               "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
               "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
               "  </svg>\n",
               "      </button>\n",
-              "      \n",
-              "  <style>\n",
+              "\n",
+              "\n",
+              "\n",
+              "    <div id=\"df-ee304d5c-6a64-4851-80ae-0b76fbf905a1\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-ee304d5c-6a64-4851-80ae-0b76fbf905a1')\"\n",
+              "              title=\"Suggest charts.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "      </button>\n",
+              "    </div>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: #E8F0FE;\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: #1967D2;\n",
+              "    height: 32px;\n",
+              "    padding: 0 0 0 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: #E2EBFA;\n",
+              "    box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: #174EA6;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "    background-color: #3B4455;\n",
+              "    fill: #D2E3FC;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart:hover {\n",
+              "    background-color: #434B5C;\n",
+              "    box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "    filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "    fill: #FFFFFF;\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "    <script>\n",
+              "      async function quickchart(key) {\n",
+              "        const containerElement = document.querySelector('#' + key);\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      }\n",
+              "    </script>\n",
+              "\n",
+              "      <script>\n",
+              "\n",
+              "function displayQuickchartButton(domScope) {\n",
+              "  let quickchartButtonEl =\n",
+              "    domScope.querySelector('#df-ee304d5c-6a64-4851-80ae-0b76fbf905a1 button.colab-df-quickchart');\n",
+              "  quickchartButtonEl.style.display =\n",
+              "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "}\n",
+              "\n",
+              "        displayQuickchartButton(document);\n",
+              "      </script>\n",
+              "      <style>\n",
               "    .colab-df-container {\n",
               "      display:flex;\n",
               "      flex-wrap:wrap;\n",
@@ -905,12 +1053,12 @@
               "\n",
               "      <script>\n",
               "        const buttonEl =\n",
-              "          document.querySelector('#df-fc107874-5555-4990-947f-a33b0d8d1291 button.colab-df-convert');\n",
+              "          document.querySelector('#df-43e1110c-3801-49e6-b86e-e99e610a5923 button.colab-df-convert');\n",
               "        buttonEl.style.display =\n",
               "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "\n",
               "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-fc107874-5555-4990-947f-a33b0d8d1291');\n",
+              "          const element = document.querySelector('#df-43e1110c-3801-49e6-b86e-e99e610a5923');\n",
               "          const dataTable =\n",
               "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
               "                                                     [key], {});\n",
@@ -928,8 +1076,7 @@
               "        }\n",
               "      </script>\n",
               "    </div>\n",
-              "  </div>\n",
-              "  "
+              "  </div>\n"
             ],
             "text/plain": [
               "       category  test_type                                           original  \\\n",
@@ -946,11 +1093,11 @@
               "451  robustness  lowercase  Australia gave Brian Lara another reason to be...   \n",
               "\n",
               "                                             test_case  \\\n",
-              "0    SOCCER - JAPAN GET LUCKY WIN , DHINA IN SURPRI...   \n",
-              "1                                          Padim Ladki   \n",
-              "2             AL-AIN , United Arab Emirates1 996-12-06   \n",
-              "3    Japan began the sefence of their Asian Cup tit...   \n",
-              "4    But China saw their luck desert them in the se...   \n",
+              "0    SOCCER - JAPAZ GET LUCKY WIN , CHINA IN SURPRI...   \n",
+              "1                                          Zadim Ladki   \n",
+              "2             AL-SIN , United Arab Emirates 1996-12-06   \n",
+              "3    Japan began the defence of their Asian Cup tit...   \n",
+              "4    But China saw their luck desert them in yhe se...   \n",
               "..                                                 ...   \n",
               "447                    portuguesa 1 atletico mineiro 0   \n",
               "448     cricket - lara endures another miserable day .   \n",
@@ -969,25 +1116,25 @@
               "448                                  lara endures: PER   \n",
               "449                                 robert galvin: PER   \n",
               "450                                     melbourne: LOC   \n",
-              "451  australia: LOC, brian lara: PER, west indies: LOC   \n",
+              "451  australia: LOC, brian lara: PER, west indies: ...   \n",
               "\n",
               "                                         actual_result   pass  \n",
-              "0                               japan: LOC, dhina: PER  False  \n",
-              "1                                     padim ladki: PER   True  \n",
-              "2              al-ain: LOC, united arab emirates1: LOC  False  \n",
+              "0                               japaz: PER, china: LOC  False  \n",
+              "1                                     zadim ladki: PER   True  \n",
+              "2               al-sin: LOC, united arab emirates: LOC   True  \n",
               "3              japan: LOC, asian cup: MISC, syria: LOC   True  \n",
-              "4              china: LOC, matsh: PER, uzbekistan: LOC  False  \n",
+              "4                          china: LOC, uzbekistan: LOC   True  \n",
               "..                                                 ...    ...  \n",
               "447             portuguesa: ORG, atletico mineiro: ORG   True  \n",
               "448                                  lara endures: PER   True  \n",
               "449                                 robert galvin: PER   True  \n",
               "450                                     melbourne: LOC   True  \n",
-              "451  australia: LOC, brian lara: PER, west indies: LOC   True  \n",
+              "451  australia: LOC, brian lara: PER, west indies: ...   True  \n",
               "\n",
               "[452 rows x 7 columns]"
             ]
           },
-          "execution_count": 23,
+          "execution_count": 18,
           "metadata": {},
           "output_type": "execute_result"
         }
@@ -997,7 +1144,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "57lqGecA9UXG"
@@ -1007,7 +1153,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "jPvPCr_S9Zb8"
@@ -1018,21 +1163,22 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 24,
+      "execution_count": 19,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 112
         },
         "id": "gp57HcF9yxi7",
-        "outputId": "a980bcff-4eba-4930-bfa7-0a1f7e67bd8e"
+        "outputId": "0d65b1bd-2c1b-4fd2-8cb7-199a012a9ed3"
       },
       "outputs": [
         {
           "data": {
             "text/html": [
               "\n",
-              "  <div id=\"df-6c333c35-9a95-41e9-8471-416e02cb232d\">\n",
+              "\n",
+              "  <div id=\"df-3b9fdf2e-983a-465b-89c4-d31958f36197\">\n",
               "    <div class=\"colab-df-container\">\n",
               "      <div>\n",
               "<style scoped>\n",
@@ -1066,11 +1212,11 @@
               "      <th>0</th>\n",
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
-              "      <td>98</td>\n",
-              "      <td>128</td>\n",
-              "      <td>57%</td>\n",
+              "      <td>53</td>\n",
+              "      <td>173</td>\n",
+              "      <td>77%</td>\n",
               "      <td>65%</td>\n",
-              "      <td>False</td>\n",
+              "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>1</th>\n",
@@ -1085,18 +1231,85 @@
               "  </tbody>\n",
               "</table>\n",
               "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-6c333c35-9a95-41e9-8471-416e02cb232d')\"\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3b9fdf2e-983a-465b-89c4-d31958f36197')\"\n",
               "              title=\"Convert this dataframe to an interactive table.\"\n",
               "              style=\"display:none;\">\n",
-              "        \n",
+              "\n",
               "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
               "       width=\"24px\">\n",
               "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
               "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
               "  </svg>\n",
               "      </button>\n",
-              "      \n",
-              "  <style>\n",
+              "\n",
+              "\n",
+              "\n",
+              "    <div id=\"df-2b9195cf-2bf8-4e22-a9b9-412e9894d137\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-2b9195cf-2bf8-4e22-a9b9-412e9894d137')\"\n",
+              "              title=\"Suggest charts.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "      </button>\n",
+              "    </div>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: #E8F0FE;\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: #1967D2;\n",
+              "    height: 32px;\n",
+              "    padding: 0 0 0 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: #E2EBFA;\n",
+              "    box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: #174EA6;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "    background-color: #3B4455;\n",
+              "    fill: #D2E3FC;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart:hover {\n",
+              "    background-color: #434B5C;\n",
+              "    box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "    filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "    fill: #FFFFFF;\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "    <script>\n",
+              "      async function quickchart(key) {\n",
+              "        const containerElement = document.querySelector('#' + key);\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      }\n",
+              "    </script>\n",
+              "\n",
+              "      <script>\n",
+              "\n",
+              "function displayQuickchartButton(domScope) {\n",
+              "  let quickchartButtonEl =\n",
+              "    domScope.querySelector('#df-2b9195cf-2bf8-4e22-a9b9-412e9894d137 button.colab-df-quickchart');\n",
+              "  quickchartButtonEl.style.display =\n",
+              "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "}\n",
+              "\n",
+              "        displayQuickchartButton(document);\n",
+              "      </script>\n",
+              "      <style>\n",
               "    .colab-df-container {\n",
               "      display:flex;\n",
               "      flex-wrap:wrap;\n",
@@ -1136,12 +1349,12 @@
               "\n",
               "      <script>\n",
               "        const buttonEl =\n",
-              "          document.querySelector('#df-6c333c35-9a95-41e9-8471-416e02cb232d button.colab-df-convert');\n",
+              "          document.querySelector('#df-3b9fdf2e-983a-465b-89c4-d31958f36197 button.colab-df-convert');\n",
               "        buttonEl.style.display =\n",
               "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "\n",
               "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-6c333c35-9a95-41e9-8471-416e02cb232d');\n",
+              "          const element = document.querySelector('#df-3b9fdf2e-983a-465b-89c4-d31958f36197');\n",
               "          const dataTable =\n",
               "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
               "                                                     [key], {});\n",
@@ -1159,20 +1372,19 @@
               "        }\n",
               "      </script>\n",
               "    </div>\n",
-              "  </div>\n",
-              "  "
+              "  </div>\n"
             ],
             "text/plain": [
               "     category  test_type  fail_count  pass_count pass_rate minimum_pass_rate  \\\n",
-              "0  robustness   add_typo          98         128       57%               65%   \n",
+              "0  robustness   add_typo          53         173       77%               65%   \n",
               "1  robustness  lowercase           0         226      100%               65%   \n",
               "\n",
-              "    pass  \n",
-              "0  False  \n",
-              "1   True  "
+              "   pass  \n",
+              "0  True  \n",
+              "1  True  "
             ]
           },
-          "execution_count": 24,
+          "execution_count": 19,
           "metadata": {},
           "output_type": "execute_result"
         }
@@ -1182,7 +1394,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "7rpJ3QbPinkT"
@@ -1192,7 +1403,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "3g-s1Gikv65h"
@@ -1202,12 +1412,11 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
-      "metadata": {},
+      "metadata": {
+        "id": "s5s5gLn-xa8M"
+      },
       "source": [
-        "custom_proportions is a dictionary with augmentation on test type as key and proportion as value. The proportion is the percentage of the test cases that will be augmented with the given augmentation type.\n",
-        "\n",
         "**Augumentation with custom proportions in Dict format**\n",
         "\n",
         "custom_proportions is a dictionary with augmentation on test type as key and proportion as value. The proportion is the percentage of the test cases that will be augmented with the given augmentation type.\n",
@@ -1218,7 +1427,7 @@
         "\n",
         "**Augumentation with custom proportions in List format**\n",
         "\n",
-        "custom_proportions is a list of test types. \n",
+        "custom_proportions is a list of test types.\n",
         "```\n",
         "custom_proportions = ['add_typo', 'lowercase']\n",
         "```"
@@ -1226,34 +1435,44 @@
     },
     {
       "cell_type": "markdown",
-      "metadata": {},
+      "metadata": {
+        "id": "f00yfUE_xa8M"
+      },
       "source": [
         "The `.augment()` function takes the following parameters:\n",
         "\n",
-        "- `input_path` (str): Path to the input file.\n",
-        "- `output_path` (str): Path to save the augmented data.\n",
-        "- `export_mode` (str, optional): Determines how the samples are modified or exported. Defaults to `'add'`.\n",
-        "   - `'inplace'`: Modifies the list of samples in place.\n",
-        "   - `'add'`: Adds new samples to the input data.\n",
-        "   - `'transformed'`: Exports only the transformed data, excluding untransformed samples."
+        "1. `training_data`: (Required) Specifies the source of the original training data. It should be a dictionary containing the necessary information about the dataset.\n",
+        "    - Example: `{\"data_source\": \"conll03.conll\"}`\n",
+        "\n",
+        "2. `augmented_data`: (Required) Name of the file to store the augmented data. The augmented dataset will be saved in this file.\n",
+        "    - Example: `augmented_conll03.conll`\n",
+        "\n",
+        "3. `custom_proportions`: (Required) A dictionary with the test type as key and the proportion as value. The proportion is the percentage of the test cases that will be augmented with the given test type.\n",
+        "    - Example: `{\"add_typo\": 0.3, \"lowercase\": 0.3}`\n",
+        "\n",
+        "4. `export_mode`: (Optional) Specifies how the augmented data should be exported. The possible values are:\n",
+        "    - `'inplace'`: Modifies the list of samples in place.\n",
+        "    - `'add'`: Adds new samples to the input data.\n",
+        "    - `'transformed'`: Exports only the transformed data, excluding untransformed samples.\n",
+        "    - Example: `\"transformed\"`\n"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 25,
+      "execution_count": 22,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "EBTz4Fqev7xX",
-        "outputId": "10c6b4a0-e51b-43c8-8b1a-ab3de87bbd39"
+        "outputId": "11986bfb-36bf-4ecd-fe87-1264deb83744"
       },
       "outputs": [
         {
           "data": {
             "text/plain": []
           },
-          "execution_count": 25,
+          "execution_count": 22,
           "metadata": {},
           "output_type": "execute_result"
         }
@@ -1263,15 +1482,19 @@
         "    'add_typo':0.3,\n",
         "    'lowercase':0.3\n",
         "}\n",
+        "\n",
+        "data_kwargs = {\n",
+        "      \"data_source\" : \"conll03.conll\",\n",
+        "       }\n",
+        "\n",
         "harness.augment(\n",
-        "    input_path=\"conll03.conll\",\n",
-        "    output_path='augmented_conll03.conll',\n",
-        "    custom_proportions=custom_proportions, \n",
-        "    export_mode='add')"
+        "    training_data = data_kwargs,\n",
+        "    augmented_data =\"augmented_conll03.conll\",\n",
+        "    custom_proportions=custom_proportions,\n",
+        "    export_mode=\"transformed\")"
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "O2HL6Gip0ST0"
@@ -1281,7 +1504,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "z4aCF0kYwL4w"
@@ -1292,13 +1514,13 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 26,
+      "execution_count": 23,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "WvRFmf3PGz3k",
-        "outputId": "ec91e2ab-af1a-4ade-fd3a-2035162d4cf5"
+        "outputId": "6c583445-ffdd-41cc-8967-9ab02f02a506"
       },
       "outputs": [
         {
@@ -1318,7 +1540,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "QK8o7XaI_ZAf"
@@ -1329,20 +1550,42 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 27,
+      "execution_count": 24,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "UpaSjj05_fPd",
-        "outputId": "16f2d397-9fa4-420a-81c5-37bec5bb6904"
+        "outputId": "4dedb152-b42c-4466-eb78-a4b24a332a4d"
       },
       "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Test Configuration : \n",
+            " {\n",
+            " \"tests\": {\n",
+            "  \"defaults\": {\n",
+            "   \"min_pass_rate\": 0.65\n",
+            "  },\n",
+            "  \"robustness\": {\n",
+            "   \"add_typo\": {\n",
+            "    \"min_pass_rate\": 0.65\n",
+            "   },\n",
+            "   \"lowercase\": {\n",
+            "    \"min_pass_rate\": 0.65\n",
+            "   }\n",
+            "  }\n",
+            " }\n",
+            "}\n"
+          ]
+        },
         {
           "name": "stderr",
           "output_type": "stream",
           "text": [
-            "Generating testcases... (robustness): 100%|██████████| 1/1 [00:30<00:00, 30.37s/it]\n"
+            "Generating testcases...: 100%|██████████| 1/1 [00:00<00:00, 5405.03it/s]\n"
           ]
         }
       ],
@@ -1351,7 +1594,6 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "9aif5bl_G0GZ"
@@ -1362,27 +1604,27 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 28,
+      "execution_count": 25,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "StrOVtMoAQpf",
-        "outputId": "616b624e-c5bd-4b19-c044-f82ee97729bb"
+        "outputId": "18f2ad70-bbaa-41ff-80bc-fe10d246eda4"
       },
       "outputs": [
         {
           "name": "stderr",
           "output_type": "stream",
           "text": [
-            "Running test cases...: 100%|██████████| 452/452 [00:59<00:00,  7.55it/s]\n"
+            "Running testcases... : 100%|██████████| 452/452 [01:03<00:00,  7.09it/s]\n"
           ]
         },
         {
           "data": {
             "text/plain": []
           },
-          "execution_count": 28,
+          "execution_count": 25,
           "metadata": {},
           "output_type": "execute_result"
         }
@@ -1393,21 +1635,22 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 29,
+      "execution_count": 26,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 423
+          "height": 606
         },
         "id": "znh2xqQmAWHf",
-        "outputId": "c0b55b2c-efa0-4ac1-a290-3eaa6b34b0f4"
+        "outputId": "174d9f95-f0d6-40ca-bd09-07aa77aff008"
       },
       "outputs": [
         {
           "data": {
             "text/html": [
               "\n",
-              "  <div id=\"df-43323da6-bdf5-41b6-a6de-2db1828c3edf\">\n",
+              "\n",
+              "  <div id=\"df-061b48f2-f0c1-4e90-87d5-412ecda9c45d\">\n",
               "    <div class=\"colab-df-container\">\n",
               "      <div>\n",
               "<style scoped>\n",
@@ -1442,9 +1685,9 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...</td>\n",
-              "      <td>SOCCER - JAPAN GET LUCKY WIN , CHINA IN SYRPRI...</td>\n",
-              "      <td>japan: LOC, china: LOC</td>\n",
-              "      <td>japan: LOC, china: LOC</td>\n",
+              "      <td>SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...</td>\n",
+              "      <td>japan: LOC, lucky: LOC, china: LOC</td>\n",
+              "      <td>japan: LOC, lucky: LOC, china: LOC</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -1452,9 +1695,9 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>Nadim Ladki</td>\n",
-              "      <td>Nasim Ladki</td>\n",
+              "      <td>Jadim Ladki</td>\n",
               "      <td>nadim ladki: PER</td>\n",
-              "      <td>nasim ladki: PER</td>\n",
+              "      <td>jadim ladki: PER</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -1462,9 +1705,9 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>AL-AIN , United Arab Emirates 1996-12-06</td>\n",
-              "      <td>AL-AIN , United Arsb Emirates 1996-12-06</td>\n",
-              "      <td>al-ain: LOC, united: LOC, arab emirates: LOC</td>\n",
-              "      <td>al-ain: LOC, united arsb emirates: LOC</td>\n",
+              "      <td>AL-AIN , Ynited Arab Emirates 1996-12-06</td>\n",
+              "      <td>al-ain: LOC, united arab emirates: LOC</td>\n",
+              "      <td>al-ain: LOC, arab emirates: LOC</td>\n",
               "      <td>False</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -1482,10 +1725,10 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>But China saw their luck desert them in the se...</td>\n",
-              "      <td>But China saw their luck dseert them in the se...</td>\n",
+              "      <td>But Chins saw their luck desert them in the se...</td>\n",
               "      <td>china: LOC, uzbekistan: LOC</td>\n",
-              "      <td>china: LOC, uzbekistan: LOC</td>\n",
-              "      <td>True</td>\n",
+              "      <td>uzbekistan: LOC</td>\n",
+              "      <td>False</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>...</th>\n",
@@ -1503,8 +1746,8 @@
               "      <td>lowercase</td>\n",
               "      <td>Portuguesa 1 Atletico Mineiro 0</td>\n",
               "      <td>portuguesa 1 atletico mineiro 0</td>\n",
-              "      <td>portuguesa: ORG, atletico mineiro: ORG</td>\n",
-              "      <td>portuguesa: ORG, atletico mineiro: ORG</td>\n",
+              "      <td>portuguesa: LOC, atletico: ORG, mineiro: ORG</td>\n",
+              "      <td>portuguesa: LOC, atletico: ORG, mineiro: ORG</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -1513,8 +1756,8 @@
               "      <td>lowercase</td>\n",
               "      <td>CRICKET - LARA ENDURES ANOTHER MISERABLE DAY .</td>\n",
               "      <td>cricket - lara endures another miserable day .</td>\n",
-              "      <td>lara: PER</td>\n",
-              "      <td>lara: PER</td>\n",
+              "      <td></td>\n",
+              "      <td></td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -1543,26 +1786,93 @@
               "      <td>lowercase</td>\n",
               "      <td>Australia gave Brian Lara another reason to be...</td>\n",
               "      <td>australia gave brian lara another reason to be...</td>\n",
-              "      <td>australia: LOC, brian lara: PER, west: LOC, wo...</td>\n",
-              "      <td>australia: LOC, brian lara: PER, west: LOC, wo...</td>\n",
+              "      <td>australia: LOC, brian: PER, indies: LOC, world...</td>\n",
+              "      <td>australia: LOC, brian: PER, indies: LOC, world...</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "  </tbody>\n",
               "</table>\n",
               "<p>452 rows × 7 columns</p>\n",
               "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-43323da6-bdf5-41b6-a6de-2db1828c3edf')\"\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-061b48f2-f0c1-4e90-87d5-412ecda9c45d')\"\n",
               "              title=\"Convert this dataframe to an interactive table.\"\n",
               "              style=\"display:none;\">\n",
-              "        \n",
+              "\n",
               "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
               "       width=\"24px\">\n",
               "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
               "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
               "  </svg>\n",
               "      </button>\n",
-              "      \n",
-              "  <style>\n",
+              "\n",
+              "\n",
+              "\n",
+              "    <div id=\"df-896134f8-232d-4435-8867-403648a8924b\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-896134f8-232d-4435-8867-403648a8924b')\"\n",
+              "              title=\"Suggest charts.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "      </button>\n",
+              "    </div>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: #E8F0FE;\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: #1967D2;\n",
+              "    height: 32px;\n",
+              "    padding: 0 0 0 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: #E2EBFA;\n",
+              "    box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: #174EA6;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "    background-color: #3B4455;\n",
+              "    fill: #D2E3FC;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart:hover {\n",
+              "    background-color: #434B5C;\n",
+              "    box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "    filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "    fill: #FFFFFF;\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "    <script>\n",
+              "      async function quickchart(key) {\n",
+              "        const containerElement = document.querySelector('#' + key);\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      }\n",
+              "    </script>\n",
+              "\n",
+              "      <script>\n",
+              "\n",
+              "function displayQuickchartButton(domScope) {\n",
+              "  let quickchartButtonEl =\n",
+              "    domScope.querySelector('#df-896134f8-232d-4435-8867-403648a8924b button.colab-df-quickchart');\n",
+              "  quickchartButtonEl.style.display =\n",
+              "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "}\n",
+              "\n",
+              "        displayQuickchartButton(document);\n",
+              "      </script>\n",
+              "      <style>\n",
               "    .colab-df-container {\n",
               "      display:flex;\n",
               "      flex-wrap:wrap;\n",
@@ -1602,12 +1912,12 @@
               "\n",
               "      <script>\n",
               "        const buttonEl =\n",
-              "          document.querySelector('#df-43323da6-bdf5-41b6-a6de-2db1828c3edf button.colab-df-convert');\n",
+              "          document.querySelector('#df-061b48f2-f0c1-4e90-87d5-412ecda9c45d button.colab-df-convert');\n",
               "        buttonEl.style.display =\n",
               "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "\n",
               "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-43323da6-bdf5-41b6-a6de-2db1828c3edf');\n",
+              "          const element = document.querySelector('#df-061b48f2-f0c1-4e90-87d5-412ecda9c45d');\n",
               "          const dataTable =\n",
               "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
               "                                                     [key], {});\n",
@@ -1625,8 +1935,7 @@
               "        }\n",
               "      </script>\n",
               "    </div>\n",
-              "  </div>\n",
-              "  "
+              "  </div>\n"
             ],
             "text/plain": [
               "       category  test_type                                           original  \\\n",
@@ -1643,11 +1952,11 @@
               "451  robustness  lowercase  Australia gave Brian Lara another reason to be...   \n",
               "\n",
               "                                             test_case  \\\n",
-              "0    SOCCER - JAPAN GET LUCKY WIN , CHINA IN SYRPRI...   \n",
-              "1                                          Nasim Ladki   \n",
-              "2             AL-AIN , United Arsb Emirates 1996-12-06   \n",
+              "0    SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...   \n",
+              "1                                          Jadim Ladki   \n",
+              "2             AL-AIN , Ynited Arab Emirates 1996-12-06   \n",
               "3    Japan began the defence of their Asian Cup tit...   \n",
-              "4    But China saw their luck dseert them in the se...   \n",
+              "4    But Chins saw their luck desert them in the se...   \n",
               "..                                                 ...   \n",
               "447                    portuguesa 1 atletico mineiro 0   \n",
               "448     cricket - lara endures another miserable day .   \n",
@@ -1656,35 +1965,35 @@
               "451  australia gave brian lara another reason to be...   \n",
               "\n",
               "                                       expected_result  \\\n",
-              "0                               japan: LOC, china: LOC   \n",
+              "0                   japan: LOC, lucky: LOC, china: LOC   \n",
               "1                                     nadim ladki: PER   \n",
-              "2         al-ain: LOC, united: LOC, arab emirates: LOC   \n",
+              "2               al-ain: LOC, united arab emirates: LOC   \n",
               "3              japan: LOC, asian cup: MISC, syria: LOC   \n",
               "4                          china: LOC, uzbekistan: LOC   \n",
               "..                                                 ...   \n",
-              "447             portuguesa: ORG, atletico mineiro: ORG   \n",
-              "448                                          lara: PER   \n",
+              "447       portuguesa: LOC, atletico: ORG, mineiro: ORG   \n",
+              "448                                                      \n",
               "449                                 robert galvin: PER   \n",
               "450                                     melbourne: LOC   \n",
-              "451  australia: LOC, brian lara: PER, west: LOC, wo...   \n",
+              "451  australia: LOC, brian: PER, indies: LOC, world...   \n",
               "\n",
               "                                         actual_result   pass  \n",
-              "0                               japan: LOC, china: LOC   True  \n",
-              "1                                     nasim ladki: PER   True  \n",
-              "2               al-ain: LOC, united arsb emirates: LOC  False  \n",
+              "0                   japan: LOC, lucky: LOC, china: LOC   True  \n",
+              "1                                     jadim ladki: PER   True  \n",
+              "2                      al-ain: LOC, arab emirates: LOC  False  \n",
               "3              japan: LOC, asian cup: MISC, syria: LOC   True  \n",
-              "4                          china: LOC, uzbekistan: LOC   True  \n",
+              "4                                      uzbekistan: LOC  False  \n",
               "..                                                 ...    ...  \n",
-              "447             portuguesa: ORG, atletico mineiro: ORG   True  \n",
-              "448                                          lara: PER   True  \n",
+              "447       portuguesa: LOC, atletico: ORG, mineiro: ORG   True  \n",
+              "448                                                      True  \n",
               "449                                 robert galvin: PER   True  \n",
               "450                                     melbourne: LOC   True  \n",
-              "451  australia: LOC, brian lara: PER, west: LOC, wo...   True  \n",
+              "451  australia: LOC, brian: PER, indies: LOC, world...   True  \n",
               "\n",
               "[452 rows x 7 columns]"
             ]
           },
-          "execution_count": 29,
+          "execution_count": 26,
           "metadata": {},
           "output_type": "execute_result"
         }
@@ -1695,21 +2004,22 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 30,
+      "execution_count": 27,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 112
         },
         "id": "JSqkrBOZ-TeG",
-        "outputId": "34060368-241c-48dc-818e-bd84f7e85a1a"
+        "outputId": "eb818556-51f5-40f4-998d-6137e0d5b70d"
       },
       "outputs": [
         {
           "data": {
             "text/html": [
               "\n",
-              "  <div id=\"df-edba045f-a1ff-461e-8890-1d8266dcb62f\">\n",
+              "\n",
+              "  <div id=\"df-69ae5564-8629-40a6-8237-37f926672c9f\">\n",
               "    <div class=\"colab-df-container\">\n",
               "      <div>\n",
               "<style scoped>\n",
@@ -1743,9 +2053,9 @@
               "      <th>0</th>\n",
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
-              "      <td>62</td>\n",
-              "      <td>164</td>\n",
-              "      <td>73%</td>\n",
+              "      <td>50</td>\n",
+              "      <td>176</td>\n",
+              "      <td>78%</td>\n",
               "      <td>65%</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
@@ -1762,18 +2072,85 @@
               "  </tbody>\n",
               "</table>\n",
               "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-edba045f-a1ff-461e-8890-1d8266dcb62f')\"\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-69ae5564-8629-40a6-8237-37f926672c9f')\"\n",
               "              title=\"Convert this dataframe to an interactive table.\"\n",
               "              style=\"display:none;\">\n",
-              "        \n",
+              "\n",
               "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
               "       width=\"24px\">\n",
               "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
               "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
               "  </svg>\n",
               "      </button>\n",
-              "      \n",
-              "  <style>\n",
+              "\n",
+              "\n",
+              "\n",
+              "    <div id=\"df-bd6984c3-1c78-4feb-96dc-9af60fab6bdc\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-bd6984c3-1c78-4feb-96dc-9af60fab6bdc')\"\n",
+              "              title=\"Suggest charts.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "      </button>\n",
+              "    </div>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: #E8F0FE;\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: #1967D2;\n",
+              "    height: 32px;\n",
+              "    padding: 0 0 0 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: #E2EBFA;\n",
+              "    box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: #174EA6;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "    background-color: #3B4455;\n",
+              "    fill: #D2E3FC;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart:hover {\n",
+              "    background-color: #434B5C;\n",
+              "    box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "    filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "    fill: #FFFFFF;\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "    <script>\n",
+              "      async function quickchart(key) {\n",
+              "        const containerElement = document.querySelector('#' + key);\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      }\n",
+              "    </script>\n",
+              "\n",
+              "      <script>\n",
+              "\n",
+              "function displayQuickchartButton(domScope) {\n",
+              "  let quickchartButtonEl =\n",
+              "    domScope.querySelector('#df-bd6984c3-1c78-4feb-96dc-9af60fab6bdc button.colab-df-quickchart');\n",
+              "  quickchartButtonEl.style.display =\n",
+              "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "}\n",
+              "\n",
+              "        displayQuickchartButton(document);\n",
+              "      </script>\n",
+              "      <style>\n",
               "    .colab-df-container {\n",
               "      display:flex;\n",
               "      flex-wrap:wrap;\n",
@@ -1813,12 +2190,12 @@
               "\n",
               "      <script>\n",
               "        const buttonEl =\n",
-              "          document.querySelector('#df-edba045f-a1ff-461e-8890-1d8266dcb62f button.colab-df-convert');\n",
+              "          document.querySelector('#df-69ae5564-8629-40a6-8237-37f926672c9f button.colab-df-convert');\n",
               "        buttonEl.style.display =\n",
               "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "\n",
               "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-edba045f-a1ff-461e-8890-1d8266dcb62f');\n",
+              "          const element = document.querySelector('#df-69ae5564-8629-40a6-8237-37f926672c9f');\n",
               "          const dataTable =\n",
               "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
               "                                                     [key], {});\n",
@@ -1836,12 +2213,11 @@
               "        }\n",
               "      </script>\n",
               "    </div>\n",
-              "  </div>\n",
-              "  "
+              "  </div>\n"
             ],
             "text/plain": [
               "     category  test_type  fail_count  pass_count pass_rate minimum_pass_rate  \\\n",
-              "0  robustness   add_typo          62         164       73%               65%   \n",
+              "0  robustness   add_typo          50         176       78%               65%   \n",
               "1  robustness  lowercase           0         226      100%               65%   \n",
               "\n",
               "   pass  \n",
@@ -1849,7 +2225,7 @@
               "1  True  "
             ]
           },
-          "execution_count": 30,
+          "execution_count": 27,
           "metadata": {},
           "output_type": "execute_result"
         }
@@ -1859,14 +2235,943 @@
       ]
     },
     {
-      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "J0J5n2b1Ak-U"
       },
       "source": [
         "\n",
-        "We can see that after performing augmentation, even the **add_typo** test is passing which failed earlier."
+        "We can see that after performing augmentation, the pass rate for the **add_typo** test has increased."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "UXd8Nvg23UTf"
+      },
+      "source": [
+        "# HuggingFace Dataset Augmentation for Text Classification"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ob4MXZW-CoZx"
+      },
+      "source": [
+        "### Installing required dependencies"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "10A82M0q6nj3"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install datasets"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "dNex30tpClAi"
+      },
+      "source": [
+        "### Setup and Configure Harness"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 41,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "SBMhtvqV3AUm",
+        "outputId": "57c09f14-ed87-4287-e44e-4a0f3440e9f5"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Test Configuration : \n",
+            " {\n",
+            " \"tests\": {\n",
+            "  \"defaults\": {\n",
+            "   \"min_pass_rate\": 1.0\n",
+            "  },\n",
+            "  \"robustness\": {\n",
+            "   \"add_typo\": {\n",
+            "    \"min_pass_rate\": 0.7\n",
+            "   },\n",
+            "   \"american_to_british\": {\n",
+            "    \"min_pass_rate\": 0.7\n",
+            "   }\n",
+            "  },\n",
+            "  \"accuracy\": {\n",
+            "   \"min_micro_f1_score\": {\n",
+            "    \"min_score\": 0.7\n",
+            "   }\n",
+            "  },\n",
+            "  \"bias\": {\n",
+            "   \"replace_to_female_pronouns\": {\n",
+            "    \"min_pass_rate\": 0.7\n",
+            "   },\n",
+            "   \"replace_to_low_income_country\": {\n",
+            "    \"min_pass_rate\": 0.7\n",
+            "   }\n",
+            "  },\n",
+            "  \"fairness\": {\n",
+            "   \"min_gender_f1_score\": {\n",
+            "    \"min_score\": 0.6\n",
+            "   }\n",
+            "  },\n",
+            "  \"representation\": {\n",
+            "   \"min_label_representation_count\": {\n",
+            "    \"min_count\": 50\n",
+            "   }\n",
+            "  }\n",
+            " }\n",
+            "}\n"
+          ]
+        }
+      ],
+      "source": [
+        "harness = Harness(task=\"text-classification\", hub=\"huggingface\",\n",
+        "                   model=\"distilbert-base-uncased-finetuned-sst-2-english\",\n",
+        "                  data={\"name\":'glue',\n",
+        "                  \"subset\":\"sst2\",\n",
+        "                  \"feature_column\":\"sentence\",\n",
+        "                  \"target_column\":'label',\n",
+        "                  \"split\":\"train\"\n",
+        "                  })"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 42,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "34SjM0fp6kor",
+        "outputId": "1e42cd30-7b51-42bd-932b-9d503ce09fca"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "{'tests': {'defaults': {'min_pass_rate': 0.65},\n",
+              "  'robustness': {'add_speech_to_text_typo': {'min_pass_rate': 0.6},\n",
+              "   'add_ocr_typo': {'min_pass_rate': 0.6}}}}"
+            ]
+          },
+          "execution_count": 42,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "harness.configure(\n",
+        "{\n",
+        " 'tests': {'defaults': {'min_pass_rate': 0.65},\n",
+        "           'robustness': {'add_speech_to_text_typo':{'min_pass_rate': 0.60},\n",
+        "                          'add_ocr_typo':{'min_pass_rate': 0.60},\n",
+        "                        }\n",
+        "          }\n",
+        " }\n",
+        " )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 43,
+      "metadata": {
+        "id": "DLF24Tj_62DI"
+      },
+      "outputs": [],
+      "source": [
+        "# Limit the data to the first 500 samples\n",
+        "harness.data = harness.data[:500]"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "5wAc9cbhCawc"
+      },
+      "source": [
+        "### Generating the test cases"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "aaQ1kZMjCd3p"
+      },
+      "source": [
+        "The harness.generate() method automatically generates the test cases based on the provided configuration."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 44,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Yg03CJTQ64cE",
+        "outputId": "474a2a51-56fc-40b6-95f2-17e1e3e30db6"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "Generating testcases...: 100%|██████████| 1/1 [00:00<00:00, 348.05it/s]\n"
+          ]
+        },
+        {
+          "data": {
+            "text/plain": []
+          },
+          "execution_count": 44,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "harness.generate()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "4QjiSxKLCT_1"
+      },
+      "source": [
+        "### Running the tests"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 45,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "JooWo_t86565",
+        "outputId": "d96be531-811d-485b-bb48-f90965d49183"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "Running testcases... : 100%|██████████| 1000/1000 [02:05<00:00,  7.98it/s]\n"
+          ]
+        },
+        {
+          "data": {
+            "text/plain": []
+          },
+          "execution_count": 45,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "harness.run()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "sVjN4Tb-CWmm"
+      },
+      "source": [
+        "harness.run() is called after harness.generate() and is used to run all the tests. It returns a pass/fail flag for each test."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 46,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 624
+        },
+        "id": "thIlr0uJ67O_",
+        "outputId": "3a4251f2-0e11-41e6-a744-63b8fd7bf764"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "\n",
+              "\n",
+              "  <div id=\"df-d4ff6c75-1edc-4af2-a0d6-221a285928ee\">\n",
+              "    <div class=\"colab-df-container\">\n",
+              "      <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>category</th>\n",
+              "      <th>test_type</th>\n",
+              "      <th>original</th>\n",
+              "      <th>test_case</th>\n",
+              "      <th>expected_result</th>\n",
+              "      <th>actual_result</th>\n",
+              "      <th>pass</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>robustness</td>\n",
+              "      <td>add_speech_to_text_typo</td>\n",
+              "      <td>hide new secretions from the parental units</td>\n",
+              "      <td>hide new secretions frum the parental units'</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>robustness</td>\n",
+              "      <td>add_speech_to_text_typo</td>\n",
+              "      <td>contains no wit , only labored gags</td>\n",
+              "      <td>contains know witte , only labored gags</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>robustness</td>\n",
+              "      <td>add_speech_to_text_typo</td>\n",
+              "      <td>that loves its characters and communicates som...</td>\n",
+              "      <td>that loves its characters and communicates som...</td>\n",
+              "      <td>POSITIVE</td>\n",
+              "      <td>POSITIVE</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>robustness</td>\n",
+              "      <td>add_speech_to_text_typo</td>\n",
+              "      <td>remains utterly satisfied to remain the same t...</td>\n",
+              "      <td>remains utterly satisfied to remain the sejm t...</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>robustness</td>\n",
+              "      <td>add_speech_to_text_typo</td>\n",
+              "      <td>on the worst revenge-of-the-nerds clichés the ...</td>\n",
+              "      <td>aune the worst revenge-of-the-nerds clichés th...</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>995</th>\n",
+              "      <td>robustness</td>\n",
+              "      <td>add_ocr_typo</td>\n",
+              "      <td>true star</td>\n",
+              "      <td>trne ftar</td>\n",
+              "      <td>POSITIVE</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>False</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>996</th>\n",
+              "      <td>robustness</td>\n",
+              "      <td>add_ocr_typo</td>\n",
+              "      <td>hampered -- no , paralyzed -- by a self-indulg...</td>\n",
+              "      <td>hampered -- n^o , paralyzed -- by a self-indul...</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>997</th>\n",
+              "      <td>robustness</td>\n",
+              "      <td>add_ocr_typo</td>\n",
+              "      <td>is expressly for idiots who do n't care what k...</td>\n",
+              "      <td>is expressly f^r idiots avho do n't caie v\\hat...</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>998</th>\n",
+              "      <td>robustness</td>\n",
+              "      <td>add_ocr_typo</td>\n",
+              "      <td>is haunting ... ( it 's ) what punk rock music...</td>\n",
+              "      <td>is haunting ... ( i^t 's ) v\\hat punk rock mul...</td>\n",
+              "      <td>POSITIVE</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>False</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>999</th>\n",
+              "      <td>robustness</td>\n",
+              "      <td>add_ocr_typo</td>\n",
+              "      <td>which nurses plot holes gaping enough to pilot...</td>\n",
+              "      <td>y/hich nurses plot holes gaping enongh t^o pil...</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>NEGATIVE</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>1000 rows × 7 columns</p>\n",
+              "</div>\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d4ff6c75-1edc-4af2-a0d6-221a285928ee')\"\n",
+              "              title=\"Convert this dataframe to an interactive table.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
+              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
+              "  </svg>\n",
+              "      </button>\n",
+              "\n",
+              "\n",
+              "\n",
+              "    <div id=\"df-cdb4fa3e-b48a-41cb-bb44-ca09c977ce83\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-cdb4fa3e-b48a-41cb-bb44-ca09c977ce83')\"\n",
+              "              title=\"Suggest charts.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "      </button>\n",
+              "    </div>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: #E8F0FE;\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: #1967D2;\n",
+              "    height: 32px;\n",
+              "    padding: 0 0 0 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: #E2EBFA;\n",
+              "    box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: #174EA6;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "    background-color: #3B4455;\n",
+              "    fill: #D2E3FC;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart:hover {\n",
+              "    background-color: #434B5C;\n",
+              "    box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "    filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "    fill: #FFFFFF;\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "    <script>\n",
+              "      async function quickchart(key) {\n",
+              "        const containerElement = document.querySelector('#' + key);\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      }\n",
+              "    </script>\n",
+              "\n",
+              "      <script>\n",
+              "\n",
+              "function displayQuickchartButton(domScope) {\n",
+              "  let quickchartButtonEl =\n",
+              "    domScope.querySelector('#df-cdb4fa3e-b48a-41cb-bb44-ca09c977ce83 button.colab-df-quickchart');\n",
+              "  quickchartButtonEl.style.display =\n",
+              "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "}\n",
+              "\n",
+              "        displayQuickchartButton(document);\n",
+              "      </script>\n",
+              "      <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      flex-wrap:wrap;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "      <script>\n",
+              "        const buttonEl =\n",
+              "          document.querySelector('#df-d4ff6c75-1edc-4af2-a0d6-221a285928ee button.colab-df-convert');\n",
+              "        buttonEl.style.display =\n",
+              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "        async function convertToInteractive(key) {\n",
+              "          const element = document.querySelector('#df-d4ff6c75-1edc-4af2-a0d6-221a285928ee');\n",
+              "          const dataTable =\n",
+              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                     [key], {});\n",
+              "          if (!dataTable) return;\n",
+              "\n",
+              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "            + ' to learn more about interactive tables.';\n",
+              "          element.innerHTML = '';\n",
+              "          dataTable['output_type'] = 'display_data';\n",
+              "          await google.colab.output.renderOutput(dataTable, element);\n",
+              "          const docLink = document.createElement('div');\n",
+              "          docLink.innerHTML = docLinkHtml;\n",
+              "          element.appendChild(docLink);\n",
+              "        }\n",
+              "      </script>\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "text/plain": [
+              "       category                test_type  \\\n",
+              "0    robustness  add_speech_to_text_typo   \n",
+              "1    robustness  add_speech_to_text_typo   \n",
+              "2    robustness  add_speech_to_text_typo   \n",
+              "3    robustness  add_speech_to_text_typo   \n",
+              "4    robustness  add_speech_to_text_typo   \n",
+              "..          ...                      ...   \n",
+              "995  robustness             add_ocr_typo   \n",
+              "996  robustness             add_ocr_typo   \n",
+              "997  robustness             add_ocr_typo   \n",
+              "998  robustness             add_ocr_typo   \n",
+              "999  robustness             add_ocr_typo   \n",
+              "\n",
+              "                                              original  \\\n",
+              "0         hide new secretions from the parental units    \n",
+              "1                 contains no wit , only labored gags    \n",
+              "2    that loves its characters and communicates som...   \n",
+              "3    remains utterly satisfied to remain the same t...   \n",
+              "4    on the worst revenge-of-the-nerds clichés the ...   \n",
+              "..                                                 ...   \n",
+              "995                                         true star    \n",
+              "996  hampered -- no , paralyzed -- by a self-indulg...   \n",
+              "997  is expressly for idiots who do n't care what k...   \n",
+              "998  is haunting ... ( it 's ) what punk rock music...   \n",
+              "999  which nurses plot holes gaping enough to pilot...   \n",
+              "\n",
+              "                                             test_case expected_result  \\\n",
+              "0        hide new secretions frum the parental units'         NEGATIVE   \n",
+              "1             contains know witte , only labored gags         NEGATIVE   \n",
+              "2    that loves its characters and communicates som...        POSITIVE   \n",
+              "3    remains utterly satisfied to remain the sejm t...        NEGATIVE   \n",
+              "4    aune the worst revenge-of-the-nerds clichés th...        NEGATIVE   \n",
+              "..                                                 ...             ...   \n",
+              "995                                         trne ftar         POSITIVE   \n",
+              "996  hampered -- n^o , paralyzed -- by a self-indul...        NEGATIVE   \n",
+              "997  is expressly f^r idiots avho do n't caie v\\hat...        NEGATIVE   \n",
+              "998  is haunting ... ( i^t 's ) v\\hat punk rock mul...        POSITIVE   \n",
+              "999  y/hich nurses plot holes gaping enongh t^o pil...        NEGATIVE   \n",
+              "\n",
+              "    actual_result   pass  \n",
+              "0        NEGATIVE   True  \n",
+              "1        NEGATIVE   True  \n",
+              "2        POSITIVE   True  \n",
+              "3        NEGATIVE   True  \n",
+              "4        NEGATIVE   True  \n",
+              "..            ...    ...  \n",
+              "995      NEGATIVE  False  \n",
+              "996      NEGATIVE   True  \n",
+              "997      NEGATIVE   True  \n",
+              "998      NEGATIVE  False  \n",
+              "999      NEGATIVE   True  \n",
+              "\n",
+              "[1000 rows x 7 columns]"
+            ]
+          },
+          "execution_count": 46,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "harness.generated_results()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "5Erhl6nkCQjB"
+      },
+      "source": [
+        "This method returns the generated results in the form of a pandas dataframe, which provides a convenient and easy-to-use format for working with the test results. You can use this method to quickly identify the test cases that failed and to determine where fixes are needed."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "2gVoIzpWCFk2"
+      },
+      "source": [
+        "#### Report of the tests"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 47,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 112
+        },
+        "id": "xjkaiyLd68y9",
+        "outputId": "0b788ded-a9af-4bcc-b843-293dd90754b4"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "\n",
+              "\n",
+              "  <div id=\"df-f4a70dfb-b637-4d60-a9c0-d35d9f32f9e9\">\n",
+              "    <div class=\"colab-df-container\">\n",
+              "      <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>category</th>\n",
+              "      <th>test_type</th>\n",
+              "      <th>fail_count</th>\n",
+              "      <th>pass_count</th>\n",
+              "      <th>pass_rate</th>\n",
+              "      <th>minimum_pass_rate</th>\n",
+              "      <th>pass</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>robustness</td>\n",
+              "      <td>add_speech_to_text_typo</td>\n",
+              "      <td>35</td>\n",
+              "      <td>465</td>\n",
+              "      <td>93%</td>\n",
+              "      <td>60%</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>robustness</td>\n",
+              "      <td>add_ocr_typo</td>\n",
+              "      <td>94</td>\n",
+              "      <td>406</td>\n",
+              "      <td>81%</td>\n",
+              "      <td>60%</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f4a70dfb-b637-4d60-a9c0-d35d9f32f9e9')\"\n",
+              "              title=\"Convert this dataframe to an interactive table.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
+              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
+              "  </svg>\n",
+              "      </button>\n",
+              "\n",
+              "\n",
+              "\n",
+              "    <div id=\"df-ff2d760c-728c-4d45-8a64-fa70ecc13887\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-ff2d760c-728c-4d45-8a64-fa70ecc13887')\"\n",
+              "              title=\"Suggest charts.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "      </button>\n",
+              "    </div>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: #E8F0FE;\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: #1967D2;\n",
+              "    height: 32px;\n",
+              "    padding: 0 0 0 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: #E2EBFA;\n",
+              "    box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: #174EA6;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "    background-color: #3B4455;\n",
+              "    fill: #D2E3FC;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart:hover {\n",
+              "    background-color: #434B5C;\n",
+              "    box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "    filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "    fill: #FFFFFF;\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "    <script>\n",
+              "      async function quickchart(key) {\n",
+              "        const containerElement = document.querySelector('#' + key);\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      }\n",
+              "    </script>\n",
+              "\n",
+              "      <script>\n",
+              "\n",
+              "function displayQuickchartButton(domScope) {\n",
+              "  let quickchartButtonEl =\n",
+              "    domScope.querySelector('#df-ff2d760c-728c-4d45-8a64-fa70ecc13887 button.colab-df-quickchart');\n",
+              "  quickchartButtonEl.style.display =\n",
+              "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "}\n",
+              "\n",
+              "        displayQuickchartButton(document);\n",
+              "      </script>\n",
+              "      <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      flex-wrap:wrap;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "      <script>\n",
+              "        const buttonEl =\n",
+              "          document.querySelector('#df-f4a70dfb-b637-4d60-a9c0-d35d9f32f9e9 button.colab-df-convert');\n",
+              "        buttonEl.style.display =\n",
+              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "        async function convertToInteractive(key) {\n",
+              "          const element = document.querySelector('#df-f4a70dfb-b637-4d60-a9c0-d35d9f32f9e9');\n",
+              "          const dataTable =\n",
+              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                     [key], {});\n",
+              "          if (!dataTable) return;\n",
+              "\n",
+              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "            + ' to learn more about interactive tables.';\n",
+              "          element.innerHTML = '';\n",
+              "          dataTable['output_type'] = 'display_data';\n",
+              "          await google.colab.output.renderOutput(dataTable, element);\n",
+              "          const docLink = document.createElement('div');\n",
+              "          docLink.innerHTML = docLinkHtml;\n",
+              "          element.appendChild(docLink);\n",
+              "        }\n",
+              "      </script>\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "text/plain": [
+              "     category                test_type  fail_count  pass_count pass_rate  \\\n",
+              "0  robustness  add_speech_to_text_typo          35         465       93%   \n",
+              "1  robustness             add_ocr_typo          94         406       81%   \n",
+              "\n",
+              "  minimum_pass_rate  pass  \n",
+              "0               60%  True  \n",
+              "1               60%  True  "
+            ]
+          },
+          "execution_count": 47,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "harness.report()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Moh61mF3AvAw"
+      },
+      "source": [
+        " Additional parameters (optional): You can pass additional parameters in the `training_data` dictionary to specify the details of the original dataset, such as the data source, subset, feature column, target column, and split. These parameters help in selecting the appropriate data for augmentation.\n",
+        "\n",
+        "    - Example:\n",
+        "```\n",
+        "data_kwargs = {\n",
+        "    \"data_source\": \"glue\",\n",
+        "    \"subset\": \"sst2\",\n",
+        "    \"feature_column\": \"sentence\",\n",
+        "    \"target_column\": \"label\",\n",
+        "    \"split\": \"train\"\n",
+        "}\n",
+        "```\n",
+        "        \n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "kB6ImMUC9IIO"
+      },
+      "outputs": [],
+      "source": [
+        "custom_proportions = {\n",
+        "    'add_ocr_typo':0.3\n",
+        "}\n",
+        "\n",
+        "data_kwargs = {\n",
+        "      \"data_source\" : \"glue\",\n",
+        "      \"subset\": \"sst2\",\n",
+        "      \"feature_column\": \"sentence\",\n",
+        "      \"target_column\": \"label\",\n",
+        "      \"split\": \"train\"\n",
+        "       }\n",
+        "\n",
+        "\n",
+        "harness.augment(\n",
+        "    training_data = data_kwargs,\n",
+        "    augmented_data =\"augmented_glue.csv\",\n",
+        "    custom_proportions=custom_proportions,\n",
+        "    export_mode=\"add\",\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "YPXIxv9D_fR7"
+      },
+      "source": [
+        "Essentially it applies perturbations to the input data based on the recommendations from the harness reports. Then this augmented_dataset is used to retrain the original model so as to make the model more robust and improve its performance."
       ]
     }
   ],
diff --git a/docs/pages/docs/generate_augmentation.md b/docs/pages/docs/generate_augmentation.md
index 5e89085ea..9e6bcbb53 100644
--- a/docs/pages/docs/generate_augmentation.md
+++ b/docs/pages/docs/generate_augmentation.md
@@ -13,13 +13,33 @@ modify_date: "2023-03-28"
 The library provides a `augment()` method that facilitates the data augmentation process. 
 
 Several parameters are available: 
-- **`input_path`**, which is the path to the original training dataset to be augmented
-- **`output_path`**, which is the path to save the augmented dataset
-- **`inplace`** which is an optional parameter that controls whether the original input file should be augmented by duplicating rows in the dataset. By default, inplace is set to False. If True, the rows are modified in place and the length of the dataset remains similar. Otherwise, new rows are added to the dataset.
+
+- **`training_data`**: (Required) Specifies the source of the original training data. It should be a dictionary containing the necessary information about the dataset.
+
+- **`augmented_data`**: (Required) Name of the file to store the augmented data. The augmented dataset will be saved in this file.
+
+- **`custom_proportions`**: (Required) A dictionary that maps each test type (the augmentation to apply) to a proportion. The proportion is the share of the test cases that will be augmented with that test type, given as a fraction (e.g. `0.3`, as in the example below).
+
+- **`export_mode`**: (Optional) Specifies how the augmented data should be exported. The possible values are:
+    - `'inplace'`: Modifies the list of samples in place.
+    - `'add'`: Adds new samples to the input data.
+    - `'transformed'`: Exports only the transformed data, excluding the untransformed samples.
 
 ```python
-# Generating augmentations
-h.augment(input_path='training_dataset', output_path='augmented_dataset', inplace=False)
+custom_proportions = {
+    'add_typo':0.3,
+    'lowercase':0.3
+}
+
+data_kwargs = {
+      "data_source" : "conll03.conll",
+       }
+
+h.augment(
+    training_data = data_kwargs,
+    augmented_data ="augmented_conll03.conll",
+    custom_proportions=custom_proportions,
+    export_mode="transformed")
 ```
 
 This method applies perturbations to the input data based on the recommendations from the Harness report. This augmented dataset can then be used to retrain a model so as to make it more robust than its previous version.

From 62b349c0323b958c8cd95e4f063057f4087f04bb Mon Sep 17 00:00:00 2001
From: Rakshit Khajuria <rakshitraina1234@gmail.com>
Date: Wed, 2 Aug 2023 00:01:33 +0530
Subject: [PATCH 15/21] Docs(generate_aug.md): Updated For hf dataset

---
 docs/pages/docs/generate_augmentation.md | 29 +++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/docs/pages/docs/generate_augmentation.md b/docs/pages/docs/generate_augmentation.md
index 9e6bcbb53..93cfbfbbb 100644
--- a/docs/pages/docs/generate_augmentation.md
+++ b/docs/pages/docs/generate_augmentation.md
@@ -44,4 +44,31 @@ h.augment(
 
 This method applies perturbations to the input data based on the recommendations from the Harness report. This augmented dataset can then be used to retrain a model so as to make it more robust than its previous version.
 
-</div></div>
\ No newline at end of file
+</div></div><div class="h3-box" markdown="1">
+
+#### Passing a Hugging Face Dataset for Augmentation
+
+For Augmentations, we specify the HuggingFace data input in the following way:
+
+```python
+custom_proportions = {
+    'add_ocr_typo':0.3
+}
+
+data_kwargs = {
+      "data_source" : "glue",
+      "subset": "sst2",
+      "feature_column": "sentence",
+      "target_column": "label",
+      "split": "train"
+       }
+
+harness.augment(
+    training_data = data_kwargs,
+    augmented_data ="augmented_glue.csv",
+    custom_proportions=custom_proportions,
+    export_mode="add",
+)
+```
+
+</div>
\ No newline at end of file

From c94ca9f75a26817b7cc1d522373dfccd79ddffe4 Mon Sep 17 00:00:00 2001
From: Rakshit Khajuria <rakshitraina1234@gmail.com>
Date: Wed, 2 Aug 2023 13:41:29 +0530
Subject: [PATCH 16/21] Updated website for templatic augmentations

---
 docs/pages/docs/generate_augmentation.md | 25 ++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/docs/pages/docs/generate_augmentation.md b/docs/pages/docs/generate_augmentation.md
index 93cfbfbbb..e63f40153 100644
--- a/docs/pages/docs/generate_augmentation.md
+++ b/docs/pages/docs/generate_augmentation.md
@@ -46,6 +46,31 @@ This method applies perturbations to the input data based on the recommendations
 
 </div></div><div class="h3-box" markdown="1">
 
+#### Templatic Augmentations
+
+Templatic Augmentation is a technique that allows you to generate new training data by applying a set of predefined templates to the original training data. The templates are designed to introduce noise into the training data in a way that simulates real-world conditions. The augmentation process is controlled by a configuration file that specifies the augmentation templates to be used and the proportion of the training data to be augmented. The augmentation process is performed by the augment() method of the **Harness** class.
+
+For templatic augmentation, pass the templates as a list of strings to the `templates` argument of `augment()`, together with the training data to be augmented, as shown below.
+
+```
+template = ["The {ORG} company is located in {LOC}", "The {ORG} company is located in {LOC} and is owned by {PER}"]
+
+```
+
+```python
+data_kwargs = {
+      "data_source" : "conll03.conll",
+       }
+
+harness.augment(
+    training_data=data_kwargs,
+    augmented_data='augmented_conll03.conll',
+    templates=template,
+    )
+```
+
+</div><div class="h3-box" markdown="1">
+
 #### Passing a Hugging Face Dataset for Augmentation
 
 For Augmentations, we specify the HuggingFace data input in the following way:

From 9ebaa5c1348e48adbe9cd826ca300b373e3d13c2 Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Wed, 2 Aug 2023 13:44:03 +0530
Subject: [PATCH 17/21] Augmentation notebook updated

---
 .../misc/Augmentation_Control_Notebook.ipynb  |   8 +-
 .../Templatic_Augmentation_Notebook.ipynb     | 785 ++++++++----------
 2 files changed, 331 insertions(+), 462 deletions(-)

diff --git a/demo/tutorials/misc/Augmentation_Control_Notebook.ipynb b/demo/tutorials/misc/Augmentation_Control_Notebook.ipynb
index 1e14fd913..46a25953b 100644
--- a/demo/tutorials/misc/Augmentation_Control_Notebook.ipynb
+++ b/demo/tutorials/misc/Augmentation_Control_Notebook.ipynb
@@ -1441,16 +1441,16 @@
       "source": [
         "The `.augment()` function takes the following parameters:\n",
         "\n",
-        "1. `training_data`: (Required) Specifies the source of the original training data. It should be a dictionary containing the necessary information about the dataset.\n",
+        "1. `training_data` (dict): (Required) Specifies the source of the original training data. It should be a dictionary containing the necessary information about the dataset.\n",
         "    - Example: `{\"data_source\": \"conll03.conll\"}`\n",
         "\n",
-        "2. `augmented_data`: (Required) Name of the file to store the augmented data. The augmented dataset will be saved in this file.\n",
+        "2. `augmented_data` (str): (Required) Name of the file to store the augmented data. The augmented dataset will be saved in this file.\n",
         "    - Example: `augmented_conll03.conll`\n",
         "\n",
-        "3. `custom_proportions`: (Required) custom_proportions is a dictionary with augmentation on test type as key and proportion as value. The proportion is the percentage of the test cases that will be augmented with the given augmentation type.\n",
+        "3. `custom_proportions` (dict): (Required) custom_proportions is a dictionary with augmentation on test type as key and proportion as value. The proportion is the percentage of the test cases that will be augmented with the given augmentation type.\n",
         "    - Example: `{\"add_typo\": 0.3, \"lowercase\": 0.3}`\n",
         "\n",
-        "4. `export_mode`: (Optional) Specifies how the augmented data should be exported. The possible values are:\n",
+        "4. `export_mode` (str): (Optional) Specifies how the augmented data should be exported. The possible values are:\n",
         "    - `'inplace'`: Modifies the list of samples in place.\n",
         "    - `'add'`: Adds new samples to the input data.\n",
         "    - `'transformed'`: Exports only the transformed data, excluding different untransformed samples.\n",
diff --git a/demo/tutorials/misc/Templatic_Augmentation_Notebook.ipynb b/demo/tutorials/misc/Templatic_Augmentation_Notebook.ipynb
index 4e6ff7067..1bf1dddfd 100644
--- a/demo/tutorials/misc/Templatic_Augmentation_Notebook.ipynb
+++ b/demo/tutorials/misc/Templatic_Augmentation_Notebook.ipynb
@@ -40,146 +40,11 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 1,
+      "execution_count": null,
       "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 1000
-        },
-        "id": "oGIyE43uhTxH",
-        "outputId": "b581c350-77e9-4a07-d373-ae53fb6eb9b5"
+        "id": "oGIyE43uhTxH"
       },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Collecting langtest[johnsnowlabs]\n",
-            "  Downloading langtest-1.1.0-py3-none-any.whl (59.8 MB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.8/59.8 MB\u001b[0m \u001b[31m24.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hCollecting jsonlines<4.0.0,>=3.1.0 (from langtest[johnsnowlabs])\n",
-            "  Downloading jsonlines-3.1.0-py3-none-any.whl (8.6 kB)\n",
-            "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.0 in /usr/local/lib/python3.10/dist-packages (from langtest[johnsnowlabs]) (1.5.6)\n",
-            "Collecting pandas<3.0.0,>=2.0.3 (from langtest[johnsnowlabs])\n",
-            "  Downloading pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m88.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hCollecting pydantic==1.10.6 (from langtest[johnsnowlabs])\n",
-            "  Downloading pydantic-1.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m92.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: pyyaml<7.0,>=6.0 in /usr/local/lib/python3.10/dist-packages (from langtest[johnsnowlabs]) (6.0)\n",
-            "Requirement already satisfied: tqdm<5.0.0,>=4.65.0 in /usr/local/lib/python3.10/dist-packages (from langtest[johnsnowlabs]) (4.65.0)\n",
-            "Collecting typing-extensions<4.6.0 (from langtest[johnsnowlabs])\n",
-            "  Downloading typing_extensions-4.5.0-py3-none-any.whl (27 kB)\n",
-            "Collecting johnsnowlabs==4.3.5 (from langtest[johnsnowlabs])\n",
-            "  Downloading johnsnowlabs-4.3.5-py3-none-any.whl (75 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.7/75.7 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hCollecting pyspark==3.1.2 (from johnsnowlabs==4.3.5->langtest[johnsnowlabs])\n",
-            "  Downloading pyspark-3.1.2.tar.gz (212.4 MB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.4/212.4 MB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "Collecting spark-nlp==4.3.2 (from johnsnowlabs==4.3.5->langtest[johnsnowlabs])\n",
-            "  Downloading spark_nlp-4.3.2-py2.py3-none-any.whl (473 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m473.2/473.2 kB\u001b[0m \u001b[31m31.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hCollecting nlu==4.2.0 (from johnsnowlabs==4.3.5->langtest[johnsnowlabs])\n",
-            "  Downloading nlu-4.2.0-py3-none-any.whl (639 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m639.9/639.9 kB\u001b[0m \u001b[31m49.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hCollecting spark-nlp-display==4.1 (from johnsnowlabs==4.3.5->langtest[johnsnowlabs])\n",
-            "  Downloading spark_nlp_display-4.1-py3-none-any.whl (95 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.4/95.4 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (1.22.4)\n",
-            "Collecting dataclasses (from johnsnowlabs==4.3.5->langtest[johnsnowlabs])\n",
-            "  Downloading dataclasses-0.6-py3-none-any.whl (14 kB)\n",
-            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (2.27.1)\n",
-            "Collecting databricks-api (from johnsnowlabs==4.3.5->langtest[johnsnowlabs])\n",
-            "  Downloading databricks_api-0.9.0-py3-none-any.whl (7.4 kB)\n",
-            "Collecting colorama (from johnsnowlabs==4.3.5->langtest[johnsnowlabs])\n",
-            "  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
-            "Requirement already satisfied: pyarrow>=0.16.0 in /usr/local/lib/python3.10/dist-packages (from nlu==4.2.0->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (9.0.0)\n",
-            "Collecting py4j==0.10.9 (from pyspark==3.1.2->johnsnowlabs==4.3.5->langtest[johnsnowlabs])\n",
-            "  Downloading py4j-0.10.9-py2.py3-none-any.whl (198 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m198.6/198.6 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: ipython in /usr/local/lib/python3.10/dist-packages (from spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (7.34.0)\n",
-            "Collecting svgwrite==1.4 (from spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs])\n",
-            "  Downloading svgwrite-1.4-py3-none-any.whl (66 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.9/66.9 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: attrs>=19.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonlines<4.0.0,>=3.1.0->langtest[johnsnowlabs]) (23.1.0)\n",
-            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0.0,>=2.0.3->langtest[johnsnowlabs]) (2.8.2)\n",
-            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0.0,>=2.0.3->langtest[johnsnowlabs]) (2022.7.1)\n",
-            "Collecting tzdata>=2022.1 (from pandas<3.0.0,>=2.0.3->langtest[johnsnowlabs])\n",
-            "  Downloading tzdata-2023.3-py2.py3-none-any.whl (341 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.8/341.8 kB\u001b[0m \u001b[31m30.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas<3.0.0,>=2.0.3->langtest[johnsnowlabs]) (1.16.0)\n",
-            "Collecting databricks-cli (from databricks-api->johnsnowlabs==4.3.5->langtest[johnsnowlabs])\n",
-            "  Downloading databricks-cli-0.17.7.tar.gz (83 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.5/83.5 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (1.26.16)\n",
-            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (2023.5.7)\n",
-            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (2.0.12)\n",
-            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (3.4)\n",
-            "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from databricks-cli->databricks-api->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (8.1.4)\n",
-            "Requirement already satisfied: pyjwt>=1.7.0 in /usr/lib/python3/dist-packages (from databricks-cli->databricks-api->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (2.3.0)\n",
-            "Requirement already satisfied: oauthlib>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from databricks-cli->databricks-api->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (3.2.2)\n",
-            "Requirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.10/dist-packages (from databricks-cli->databricks-api->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (0.8.10)\n",
-            "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.10/dist-packages (from ipython->spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (67.7.2)\n",
-            "Collecting jedi>=0.16 (from ipython->spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs])\n",
-            "  Downloading jedi-0.18.2-py2.py3-none-any.whl (1.6 MB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m74.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from ipython->spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (4.4.2)\n",
-            "Requirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython->spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (0.7.5)\n",
-            "Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipython->spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (5.7.1)\n",
-            "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython->spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (3.0.39)\n",
-            "Requirement already satisfied: pygments in /usr/local/lib/python3.10/dist-packages (from ipython->spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (2.14.0)\n",
-            "Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython->spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (0.2.0)\n",
-            "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython->spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (0.1.6)\n",
-            "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython->spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (4.8.0)\n",
-            "Requirement already satisfied: parso<0.9.0,>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython->spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (0.8.3)\n",
-            "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython->spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (0.7.0)\n",
-            "Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython->spark-nlp-display==4.1->johnsnowlabs==4.3.5->langtest[johnsnowlabs]) (0.2.6)\n",
-            "Building wheels for collected packages: pyspark, databricks-cli\n",
-            "  Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for pyspark: filename=pyspark-3.1.2-py2.py3-none-any.whl size=212880756 sha256=a525fa77974ef428d0f855d41353c331052adfb594a997d7598044e12271fd11\n",
-            "  Stored in directory: /root/.cache/pip/wheels/ef/70/50/7882e1bcb5693225f7cc86698f10953201b48b3f36317c2d18\n",
-            "  Building wheel for databricks-cli (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for databricks-cli: filename=databricks_cli-0.17.7-py3-none-any.whl size=143860 sha256=e78be081f408125550e40f4f19107f95f0b21497ad4f0570ed34acd736ebfe3c\n",
-            "  Stored in directory: /root/.cache/pip/wheels/ae/63/93/5402c1a09c1868a59d0b05013484e07af97a9d7b3dbd5bd39a\n",
-            "Successfully built pyspark databricks-cli\n",
-            "Installing collected packages: spark-nlp, py4j, dataclasses, tzdata, typing-extensions, svgwrite, pyspark, jsonlines, jedi, colorama, pydantic, pandas, databricks-cli, spark-nlp-display, nlu, langtest, databricks-api, johnsnowlabs\n",
-            "  Attempting uninstall: py4j\n",
-            "    Found existing installation: py4j 0.10.9.7\n",
-            "    Uninstalling py4j-0.10.9.7:\n",
-            "      Successfully uninstalled py4j-0.10.9.7\n",
-            "  Attempting uninstall: typing-extensions\n",
-            "    Found existing installation: typing_extensions 4.7.1\n",
-            "    Uninstalling typing_extensions-4.7.1:\n",
-            "      Successfully uninstalled typing_extensions-4.7.1\n",
-            "  Attempting uninstall: pydantic\n",
-            "    Found existing installation: pydantic 1.10.11\n",
-            "    Uninstalling pydantic-1.10.11:\n",
-            "      Successfully uninstalled pydantic-1.10.11\n",
-            "  Attempting uninstall: pandas\n",
-            "    Found existing installation: pandas 1.5.3\n",
-            "    Uninstalling pandas-1.5.3:\n",
-            "      Successfully uninstalled pandas-1.5.3\n",
-            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
-            "google-colab 1.0.0 requires pandas==1.5.3, but you have pandas 2.0.3 which is incompatible.\u001b[0m\u001b[31m\n",
-            "\u001b[0mSuccessfully installed colorama-0.4.6 databricks-api-0.9.0 databricks-cli-0.17.7 dataclasses-0.6 jedi-0.18.2 johnsnowlabs-4.3.5 jsonlines-3.1.0 langtest-1.1.0 nlu-4.2.0 pandas-2.0.3 py4j-0.10.9 pydantic-1.10.6 pyspark-3.1.2 spark-nlp-4.3.2 spark-nlp-display-4.1 svgwrite-1.4 typing-extensions-4.5.0 tzdata-2023.3\n"
-          ]
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "application/vnd.colab-display-data+json": {
-              "pip_warning": {
-                "packages": [
-                  "dataclasses"
-                ]
-              }
-            }
-          },
-          "metadata": {}
-        }
-      ],
+      "outputs": [],
       "source": [
         "!pip install langtest[johnsnowlabs]"
       ]
@@ -197,7 +62,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 1,
+      "execution_count": 5,
       "metadata": {
         "id": "lTzSJpMlhgq5"
       },
@@ -277,40 +142,40 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 2,
+      "execution_count": 6,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "6uW22VqJje8E",
-        "outputId": "04e3b0ed-6113-4fe6-d316-f7db576fd28e"
+        "outputId": "a06dccd7-59ca-48b0-f657-811cc0a7ad22"
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
-            "--2023-07-20 11:31:59--  https://raw.githubusercontent.com/JohnSnowLabs/langtest/main/langtest/data/conll/sample.conll\n",
-            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
-            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
+            "--2023-08-02 07:26:24--  https://raw.githubusercontent.com/JohnSnowLabs/langtest/main/langtest/data/conll/sample.conll\n",
+            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.111.133, ...\n",
+            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n",
             "HTTP request sent, awaiting response... 200 OK\n",
             "Length: 50519 (49K) [text/plain]\n",
             "Saving to: ‘sample.conll’\n",
             "\n",
-            "\rsample.conll          0%[                    ]       0  --.-KB/s               \rsample.conll        100%[===================>]  49.33K  --.-KB/s    in 0.004s  \n",
+            "\rsample.conll          0%[                    ]       0  --.-KB/s               \rsample.conll        100%[===================>]  49.33K  --.-KB/s    in 0.001s  \n",
             "\n",
-            "2023-07-20 11:32:00 (13.6 MB/s) - ‘sample.conll’ saved [50519/50519]\n",
+            "2023-08-02 07:26:24 (45.7 MB/s) - ‘sample.conll’ saved [50519/50519]\n",
             "\n",
-            "--2023-07-20 11:32:00--  https://raw.githubusercontent.com/JohnSnowLabs/langtest/main/demo/data/conll03.conll\n",
+            "--2023-08-02 07:26:24--  https://raw.githubusercontent.com/JohnSnowLabs/langtest/main/demo/data/conll03.conll\n",
             "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
             "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
             "HTTP request sent, awaiting response... 200 OK\n",
             "Length: 827443 (808K) [text/plain]\n",
             "Saving to: ‘conll03.conll’\n",
             "\n",
-            "conll03.conll       100%[===================>] 808.05K  --.-KB/s    in 0.02s   \n",
+            "conll03.conll       100%[===================>] 808.05K  --.-KB/s    in 0.05s   \n",
             "\n",
-            "2023-07-20 11:32:00 (46.7 MB/s) - ‘conll03.conll’ saved [827443/827443]\n",
+            "2023-08-02 07:26:24 (14.4 MB/s) - ‘conll03.conll’ saved [827443/827443]\n",
             "\n"
           ]
         }
@@ -334,7 +199,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 3,
+      "execution_count": 7,
       "metadata": {
         "id": "jRnEmCfPhsZs"
       },
@@ -345,18 +210,18 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 4,
+      "execution_count": 8,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "bHXeP18sGp-g",
-        "outputId": "6e09335a-7d95-4b6e-b6af-ec2911c13731"
+        "outputId": "7cc37e0b-c80e-4d8d-f6e5-fee115404ee9"
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Warning::Spark Session already created, some configs may not take.\n",
             "small_bert_L2_128 download started this may take some time.\n",
@@ -380,18 +245,18 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 5,
+      "execution_count": 9,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "RVk9NWn7u-Lm",
-        "outputId": "00146078-e7ba-4787-b3ab-b764aa709ad5"
+        "outputId": "0b61c376-36df-47e6-fb8f-68dc019bc2fc"
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Test Configuration : \n",
             " {\n",
@@ -441,17 +306,16 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 6,
+      "execution_count": 10,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "mynkAUwZyuFN",
-        "outputId": "378c66c5-b2e6-4d5a-fc31-bf655366d74a"
+        "outputId": "13035b12-4f98-483b-dc53-8a9cc59a6e80"
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
             "text/plain": [
               "{'tests': {'defaults': {'min_pass_rate': 0.65},\n",
@@ -459,8 +323,9 @@
               "   'lowercase': {'min_pass_rate': 0.65}}}}"
             ]
           },
+          "execution_count": 10,
           "metadata": {},
-          "execution_count": 6
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -499,29 +364,29 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 7,
+      "execution_count": 11,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "UiUNzTwF89ye",
-        "outputId": "25ee4b2f-56bb-4822-be59-f1aa82ce2d1c"
+        "outputId": "533592a1-02a7-4c2b-a75f-4e37c3acb053"
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
-            "Generating testcases...: 100%|██████████| 1/1 [00:00<00:00, 4156.89it/s]\n"
+            "Generating testcases...: 100%|██████████| 1/1 [00:00<00:00, 4911.36it/s]\n"
           ]
         },
         {
-          "output_type": "execute_result",
           "data": {
             "text/plain": []
           },
+          "execution_count": 11,
           "metadata": {},
-          "execution_count": 7
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -539,52 +404,22 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 8,
+      "execution_count": 12,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 423
         },
         "id": "p0tTwFfc891k",
-        "outputId": "f9d626b7-af13-4a13-c157-1ebf09da7281"
+        "outputId": "d1257af9-4ea7-4a5a-a88f-bc1c520d4abd"
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "       category  test_type                                           original  \\\n",
-              "0    robustness   add_typo  SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...   \n",
-              "1    robustness   add_typo                                        Nadim Ladki   \n",
-              "2    robustness   add_typo           AL-AIN , United Arab Emirates 1996-12-06   \n",
-              "3    robustness   add_typo  Japan began the defence of their Asian Cup tit...   \n",
-              "4    robustness   add_typo  But China saw their luck desert them in the se...   \n",
-              "..          ...        ...                                                ...   \n",
-              "447  robustness  lowercase                    Portuguesa 1 Atletico Mineiro 0   \n",
-              "448  robustness  lowercase     CRICKET - LARA ENDURES ANOTHER MISERABLE DAY .   \n",
-              "449  robustness  lowercase                                      Robert Galvin   \n",
-              "450  robustness  lowercase                               MELBOURNE 1996-12-06   \n",
-              "451  robustness  lowercase  Australia gave Brian Lara another reason to be...   \n",
-              "\n",
-              "                                             test_case  \n",
-              "0    SOCCER - JAPAN GET LUCMY WIN , CHINA IN SURPRI...  \n",
-              "1                                          Nadim Ladli  \n",
-              "2             AL-AIN , United Arab Smirates 1996-12-06  \n",
-              "3    Japsn began the defence of their Asian Cup tit...  \n",
-              "4    But China saw their luck desery them in the se...  \n",
-              "..                                                 ...  \n",
-              "447                    portuguesa 1 atletico mineiro 0  \n",
-              "448     cricket - lara endures another miserable day .  \n",
-              "449                                      robert galvin  \n",
-              "450                               melbourne 1996-12-06  \n",
-              "451  australia gave brian lara another reason to be...  \n",
-              "\n",
-              "[452 rows x 4 columns]"
-            ],
             "text/html": [
               "\n",
               "\n",
-              "  <div id=\"df-538f70e6-2d16-4515-8a27-dd8daf9b4d17\">\n",
+              "  <div id=\"df-4a1abb29-edcd-4857-a28e-5386fe4ea7b3\">\n",
               "    <div class=\"colab-df-container\">\n",
               "      <div>\n",
               "<style scoped>\n",
@@ -616,35 +451,35 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...</td>\n",
-              "      <td>SOCCER - JAPAN GET LUCMY WIN , CHINA IN SURPRI...</td>\n",
+              "      <td>SOCCER - JAPAN GET LUCKY WIN , CHINA IN SYRPRI...</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>1</th>\n",
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>Nadim Ladki</td>\n",
-              "      <td>Nadim Ladli</td>\n",
+              "      <td>Nadim Oadki</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>2</th>\n",
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>AL-AIN , United Arab Emirates 1996-12-06</td>\n",
-              "      <td>AL-AIN , United Arab Smirates 1996-12-06</td>\n",
+              "      <td>AL-AIN , United Arab Emirates 1996-1-206</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>3</th>\n",
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>Japan began the defence of their Asian Cup tit...</td>\n",
-              "      <td>Japsn began the defence of their Asian Cup tit...</td>\n",
+              "      <td>Japan began the defence of their Asian Cup tiy...</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>4</th>\n",
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>But China saw their luck desert them in the se...</td>\n",
-              "      <td>But China saw their luck desery them in the se...</td>\n",
+              "      <td>But China saw their luck desert yhem in the se...</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>...</th>\n",
@@ -692,7 +527,7 @@
               "</table>\n",
               "<p>452 rows × 4 columns</p>\n",
               "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-538f70e6-2d16-4515-8a27-dd8daf9b4d17')\"\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-4a1abb29-edcd-4857-a28e-5386fe4ea7b3')\"\n",
               "              title=\"Convert this dataframe to an interactive table.\"\n",
               "              style=\"display:none;\">\n",
               "\n",
@@ -705,8 +540,8 @@
               "\n",
               "\n",
               "\n",
-              "    <div id=\"df-4b854743-3da1-43e5-8049-51ebc6706070\">\n",
-              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-4b854743-3da1-43e5-8049-51ebc6706070')\"\n",
+              "    <div id=\"df-ceac4ee4-81a7-4aa7-840f-72869df8f80c\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-ceac4ee4-81a7-4aa7-840f-72869df8f80c')\"\n",
               "              title=\"Suggest charts.\"\n",
               "              style=\"display:none;\">\n",
               "\n",
@@ -763,7 +598,7 @@
               "\n",
               "function displayQuickchartButton(domScope) {\n",
               "  let quickchartButtonEl =\n",
-              "    domScope.querySelector('#df-4b854743-3da1-43e5-8049-51ebc6706070 button.colab-df-quickchart');\n",
+              "    domScope.querySelector('#df-ceac4ee4-81a7-4aa7-840f-72869df8f80c button.colab-df-quickchart');\n",
               "  quickchartButtonEl.style.display =\n",
               "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "}\n",
@@ -810,12 +645,12 @@
               "\n",
               "      <script>\n",
               "        const buttonEl =\n",
-              "          document.querySelector('#df-538f70e6-2d16-4515-8a27-dd8daf9b4d17 button.colab-df-convert');\n",
+              "          document.querySelector('#df-4a1abb29-edcd-4857-a28e-5386fe4ea7b3 button.colab-df-convert');\n",
               "        buttonEl.style.display =\n",
               "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "\n",
               "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-538f70e6-2d16-4515-8a27-dd8daf9b4d17');\n",
+              "          const element = document.querySelector('#df-4a1abb29-edcd-4857-a28e-5386fe4ea7b3');\n",
               "          const dataTable =\n",
               "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
               "                                                     [key], {});\n",
@@ -834,10 +669,40 @@
               "      </script>\n",
               "    </div>\n",
               "  </div>\n"
+            ],
+            "text/plain": [
+              "       category  test_type                                           original  \\\n",
+              "0    robustness   add_typo  SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...   \n",
+              "1    robustness   add_typo                                        Nadim Ladki   \n",
+              "2    robustness   add_typo           AL-AIN , United Arab Emirates 1996-12-06   \n",
+              "3    robustness   add_typo  Japan began the defence of their Asian Cup tit...   \n",
+              "4    robustness   add_typo  But China saw their luck desert them in the se...   \n",
+              "..          ...        ...                                                ...   \n",
+              "447  robustness  lowercase                    Portuguesa 1 Atletico Mineiro 0   \n",
+              "448  robustness  lowercase     CRICKET - LARA ENDURES ANOTHER MISERABLE DAY .   \n",
+              "449  robustness  lowercase                                      Robert Galvin   \n",
+              "450  robustness  lowercase                               MELBOURNE 1996-12-06   \n",
+              "451  robustness  lowercase  Australia gave Brian Lara another reason to be...   \n",
+              "\n",
+              "                                             test_case  \n",
+              "0    SOCCER - JAPAN GET LUCKY WIN , CHINA IN SYRPRI...  \n",
+              "1                                          Nadim Oadki  \n",
+              "2             AL-AIN , United Arab Emirates 1996-1-206  \n",
+              "3    Japan began the defence of their Asian Cup tiy...  \n",
+              "4    But China saw their luck desert yhem in the se...  \n",
+              "..                                                 ...  \n",
+              "447                    portuguesa 1 atletico mineiro 0  \n",
+              "448     cricket - lara endures another miserable day .  \n",
+              "449                                      robert galvin  \n",
+              "450                               melbourne 1996-12-06  \n",
+              "451  australia gave brian lara another reason to be...  \n",
+              "\n",
+              "[452 rows x 4 columns]"
             ]
           },
+          "execution_count": 12,
           "metadata": {},
-          "execution_count": 8
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -864,7 +729,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 9,
+      "execution_count": 13,
       "metadata": {
         "id": "ba0MYutC96CN"
       },
@@ -884,29 +749,29 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 10,
+      "execution_count": 14,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "CHQHRbQb9EDi",
-        "outputId": "4194d0cb-e4e0-4739-8752-d46e55751f9c"
+        "outputId": "66da2039-aadb-4e80-d885-814bad4acafc"
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
-            "Running testcases... : 100%|██████████| 452/452 [01:08<00:00,  6.57it/s]\n"
+            "Running testcases... : 100%|██████████| 452/452 [01:04<00:00,  7.01it/s]\n"
           ]
         },
         {
-          "output_type": "execute_result",
           "data": {
             "text/plain": []
           },
+          "execution_count": 14,
           "metadata": {},
-          "execution_count": 10
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -924,78 +789,22 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 11,
+      "execution_count": 15,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 589
+          "height": 606
         },
         "id": "keBNodfJ894u",
-        "outputId": "caeed91c-7bca-4575-bb6c-7afc3d6ed081"
+        "outputId": "7dae4242-cf8f-40bd-8915-0b1edd56769b"
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "       category  test_type                                           original  \\\n",
-              "0    robustness   add_typo  SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...   \n",
-              "1    robustness   add_typo                                        Nadim Ladki   \n",
-              "2    robustness   add_typo           AL-AIN , United Arab Emirates 1996-12-06   \n",
-              "3    robustness   add_typo  Japan began the defence of their Asian Cup tit...   \n",
-              "4    robustness   add_typo  But China saw their luck desert them in the se...   \n",
-              "..          ...        ...                                                ...   \n",
-              "447  robustness  lowercase                    Portuguesa 1 Atletico Mineiro 0   \n",
-              "448  robustness  lowercase     CRICKET - LARA ENDURES ANOTHER MISERABLE DAY .   \n",
-              "449  robustness  lowercase                                      Robert Galvin   \n",
-              "450  robustness  lowercase                               MELBOURNE 1996-12-06   \n",
-              "451  robustness  lowercase  Australia gave Brian Lara another reason to be...   \n",
-              "\n",
-              "                                             test_case  \\\n",
-              "0    SOCCER - JAPAN GET LUCMY WIN , CHINA IN SURPRI...   \n",
-              "1                                          Nadim Ladli   \n",
-              "2             AL-AIN , United Arab Smirates 1996-12-06   \n",
-              "3    Japsn began the defence of their Asian Cup tit...   \n",
-              "4    But China saw their luck desery them in the se...   \n",
-              "..                                                 ...   \n",
-              "447                    portuguesa 1 atletico mineiro 0   \n",
-              "448     cricket - lara endures another miserable day .   \n",
-              "449                                      robert galvin   \n",
-              "450                               melbourne 1996-12-06   \n",
-              "451  australia gave brian lara another reason to be...   \n",
-              "\n",
-              "                                       expected_result  \\\n",
-              "0                               japan: LOC, china: LOC   \n",
-              "1                                     nadim ladki: PER   \n",
-              "2               al-ain: LOC, united arab emirates: LOC   \n",
-              "3              japan: LOC, asian cup: MISC, syria: LOC   \n",
-              "4                          china: LOC, uzbekistan: LOC   \n",
-              "..                                                 ...   \n",
-              "447             portuguesa: ORG, atletico mineiro: ORG   \n",
-              "448                                          lara: PER   \n",
-              "449                                 robert galvin: PER   \n",
-              "450                                     melbourne: LOC   \n",
-              "451  australia: LOC, brian lara: PER, west indies: LOC   \n",
-              "\n",
-              "                                         actual_result   pass  \n",
-              "0                   japan: LOC, lucmy: PER, china: LOC   True  \n",
-              "1                                     nadim ladli: PER   True  \n",
-              "2                        al-ain: LOC, united arab: LOC  False  \n",
-              "3                  japsn: PER, asian: MISC, syria: LOC  False  \n",
-              "4                          china: LOC, uzbekistan: LOC   True  \n",
-              "..                                                 ...    ...  \n",
-              "447             portuguesa: ORG, atletico mineiro: ORG   True  \n",
-              "448                                          lara: PER   True  \n",
-              "449                                 robert galvin: PER   True  \n",
-              "450                                     melbourne: LOC   True  \n",
-              "451  australia: LOC, brian lara: PER, west indies: LOC   True  \n",
-              "\n",
-              "[452 rows x 7 columns]"
-            ],
             "text/html": [
               "\n",
               "\n",
-              "  <div id=\"df-f8fa02f8-e82a-43de-bc97-78cec2345a66\">\n",
+              "  <div id=\"df-164a30c9-762e-46b4-b276-b9cdab09a1cc\">\n",
               "    <div class=\"colab-df-container\">\n",
               "      <div>\n",
               "<style scoped>\n",
@@ -1030,9 +839,9 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...</td>\n",
-              "      <td>SOCCER - JAPAN GET LUCMY WIN , CHINA IN SURPRI...</td>\n",
+              "      <td>SOCCER - JAPAN GET LUCKY WIN , CHINA IN SYRPRI...</td>\n",
               "      <td>japan: LOC, china: LOC</td>\n",
-              "      <td>japan: LOC, lucmy: PER, china: LOC</td>\n",
+              "      <td>japan: LOC, china: LOC, syrprise: LOC</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -1040,9 +849,9 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>Nadim Ladki</td>\n",
-              "      <td>Nadim Ladli</td>\n",
+              "      <td>Nadim Oadki</td>\n",
               "      <td>nadim ladki: PER</td>\n",
-              "      <td>nadim ladli: PER</td>\n",
+              "      <td>nadim oadki: PER</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -1050,19 +859,19 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>AL-AIN , United Arab Emirates 1996-12-06</td>\n",
-              "      <td>AL-AIN , United Arab Smirates 1996-12-06</td>\n",
+              "      <td>AL-AIN , United Arab Emirates 1996-1-206</td>\n",
               "      <td>al-ain: LOC, united arab emirates: LOC</td>\n",
-              "      <td>al-ain: LOC, united arab: LOC</td>\n",
-              "      <td>False</td>\n",
+              "      <td>al-ain: LOC, united arab emirates: LOC</td>\n",
+              "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>3</th>\n",
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>Japan began the defence of their Asian Cup tit...</td>\n",
-              "      <td>Japsn began the defence of their Asian Cup tit...</td>\n",
+              "      <td>Japan began the defence of their Asian Cup tiy...</td>\n",
               "      <td>japan: LOC, asian cup: MISC, syria: LOC</td>\n",
-              "      <td>japsn: PER, asian: MISC, syria: LOC</td>\n",
+              "      <td>japan: LOC, asian: MISC, syria: LOC</td>\n",
               "      <td>False</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -1070,7 +879,7 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>But China saw their luck desert them in the se...</td>\n",
-              "      <td>But China saw their luck desery them in the se...</td>\n",
+              "      <td>But China saw their luck desert yhem in the se...</td>\n",
               "      <td>china: LOC, uzbekistan: LOC</td>\n",
               "      <td>china: LOC, uzbekistan: LOC</td>\n",
               "      <td>True</td>\n",
@@ -1131,15 +940,15 @@
               "      <td>lowercase</td>\n",
               "      <td>Australia gave Brian Lara another reason to be...</td>\n",
               "      <td>australia gave brian lara another reason to be...</td>\n",
-              "      <td>australia: LOC, brian lara: PER, west indies: LOC</td>\n",
-              "      <td>australia: LOC, brian lara: PER, west indies: LOC</td>\n",
+              "      <td>australia: LOC, brian lara: PER, west indies: ...</td>\n",
+              "      <td>australia: LOC, brian lara: PER, west indies: ...</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "  </tbody>\n",
               "</table>\n",
               "<p>452 rows × 7 columns</p>\n",
               "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f8fa02f8-e82a-43de-bc97-78cec2345a66')\"\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-164a30c9-762e-46b4-b276-b9cdab09a1cc')\"\n",
               "              title=\"Convert this dataframe to an interactive table.\"\n",
               "              style=\"display:none;\">\n",
               "\n",
@@ -1152,8 +961,8 @@
               "\n",
               "\n",
               "\n",
-              "    <div id=\"df-56c941eb-d73b-4cbb-94c6-0149ed3d3005\">\n",
-              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-56c941eb-d73b-4cbb-94c6-0149ed3d3005')\"\n",
+              "    <div id=\"df-60a9ac68-8d38-45db-aa4e-88a0a2bd9094\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-60a9ac68-8d38-45db-aa4e-88a0a2bd9094')\"\n",
               "              title=\"Suggest charts.\"\n",
               "              style=\"display:none;\">\n",
               "\n",
@@ -1210,7 +1019,7 @@
               "\n",
               "function displayQuickchartButton(domScope) {\n",
               "  let quickchartButtonEl =\n",
-              "    domScope.querySelector('#df-56c941eb-d73b-4cbb-94c6-0149ed3d3005 button.colab-df-quickchart');\n",
+              "    domScope.querySelector('#df-60a9ac68-8d38-45db-aa4e-88a0a2bd9094 button.colab-df-quickchart');\n",
               "  quickchartButtonEl.style.display =\n",
               "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "}\n",
@@ -1257,12 +1066,12 @@
               "\n",
               "      <script>\n",
               "        const buttonEl =\n",
-              "          document.querySelector('#df-f8fa02f8-e82a-43de-bc97-78cec2345a66 button.colab-df-convert');\n",
+              "          document.querySelector('#df-164a30c9-762e-46b4-b276-b9cdab09a1cc button.colab-df-convert');\n",
               "        buttonEl.style.display =\n",
               "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "\n",
               "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-f8fa02f8-e82a-43de-bc97-78cec2345a66');\n",
+              "          const element = document.querySelector('#df-164a30c9-762e-46b4-b276-b9cdab09a1cc');\n",
               "          const dataTable =\n",
               "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
               "                                                     [key], {});\n",
@@ -1281,10 +1090,66 @@
               "      </script>\n",
               "    </div>\n",
               "  </div>\n"
+            ],
+            "text/plain": [
+              "       category  test_type                                           original  \\\n",
+              "0    robustness   add_typo  SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...   \n",
+              "1    robustness   add_typo                                        Nadim Ladki   \n",
+              "2    robustness   add_typo           AL-AIN , United Arab Emirates 1996-12-06   \n",
+              "3    robustness   add_typo  Japan began the defence of their Asian Cup tit...   \n",
+              "4    robustness   add_typo  But China saw their luck desert them in the se...   \n",
+              "..          ...        ...                                                ...   \n",
+              "447  robustness  lowercase                    Portuguesa 1 Atletico Mineiro 0   \n",
+              "448  robustness  lowercase     CRICKET - LARA ENDURES ANOTHER MISERABLE DAY .   \n",
+              "449  robustness  lowercase                                      Robert Galvin   \n",
+              "450  robustness  lowercase                               MELBOURNE 1996-12-06   \n",
+              "451  robustness  lowercase  Australia gave Brian Lara another reason to be...   \n",
+              "\n",
+              "                                             test_case  \\\n",
+              "0    SOCCER - JAPAN GET LUCKY WIN , CHINA IN SYRPRI...   \n",
+              "1                                          Nadim Oadki   \n",
+              "2             AL-AIN , United Arab Emirates 1996-1-206   \n",
+              "3    Japan began the defence of their Asian Cup tiy...   \n",
+              "4    But China saw their luck desert yhem in the se...   \n",
+              "..                                                 ...   \n",
+              "447                    portuguesa 1 atletico mineiro 0   \n",
+              "448     cricket - lara endures another miserable day .   \n",
+              "449                                      robert galvin   \n",
+              "450                               melbourne 1996-12-06   \n",
+              "451  australia gave brian lara another reason to be...   \n",
+              "\n",
+              "                                       expected_result  \\\n",
+              "0                               japan: LOC, china: LOC   \n",
+              "1                                     nadim ladki: PER   \n",
+              "2               al-ain: LOC, united arab emirates: LOC   \n",
+              "3              japan: LOC, asian cup: MISC, syria: LOC   \n",
+              "4                          china: LOC, uzbekistan: LOC   \n",
+              "..                                                 ...   \n",
+              "447             portuguesa: ORG, atletico mineiro: ORG   \n",
+              "448                                          lara: PER   \n",
+              "449                                 robert galvin: PER   \n",
+              "450                                     melbourne: LOC   \n",
+              "451  australia: LOC, brian lara: PER, west indies: ...   \n",
+              "\n",
+              "                                         actual_result   pass  \n",
+              "0                japan: LOC, china: LOC, syrprise: LOC   True  \n",
+              "1                                     nadim oadki: PER   True  \n",
+              "2               al-ain: LOC, united arab emirates: LOC   True  \n",
+              "3                  japan: LOC, asian: MISC, syria: LOC  False  \n",
+              "4                          china: LOC, uzbekistan: LOC   True  \n",
+              "..                                                 ...    ...  \n",
+              "447             portuguesa: ORG, atletico mineiro: ORG   True  \n",
+              "448                                          lara: PER   True  \n",
+              "449                                 robert galvin: PER   True  \n",
+              "450                                     melbourne: LOC   True  \n",
+              "451  australia: LOC, brian lara: PER, west indies: ...   True  \n",
+              "\n",
+              "[452 rows x 7 columns]"
             ]
           },
+          "execution_count": 15,
           "metadata": {},
-          "execution_count": 11
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -1311,32 +1176,22 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 12,
+      "execution_count": 16,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 112
         },
         "id": "gp57HcF9yxi7",
-        "outputId": "c4c039f1-e815-413e-add6-ae12377d3a3d"
+        "outputId": "e55491a2-fab6-4158-aa52-664d81567f70"
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "     category  test_type  fail_count  pass_count pass_rate minimum_pass_rate  \\\n",
-              "0  robustness   add_typo          59         167       74%               73%   \n",
-              "1  robustness  lowercase           0         226      100%               65%   \n",
-              "\n",
-              "   pass  \n",
-              "0  True  \n",
-              "1  True  "
-            ],
             "text/html": [
               "\n",
               "\n",
-              "  <div id=\"df-d6b45477-4f08-478d-90cd-b84a6036b851\">\n",
+              "  <div id=\"df-2f4ac5b4-a2be-4cbc-9dcc-d21231ebe4f1\">\n",
               "    <div class=\"colab-df-container\">\n",
               "      <div>\n",
               "<style scoped>\n",
@@ -1370,9 +1225,9 @@
               "      <th>0</th>\n",
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
-              "      <td>59</td>\n",
-              "      <td>167</td>\n",
-              "      <td>74%</td>\n",
+              "      <td>57</td>\n",
+              "      <td>169</td>\n",
+              "      <td>75%</td>\n",
               "      <td>73%</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
@@ -1389,7 +1244,7 @@
               "  </tbody>\n",
               "</table>\n",
               "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d6b45477-4f08-478d-90cd-b84a6036b851')\"\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2f4ac5b4-a2be-4cbc-9dcc-d21231ebe4f1')\"\n",
               "              title=\"Convert this dataframe to an interactive table.\"\n",
               "              style=\"display:none;\">\n",
               "\n",
@@ -1402,8 +1257,8 @@
               "\n",
               "\n",
               "\n",
-              "    <div id=\"df-2c81bff7-82c1-42f0-83db-ae5359e4994a\">\n",
-              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-2c81bff7-82c1-42f0-83db-ae5359e4994a')\"\n",
+              "    <div id=\"df-3fbe3748-3e8b-48d6-90b8-5f5688f26aaf\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-3fbe3748-3e8b-48d6-90b8-5f5688f26aaf')\"\n",
               "              title=\"Suggest charts.\"\n",
               "              style=\"display:none;\">\n",
               "\n",
@@ -1460,7 +1315,7 @@
               "\n",
               "function displayQuickchartButton(domScope) {\n",
               "  let quickchartButtonEl =\n",
-              "    domScope.querySelector('#df-2c81bff7-82c1-42f0-83db-ae5359e4994a button.colab-df-quickchart');\n",
+              "    domScope.querySelector('#df-3fbe3748-3e8b-48d6-90b8-5f5688f26aaf button.colab-df-quickchart');\n",
               "  quickchartButtonEl.style.display =\n",
               "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "}\n",
@@ -1507,12 +1362,12 @@
               "\n",
               "      <script>\n",
               "        const buttonEl =\n",
-              "          document.querySelector('#df-d6b45477-4f08-478d-90cd-b84a6036b851 button.colab-df-convert');\n",
+              "          document.querySelector('#df-2f4ac5b4-a2be-4cbc-9dcc-d21231ebe4f1 button.colab-df-convert');\n",
               "        buttonEl.style.display =\n",
               "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "\n",
               "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-d6b45477-4f08-478d-90cd-b84a6036b851');\n",
+              "          const element = document.querySelector('#df-2f4ac5b4-a2be-4cbc-9dcc-d21231ebe4f1');\n",
               "          const dataTable =\n",
               "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
               "                                                     [key], {});\n",
@@ -1531,10 +1386,20 @@
               "      </script>\n",
               "    </div>\n",
               "  </div>\n"
+            ],
+            "text/plain": [
+              "     category  test_type  fail_count  pass_count pass_rate minimum_pass_rate  \\\n",
+              "0  robustness   add_typo          57         169       75%               73%   \n",
+              "1  robustness  lowercase           0         226      100%               65%   \n",
+              "\n",
+              "   pass  \n",
+              "0  True  \n",
+              "1  True  "
             ]
           },
+          "execution_count": 16,
           "metadata": {},
-          "execution_count": 12
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -1585,35 +1450,39 @@
       "source": [
         "The `.augment()` function takes the following parameters:\n",
         "\n",
-        "- `input_path` (str): Path to the input file.\n",
-        "- `output_path` (str): Path to save the augmented data.\n",
+        "- `training_data` (dict): (Required) Specifies the source of the original training data. It should be a dictionary describing the dataset, e.g. `{\"data_source\": \"conll03.conll\"}`.\n",
+        "- `augmented_data` (str): (Required) Name of the file in which the augmented dataset will be saved, e.g. `augmented_conll03.conll`.\n",
         "- `templates` (list): List of templates(string) or conll file to be used for augmentation."
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 13,
+      "execution_count": 17,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "EBTz4Fqev7xX",
-        "outputId": "d99b9470-4740-43d5-f97a-3890bb7d6b73"
+        "outputId": "f1bc93ab-0de7-47f4-f920-e0a7b431604f"
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
             "text/plain": []
           },
+          "execution_count": 17,
           "metadata": {},
-          "execution_count": 13
+          "output_type": "execute_result"
         }
       ],
       "source": [
+        "data_kwargs = {\n",
+        "      \"data_source\" : \"conll03.conll\",\n",
+        "       }\n",
+        "\n",
         "harness.augment(\n",
-        "    input_path=\"conll03.conll\",\n",
-        "    output_path='augmented_conll03.conll',\n",
+        "    training_data=data_kwargs,\n",
+        "    augmented_data='augmented_conll03.conll',\n",
         "    templates=[\"The {ORG} company is located in {LOC}\", \"The {ORG} company is located in {LOC} and is owned by {PER}\"],\n",
         "    )"
       ]
@@ -1629,39 +1498,39 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 14,
+      "execution_count": 18,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "tKOgWXL145WR",
-        "outputId": "a118ec76-8aff-467a-be0a-16e9f72591ff"
+        "outputId": "557a40d2-cc46-4826-c7d0-4f58dba3a810"
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "\n",
             "\n",
             "The -X- -X- O\n",
-            "Penske NNP B-NP B-ORG\n",
-            "Mercedes-Benz NNP I-NP I-ORG\n",
+            "Cofinec NNP I-NP B-ORG\n",
+            "S.A. NNP I-NP I-ORG\n",
             "company -X- -X- O\n",
             "is -X- -X- O\n",
             "located -X- -X- O\n",
             "in -X- -X- O\n",
-            "Wall NNP B-NP B-LOC\n",
-            "Street NNP I-NP I-LOC\n",
+            "Iraq NNP B-NP B-LOC\n",
             "\n",
             "The -X- -X- O\n",
-            "St NNP B-NP B-ORG\n",
-            "Mirren NNP I-NP I-ORG\n",
+            "Cleveland NNP B-NP B-ORG\n",
             "company -X- -X- O\n",
             "is -X- -X- O\n",
             "located -X- -X- O\n",
             "in -X- -X- O\n",
-            "Mich NNP B-NP B-LOC\n"
+            "Russia NNP B-NP B-LOC\n",
+            "\n",
+            "The -X- -X- O\n"
           ]
         }
       ],
@@ -1680,18 +1549,18 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 15,
+      "execution_count": 19,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "WvRFmf3PGz3k",
-        "outputId": "9d3e7e1a-ad43-4e8c-f670-263482f5d139"
+        "outputId": "2fd62c72-2ba0-4d3d-f098-47f0541646ef"
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Warning::Spark Session already created, some configs may not take.\n",
             "Warning::Spark Session already created, some configs may not take.\n",
@@ -1716,18 +1585,18 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 16,
+      "execution_count": 20,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "UpaSjj05_fPd",
-        "outputId": "c38a702b-9bd8-4eba-816b-ebeadcd0352e"
+        "outputId": "f3fbcfb2-1ffb-4823-c5c9-9dab8d07a582"
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Test Configuration : \n",
             " {\n",
@@ -1748,10 +1617,10 @@
           ]
         },
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
-            "Generating testcases...: 100%|██████████| 1/1 [00:00<00:00, 5216.80it/s]\n"
+            "Generating testcases...: 100%|██████████| 1/1 [00:00<00:00, 5809.29it/s]\n"
           ]
         }
       ],
@@ -1770,29 +1639,29 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 17,
+      "execution_count": 21,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "StrOVtMoAQpf",
-        "outputId": "e1e63cf7-6e8e-448e-8a6c-54ef1e850513"
+        "outputId": "2e6d1fd7-9ee7-40de-a27f-ce91d71b549d"
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
-            "Running testcases... : 100%|██████████| 452/452 [01:12<00:00,  6.21it/s]\n"
+            "Running testcases... : 100%|██████████| 452/452 [01:00<00:00,  7.46it/s]\n"
           ]
         },
         {
-          "output_type": "execute_result",
           "data": {
             "text/plain": []
           },
+          "execution_count": 21,
           "metadata": {},
-          "execution_count": 17
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -1801,78 +1670,22 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 18,
+      "execution_count": 22,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 606
         },
         "id": "znh2xqQmAWHf",
-        "outputId": "5522f07b-8b2b-42e9-d362-234c7678b3d2"
+        "outputId": "fb64892a-5952-4393-cb49-7cdc72c714b9"
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "       category  test_type                                           original  \\\n",
-              "0    robustness   add_typo  SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...   \n",
-              "1    robustness   add_typo                                        Nadim Ladki   \n",
-              "2    robustness   add_typo           AL-AIN , United Arab Emirates 1996-12-06   \n",
-              "3    robustness   add_typo  Japan began the defence of their Asian Cup tit...   \n",
-              "4    robustness   add_typo  But China saw their luck desert them in the se...   \n",
-              "..          ...        ...                                                ...   \n",
-              "447  robustness  lowercase                    Portuguesa 1 Atletico Mineiro 0   \n",
-              "448  robustness  lowercase     CRICKET - LARA ENDURES ANOTHER MISERABLE DAY .   \n",
-              "449  robustness  lowercase                                      Robert Galvin   \n",
-              "450  robustness  lowercase                               MELBOURNE 1996-12-06   \n",
-              "451  robustness  lowercase  Australia gave Brian Lara another reason to be...   \n",
-              "\n",
-              "                                             test_case  \\\n",
-              "0    WOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...   \n",
-              "1                                          NadimL adki   \n",
-              "2             AL-AIN , United Arab Rmirates 1996-12-06   \n",
-              "3    Japan began the defence of their Asian Cup tit...   \n",
-              "4    But China saw their luck desert yhem in the se...   \n",
-              "..                                                 ...   \n",
-              "447                    portuguesa 1 atletico mineiro 0   \n",
-              "448     cricket - lara endures another miserable day .   \n",
-              "449                                      robert galvin   \n",
-              "450                               melbourne 1996-12-06   \n",
-              "451  australia gave brian lara another reason to be...   \n",
-              "\n",
-              "                                       expected_result  \\\n",
-              "0    soccer - japan get lucky win , china in surpri...   \n",
-              "1                                     nadim ladki: ORG   \n",
-              "2        al-ain , united arab emirates 1996-12-06: ORG   \n",
-              "3    japan began the defence of their asian cup tit...   \n",
-              "4    but: ORG, china saw their luck desert them in ...   \n",
-              "..                                                 ...   \n",
-              "447               portuguesa 1 atletico mineiro 0: ORG   \n",
-              "448  cricket - lara endures another miserable day: ORG   \n",
-              "449                                 robert galvin: PER   \n",
-              "450                    melbourne: PER, 1996-12-06: ORG   \n",
-              "451  australia gave brian lara another reason to be...   \n",
-              "\n",
-              "                                         actual_result  pass  \n",
-              "0    woccer - japan get lucky win , china in surpri...  True  \n",
-              "1                                     nadiml adki: ORG  True  \n",
-              "2        al-ain , united arab rmirates 1996-12-06: ORG  True  \n",
-              "3    japan began the defence of their asian cup tit...  True  \n",
-              "4    but: ORG, china saw their luck desert yhem in ...  True  \n",
-              "..                                                 ...   ...  \n",
-              "447               portuguesa 1 atletico mineiro 0: ORG  True  \n",
-              "448  cricket - lara endures another miserable day: ORG  True  \n",
-              "449                                 robert galvin: PER  True  \n",
-              "450                    melbourne: PER, 1996-12-06: ORG  True  \n",
-              "451  australia gave brian lara another reason to be...  True  \n",
-              "\n",
-              "[452 rows x 7 columns]"
-            ],
             "text/html": [
               "\n",
               "\n",
-              "  <div id=\"df-78332439-7f6e-43b3-b154-f4b4b4ba6191\">\n",
+              "  <div id=\"df-e1b6e7c9-fb31-4961-a978-02e6ef0c85b3\">\n",
               "    <div class=\"colab-df-container\">\n",
               "      <div>\n",
               "<style scoped>\n",
@@ -1907,9 +1720,9 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...</td>\n",
-              "      <td>WOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...</td>\n",
+              "      <td>SOCCER - JAPAN GEY LUCKY WIN , CHINA IN SURPRI...</td>\n",
               "      <td>soccer - japan get lucky win , china in surpri...</td>\n",
-              "      <td>woccer - japan get lucky win , china in surpri...</td>\n",
+              "      <td>soccer - japan gey lucky win , china in surpri...</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -1917,9 +1730,9 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>Nadim Ladki</td>\n",
-              "      <td>NadimL adki</td>\n",
+              "      <td>Nadim Ladoi</td>\n",
               "      <td>nadim ladki: ORG</td>\n",
-              "      <td>nadiml adki: ORG</td>\n",
+              "      <td>nadim ladoi: ORG</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -1927,9 +1740,9 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>AL-AIN , United Arab Emirates 1996-12-06</td>\n",
-              "      <td>AL-AIN , United Arab Rmirates 1996-12-06</td>\n",
+              "      <td>AL-AIN , United Arab Emirstes 1996-12-06</td>\n",
               "      <td>al-ain , united arab emirates 1996-12-06: ORG</td>\n",
-              "      <td>al-ain , united arab rmirates 1996-12-06: ORG</td>\n",
+              "      <td>al-ain , united arab emirstes 1996-12-06: ORG</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -1947,9 +1760,9 @@
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
               "      <td>But China saw their luck desert them in the se...</td>\n",
-              "      <td>But China saw their luck desert yhem in the se...</td>\n",
-              "      <td>but: ORG, china saw their luck desert them in ...</td>\n",
-              "      <td>but: ORG, china saw their luck desert yhem in ...</td>\n",
+              "      <td>But China saw their luck desert them in the se...</td>\n",
+              "      <td>but china saw their luck desert them in the se...</td>\n",
+              "      <td>but china saw their luck desert them in the se...</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -1988,8 +1801,8 @@
               "      <td>lowercase</td>\n",
               "      <td>Robert Galvin</td>\n",
               "      <td>robert galvin</td>\n",
-              "      <td>robert galvin: PER</td>\n",
-              "      <td>robert galvin: PER</td>\n",
+              "      <td>robert: PER, galvin: ORG</td>\n",
+              "      <td>robert: PER, galvin: ORG</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -1998,8 +1811,8 @@
               "      <td>lowercase</td>\n",
               "      <td>MELBOURNE 1996-12-06</td>\n",
               "      <td>melbourne 1996-12-06</td>\n",
-              "      <td>melbourne: PER, 1996-12-06: ORG</td>\n",
-              "      <td>melbourne: PER, 1996-12-06: ORG</td>\n",
+              "      <td>melbourne 1996-12-06: ORG</td>\n",
+              "      <td>melbourne 1996-12-06: ORG</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "    <tr>\n",
@@ -2008,15 +1821,15 @@
               "      <td>lowercase</td>\n",
               "      <td>Australia gave Brian Lara another reason to be...</td>\n",
               "      <td>australia gave brian lara another reason to be...</td>\n",
-              "      <td>australia gave brian lara another reason to be...</td>\n",
-              "      <td>australia gave brian lara another reason to be...</td>\n",
+              "      <td>australia: ORG, gave: PER, brian lara another ...</td>\n",
+              "      <td>australia: ORG, gave: PER, brian lara another ...</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
               "  </tbody>\n",
               "</table>\n",
               "<p>452 rows × 7 columns</p>\n",
               "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-78332439-7f6e-43b3-b154-f4b4b4ba6191')\"\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-e1b6e7c9-fb31-4961-a978-02e6ef0c85b3')\"\n",
               "              title=\"Convert this dataframe to an interactive table.\"\n",
               "              style=\"display:none;\">\n",
               "\n",
@@ -2029,8 +1842,8 @@
               "\n",
               "\n",
               "\n",
-              "    <div id=\"df-24382274-ba05-4639-bb83-69a34002bf9c\">\n",
-              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-24382274-ba05-4639-bb83-69a34002bf9c')\"\n",
+              "    <div id=\"df-20ccf260-1872-4323-82a9-2f399d441f12\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-20ccf260-1872-4323-82a9-2f399d441f12')\"\n",
               "              title=\"Suggest charts.\"\n",
               "              style=\"display:none;\">\n",
               "\n",
@@ -2087,7 +1900,7 @@
               "\n",
               "function displayQuickchartButton(domScope) {\n",
               "  let quickchartButtonEl =\n",
-              "    domScope.querySelector('#df-24382274-ba05-4639-bb83-69a34002bf9c button.colab-df-quickchart');\n",
+              "    domScope.querySelector('#df-20ccf260-1872-4323-82a9-2f399d441f12 button.colab-df-quickchart');\n",
               "  quickchartButtonEl.style.display =\n",
               "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "}\n",
@@ -2134,12 +1947,12 @@
               "\n",
               "      <script>\n",
               "        const buttonEl =\n",
-              "          document.querySelector('#df-78332439-7f6e-43b3-b154-f4b4b4ba6191 button.colab-df-convert');\n",
+              "          document.querySelector('#df-e1b6e7c9-fb31-4961-a978-02e6ef0c85b3 button.colab-df-convert');\n",
               "        buttonEl.style.display =\n",
               "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "\n",
               "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-78332439-7f6e-43b3-b154-f4b4b4ba6191');\n",
+              "          const element = document.querySelector('#df-e1b6e7c9-fb31-4961-a978-02e6ef0c85b3');\n",
               "          const dataTable =\n",
               "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
               "                                                     [key], {});\n",
@@ -2158,10 +1971,66 @@
               "      </script>\n",
               "    </div>\n",
               "  </div>\n"
+            ],
+            "text/plain": [
+              "       category  test_type                                           original  \\\n",
+              "0    robustness   add_typo  SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...   \n",
+              "1    robustness   add_typo                                        Nadim Ladki   \n",
+              "2    robustness   add_typo           AL-AIN , United Arab Emirates 1996-12-06   \n",
+              "3    robustness   add_typo  Japan began the defence of their Asian Cup tit...   \n",
+              "4    robustness   add_typo  But China saw their luck desert them in the se...   \n",
+              "..          ...        ...                                                ...   \n",
+              "447  robustness  lowercase                    Portuguesa 1 Atletico Mineiro 0   \n",
+              "448  robustness  lowercase     CRICKET - LARA ENDURES ANOTHER MISERABLE DAY .   \n",
+              "449  robustness  lowercase                                      Robert Galvin   \n",
+              "450  robustness  lowercase                               MELBOURNE 1996-12-06   \n",
+              "451  robustness  lowercase  Australia gave Brian Lara another reason to be...   \n",
+              "\n",
+              "                                             test_case  \\\n",
+              "0    SOCCER - JAPAN GEY LUCKY WIN , CHINA IN SURPRI...   \n",
+              "1                                          Nadim Ladoi   \n",
+              "2             AL-AIN , United Arab Emirstes 1996-12-06   \n",
+              "3    Japan began the defence of their Asian Cup tit...   \n",
+              "4    But China saw their luck desert them in the se...   \n",
+              "..                                                 ...   \n",
+              "447                    portuguesa 1 atletico mineiro 0   \n",
+              "448     cricket - lara endures another miserable day .   \n",
+              "449                                      robert galvin   \n",
+              "450                               melbourne 1996-12-06   \n",
+              "451  australia gave brian lara another reason to be...   \n",
+              "\n",
+              "                                       expected_result  \\\n",
+              "0    soccer - japan get lucky win , china in surpri...   \n",
+              "1                                     nadim ladki: ORG   \n",
+              "2        al-ain , united arab emirates 1996-12-06: ORG   \n",
+              "3    japan began the defence of their asian cup tit...   \n",
+              "4    but china saw their luck desert them in the se...   \n",
+              "..                                                 ...   \n",
+              "447               portuguesa 1 atletico mineiro 0: ORG   \n",
+              "448  cricket - lara endures another miserable day: ORG   \n",
+              "449                           robert: PER, galvin: ORG   \n",
+              "450                          melbourne 1996-12-06: ORG   \n",
+              "451  australia: ORG, gave: PER, brian lara another ...   \n",
+              "\n",
+              "                                         actual_result  pass  \n",
+              "0    soccer - japan gey lucky win , china in surpri...  True  \n",
+              "1                                     nadim ladoi: ORG  True  \n",
+              "2        al-ain , united arab emirstes 1996-12-06: ORG  True  \n",
+              "3    japan began the defence of their asian cup tit...  True  \n",
+              "4    but china saw their luck desert them in the se...  True  \n",
+              "..                                                 ...   ...  \n",
+              "447               portuguesa 1 atletico mineiro 0: ORG  True  \n",
+              "448  cricket - lara endures another miserable day: ORG  True  \n",
+              "449                           robert: PER, galvin: ORG  True  \n",
+              "450                          melbourne 1996-12-06: ORG  True  \n",
+              "451  australia: ORG, gave: PER, brian lara another ...  True  \n",
+              "\n",
+              "[452 rows x 7 columns]"
             ]
           },
+          "execution_count": 22,
           "metadata": {},
-          "execution_count": 18
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -2170,32 +2039,22 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 19,
+      "execution_count": 23,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 112
         },
         "id": "JSqkrBOZ-TeG",
-        "outputId": "4dd9224f-f3a6-4fe6-a48d-ae7e8e87ccd2"
+        "outputId": "09d0d756-6567-4ff3-9509-87e96f6d462d"
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "     category  test_type  fail_count  pass_count pass_rate minimum_pass_rate  \\\n",
-              "0  robustness   add_typo          46         180       80%               73%   \n",
-              "1  robustness  lowercase           0         226      100%               65%   \n",
-              "\n",
-              "   pass  \n",
-              "0  True  \n",
-              "1  True  "
-            ],
             "text/html": [
               "\n",
               "\n",
-              "  <div id=\"df-3d5434ae-9ac3-4db2-9b49-7dffffa9f87d\">\n",
+              "  <div id=\"df-ab9776ee-499c-46e2-a27e-f67778be767e\">\n",
               "    <div class=\"colab-df-container\">\n",
               "      <div>\n",
               "<style scoped>\n",
@@ -2229,9 +2088,9 @@
               "      <th>0</th>\n",
               "      <td>robustness</td>\n",
               "      <td>add_typo</td>\n",
-              "      <td>46</td>\n",
-              "      <td>180</td>\n",
-              "      <td>80%</td>\n",
+              "      <td>29</td>\n",
+              "      <td>197</td>\n",
+              "      <td>87%</td>\n",
               "      <td>73%</td>\n",
               "      <td>True</td>\n",
               "    </tr>\n",
@@ -2248,7 +2107,7 @@
               "  </tbody>\n",
               "</table>\n",
               "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3d5434ae-9ac3-4db2-9b49-7dffffa9f87d')\"\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ab9776ee-499c-46e2-a27e-f67778be767e')\"\n",
               "              title=\"Convert this dataframe to an interactive table.\"\n",
               "              style=\"display:none;\">\n",
               "\n",
@@ -2261,8 +2120,8 @@
               "\n",
               "\n",
               "\n",
-              "    <div id=\"df-fa24bd05-ecf5-405c-90f2-a0102839f9e9\">\n",
-              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-fa24bd05-ecf5-405c-90f2-a0102839f9e9')\"\n",
+              "    <div id=\"df-175d0d7f-99f4-40cb-b06c-b498f39681eb\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-175d0d7f-99f4-40cb-b06c-b498f39681eb')\"\n",
               "              title=\"Suggest charts.\"\n",
               "              style=\"display:none;\">\n",
               "\n",
@@ -2319,7 +2178,7 @@
               "\n",
               "function displayQuickchartButton(domScope) {\n",
               "  let quickchartButtonEl =\n",
-              "    domScope.querySelector('#df-fa24bd05-ecf5-405c-90f2-a0102839f9e9 button.colab-df-quickchart');\n",
+              "    domScope.querySelector('#df-175d0d7f-99f4-40cb-b06c-b498f39681eb button.colab-df-quickchart');\n",
               "  quickchartButtonEl.style.display =\n",
               "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "}\n",
@@ -2366,12 +2225,12 @@
               "\n",
               "      <script>\n",
               "        const buttonEl =\n",
-              "          document.querySelector('#df-3d5434ae-9ac3-4db2-9b49-7dffffa9f87d button.colab-df-convert');\n",
+              "          document.querySelector('#df-ab9776ee-499c-46e2-a27e-f67778be767e button.colab-df-convert');\n",
               "        buttonEl.style.display =\n",
               "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "\n",
               "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-3d5434ae-9ac3-4db2-9b49-7dffffa9f87d');\n",
+              "          const element = document.querySelector('#df-ab9776ee-499c-46e2-a27e-f67778be767e');\n",
               "          const dataTable =\n",
               "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
               "                                                     [key], {});\n",
@@ -2390,10 +2249,20 @@
               "      </script>\n",
               "    </div>\n",
               "  </div>\n"
+            ],
+            "text/plain": [
+              "     category  test_type  fail_count  pass_count pass_rate minimum_pass_rate  \\\n",
+              "0  robustness   add_typo          29         197       87%               73%   \n",
+              "1  robustness  lowercase           0         226      100%               65%   \n",
+              "\n",
+              "   pass  \n",
+              "0  True  \n",
+              "1  True  "
             ]
           },
+          "execution_count": 23,
           "metadata": {},
-          "execution_count": 19
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -2418,4 +2287,4 @@
   },
   "nbformat": 4,
   "nbformat_minor": 0
-}
\ No newline at end of file
+}

From c9e02871f0589c0147d0769e72cf6a30dd0d1945 Mon Sep 17 00:00:00 2001
From: JulesBelveze <jules.belveze@hotmail.fr>
Date: Wed, 2 Aug 2023 10:27:31 +0200
Subject: [PATCH 18/21] fix(langtest): renamed parameter in augment method

---
 langtest/augmentation/__init__.py             | 128 +++++++++---------
 langtest/langtest.py                          |   9 +-
 .../pipelines/transformers/ner_pipeline.py    |   4 +-
 tests/test_augmentation.py                    |  16 +--
 4 files changed, 81 insertions(+), 76 deletions(-)

diff --git a/langtest/augmentation/__init__.py b/langtest/augmentation/__init__.py
index 0eb50d6c3..1bb65406e 100644
--- a/langtest/augmentation/__init__.py
+++ b/langtest/augmentation/__init__.py
@@ -1,18 +1,19 @@
-from collections import defaultdict
 import os
+import random
 import re
 import string
-import yaml
-import random
-import pandas as pd
-from typing import List, Dict, Union, Optional
 from abc import ABC, abstractmethod
+from collections import defaultdict
 from copy import deepcopy as copy
+from typing import Any, Dict, List, Optional, Union
+
+import pandas as pd
+import yaml
 
-from langtest.transform import TestFactory
-from langtest.utils.custom_types import Sample
 from langtest.datahandler.datasource import DataFactory, HuggingFaceDataset
+from langtest.transform import TestFactory
 from langtest.transform.utils import create_terminology
+from langtest.utils.custom_types import Sample
 from langtest.utils.custom_types.output import NEROutput
 from langtest.utils.custom_types.predictions import NERPrediction, SequenceLabel
 from langtest.utils.custom_types.sample import NERSample
@@ -57,7 +58,6 @@ class AugmentRobustness(BaseAugmentaion):
 
         suggestions(self, prop) -> pandas.DataFrame:
             Calculates suggestions for improving test performance based on a given report.
-
     """
 
     def __init__(
@@ -109,8 +109,9 @@ def fix(
                                         - 'add': Adds new samples to the input data.
                                         - 'transformed': Exports only the transformed data, excluding untransformed samples.
                                         Defaults to 'add'.
-            Returns:
-        List[Dict[str, Any]]: A list of augmented data samples.
+
+        Returns:
+            List[Dict[str, Any]]: A list of augmented data samples.
         """
         if "." not in training_data["data_source"]:
             self.df = HuggingFaceDataset(training_data["data_source"], self.task)
@@ -290,25 +291,27 @@ def _parameters_overrides(self, config: dict, data_handler: List[Sample]) -> dic
 
 
 class TemplaticAugment(BaseAugmentaion):
-    """
-    This class is used for templatic augmentation. It is a subclass of the BaseAugmentation class.
+    """This class is used for templatic augmentation. It is a subclass of the BaseAugmentation class.
 
     Attributes:
-    __templates: A string or a list of strings or samples that represents the templates for the augmentation.
-    __task: The task for which the augmentation is being performed.
+        __templates:
+            A string or a list of strings or samples that represents the templates for the augmentation.
+        __task:
+            The task for which the augmentation is being performed.
 
     Methods:
-    __init__(self, templates: Union[str, List[str]], task: str): Initializes the TemplaticAugment class.
-    fix(self, training_data: str, output_path: str, *args, **kwargs): Performs the templatic augmentation and exports the results to a specified path.
+        __init__(self, templates: Union[str, List[str]], task: str):
+            Initializes the TemplaticAugment class.
+        fix(self, training_data: str, output_path: str, *args, **kwargs):
+            Performs the templatic augmentation and exports the results to a specified path.
     """
 
     def __init__(self, templates: Union[str, List[str]], task: str) -> None:
-        """
-        This constructor for the TemplaticAugment class.
+        """Constructor for the TemplaticAugment class.
 
-        Parameters:
-        templates (Union[str, List[str]]): The templates to be used for the augmentation.
-        task (str): The task for which the augmentation is being performed.
+        Args:
+            templates (Union[str, List[str]]): The templates to be used for the augmentation.
+            task (str): The task for which the augmentation is being performed.
         """
         self.__templates: Union[str, List[str], List[Sample]] = templates
         self.__task = task
@@ -320,21 +323,27 @@ def __init__(self, templates: Union[str, List[str]], task: str) -> None:
         elif isinstance(self.__templates, list) and isinstance(self.__templates[0], str):
             self.__templates = [self.str_to_sample(i) for i in self.__templates]
 
-    def fix(self, training_data: str, output_path: str, max_num=None, *args, **kwargs):
-        """
-        This method is used to perform the templatic augmentation.
+    def fix(
+        self,
+        training_data: Dict[str, Any],
+        output_path: str,
+        max_num=None,
+        *args,
+        **kwargs,
+    ):
+        """This method is used to perform the templatic augmentation.
+
         It takes the input data, performs the augmentation and then saves the augmented data to the output path.
 
-        Parameters:
-        training_data (dict): A dictionary containing the input data for augmentation.
-        output_path (str): The path where the augmented data will be saved.
-        *args: Variable length argument list.
-        **kwargs: Arbitrary keyword arguments.
+        Args:
+            training_data (dict): A dictionary containing the input data for augmentation.
+            output_path (str): The path where the augmented data will be saved.
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
 
         Returns:
-        bool: Returns True upon successful completion of the method.
+            bool: Returns True upon successful completion of the method.
         """
-
         df = DataFactory(training_data["data_source"], self.__task)
         data = df.load()
         new_data = []
@@ -355,15 +364,13 @@ def fix(self, training_data: str, output_path: str, max_num=None, *args, **kwarg
     def search_sample_results(
         self, samples: List[Sample]
     ) -> Dict[str, List[Union[NERPrediction, SequenceLabel]]]:
-        """
-        This method is used to search the results of the samples for the entities in the templates.
+        """This method is used to search the results of the samples for the entities in the templates.
 
-        Parameters:
-        samples (List[Sample]): The samples for which the results are to be searched.
+        Args:
+            samples (List[Sample]): The samples for which the results are to be searched.
 
         Returns:
-        Dict[str, List[Union[NERPrediction, SequenceLabel]]]: A dictionary containing the search results.
-
+            Dict[str, List[Union[NERPrediction, SequenceLabel]]]: A dictionary containing the search results.
         """
         results_dict = defaultdict(list)
         for sample in samples:
@@ -386,16 +393,15 @@ def search_sample_results(
                 results_dict[ent_name].append(tuple(chunk))
         return results_dict
 
-    def extract_variable_names(self, f_string: str):
-        """
-        This method is used to extract the variable names from the templates.
+    @staticmethod
+    def extract_variable_names(f_string: str) -> List[str]:
+        """This method is used to extract the variable names from the templates.
 
-        Parameters:
-        f_string (str): The template string.
+        Args:
+            f_string (str): The template string.
 
         Returns:
-        List[str]: A list of variable names.
-
+            List[str]: A list of variable names.
         """
         pattern = r"{([^{}]*)}"
         matches = re.findall(pattern, f_string)
@@ -403,15 +409,13 @@ def extract_variable_names(self, f_string: str):
         return variable_names
 
     def new_sample(self, template: Sample):
-        """
-        This method is used to generate a new sample from a template.
+        """This method is used to generate a new sample from a template.
 
-        Parameters:
-        template (Sample): The template from which the new sample is to be generated.
+        Args:
+            template (Sample): The template from which the new sample is to be generated.
 
         Returns:
-        Sample: The new sample generated from the template.
-
+            Sample: The new sample generated from the template.
         """
         template = copy(template)
         matches = re.finditer(r"{([^{}]*)}", template.original)
@@ -449,15 +453,13 @@ def new_sample(self, template: Sample):
             return None
 
     def str_to_sample(self, template: str):
-        """
-        This method is used to convert a template string to a Sample object.
+        """This method is used to convert a template string to a Sample object.
 
-        Parameters:
-        template (str): The template string to be converted.
+        Args:
+            template (str): The template string to be converted.
 
         Returns:
-        Sample: The Sample object generated from the template string.
-
+            Sample: The Sample object generated from the template string.
         """
         if self.__task == "ner":
             template = self.add_spaces_around_punctuation(template)
@@ -493,6 +495,7 @@ def str_to_sample(self, template: str):
 
     @property
     def templates(self):
+        """Templates used for the augmentation."""
         return self.__templates
 
     @templates.setter
@@ -501,21 +504,22 @@ def templates(self, templates: Union[str, List[str]]):
 
     @property
     def task(self):
+        """Task for which the augmentation is being performed."""
         return self.__task
 
     @task.setter
     def task(self, task: str):
         self.__task = task
 
-    def add_spaces_around_punctuation(self, text: str):
-        """
-        This method is used to add spaces around punctuation in a string.
+    @staticmethod
+    def add_spaces_around_punctuation(text: str):
+        """This method is used to add spaces around punctuation in a string.
 
-        Parameters:
-        text (str): The string to which spaces are to be added.
+        Args:
+            text (str): The string to which spaces are to be added.
 
         Returns:
-        str: The string with spaces added around punctuation.
+            str: The string with spaces added around punctuation.
         """
         for punct in string.punctuation:
             if punct not in ["{", "}", "_"]:
diff --git a/langtest/langtest.py b/langtest/langtest.py
index a5a33ddeb..6d056852f 100644
--- a/langtest/langtest.py
+++ b/langtest/langtest.py
@@ -698,7 +698,7 @@ def generated_results(self) -> Optional[pd.DataFrame]:
     def augment(
         self,
         training_data: dict,
-        augmented_data: str,
+        save_data_path: str,
         custom_proportions: Union[Dict, List] = None,
         export_mode: str = "add",
         templates: Optional[Union[str, List[str]]] = None,
@@ -707,13 +707,14 @@ def augment(
 
         Args:
             training_data (dict): A dictionary containing the input data for augmentation.
-            augmented_data (str): Path to save the augmented data.
+            save_data_path (str): Path to save the augmented data.
             custom_proportions (Union[Dict, List]):
             export_mode (str, optional): Determines how the samples are modified or exported.
                                     - 'inplace': Modifies the list of samples in place.
                                     - 'add': Adds new samples to the input data.
                                     - 'transformed': Exports only the transformed data, excluding untransformed samples.
                                     Defaults to 'add'.
+            templates (Optional[Union[str, List[str]]]): Templates to use for templatic augmentation.
 
         Returns:
             Harness: The instance of the class calling this method.
@@ -763,7 +764,7 @@ def augment(
             _ = TemplaticAugment(
                 templates=templates,
                 task=self.task,
-            ).fix(training_data=training_data, output_path=augmented_data)
+            ).fix(training_data=training_data, output_path=save_data_path)
 
         else:
             _ = AugmentRobustness(
@@ -773,7 +774,7 @@ def augment(
                 custom_proportions=custom_proportions,
             ).fix(
                 training_data=training_data,
-                output_path=augmented_data,
+                output_path=save_data_path,
                 export_mode=export_mode,
             )
 
diff --git a/langtest/pipelines/transformers/ner_pipeline.py b/langtest/pipelines/transformers/ner_pipeline.py
index dc13b7f68..d4c1e7d3b 100644
--- a/langtest/pipelines/transformers/ner_pipeline.py
+++ b/langtest/pipelines/transformers/ner_pipeline.py
@@ -173,8 +173,8 @@ def augment(self):
         filename = os.path.basename(self.train_data)
         self.path_augmented_file = os.path.join(os.getcwd(), f"augmented_{filename}")
         self.harness.augment(
-            input_path=self.train_data,
-            output_path=self.path_augmented_file,
+            training_data={"data_source": self.train_data},
+            save_data_path=self.path_augmented_file,
             export_mode="add",
         )
 
diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py
index 26e3099c4..41fd92c68 100644
--- a/tests/test_augmentation.py
+++ b/tests/test_augmentation.py
@@ -105,7 +105,7 @@ def test_hf_ner_augmentation(self):
             training_data={
                 "data_source": "tests/fixtures/train.conll",
             },
-            augmented_data="tests/fixtures/augmentated_train.conll",
+            save_data_path="tests/fixtures/augmentated_train.conll",
             export_mode="inplace",
         )
         is_file_exist = pl.Path("tests/fixtures/augmentated_train.conll").is_file()
@@ -121,7 +121,7 @@ def test_spacy_ner_augmentation(self):
 
         harness.augment(
             training_data={"data_source": "tests/fixtures/train.conll"},
-            augmented_data="tests/fixtures/augmentated_train.conll",
+            save_data_path="tests/fixtures/augmentated_train.conll",
             export_mode="inplace",
         )
         is_file_exist = pl.Path("tests/fixtures/augmentated_train.conll").is_file()
@@ -139,7 +139,7 @@ def test_custom_proportions_augment_harness(self):
 
         harness.augment(
             training_data={"data_source": "tests/fixtures/train.conll"},
-            augmented_data="tests/fixtures/augmentated_train.conll",
+            save_data_path="tests/fixtures/augmentated_train.conll",
             custom_proportions=proportions,
             export_mode="inplace",
         )
@@ -156,7 +156,7 @@ def test_templatic_augmentation(self):
         )
         self.assertIsInstance(generator, TemplaticAugment)
         generator.fix(
-            training_data={"data_source": "tests/fixtures/train.conll"},
+            save_data_path={"data_source": "tests/fixtures/train.conll"},
             output_path="tests/fixtures/augmentated_train.conll",
         )
         is_file_exist = pl.Path("tests/fixtures/augmentated_train.conll").is_file()
@@ -172,7 +172,7 @@ def test_spacy_templatic_augmentation(self):
 
         harness.augment(
             training_data={"data_source": "tests/fixtures/train.conll"},
-            augmented_data="tests/fixtures/augmentated_train.conll",
+            save_data_path="tests/fixtures/augmentated_train.conll",
             templates=["I living in {LOC}", "you are working in {ORG}"],
         )
         is_file_exist = pl.Path("tests/fixtures/augmentated_train.conll").is_file()
@@ -189,7 +189,7 @@ def test_csv_dataset_textclassification_hf(self):
         custom_proportions = {"uppercase": 0.8, "lowercase": 0.8}
         harness.augment(
             training_data={"data_source": "tests/fixtures/text_classification.csv"},
-            augmented_data="tests/fixtures/augmented_text_classification.csv",
+            save_data_path="tests/fixtures/augmented_text_classification.csv",
             custom_proportions=custom_proportions,
             export_mode="transformed",
         )
@@ -209,7 +209,7 @@ def test_hf_dataset_textclassification_hf(self):
         custom_proportions = {"uppercase": 0.8, "lowercase": 0.8}
         harness.augment(
             training_data={"data_source": "imdb"},
-            augmented_data="tests/fixtures/augmented_train_transformed.csv",
+            save_data_path="tests/fixtures/augmented_train_transformed.csv",
             custom_proportions=custom_proportions,
             export_mode="transformed",
         )
@@ -229,7 +229,7 @@ def test_hf_dataset_textclassification_spacy(self):
         custom_proportions = {"uppercase": 0.8, "lowercase": 0.8}
         harness.augment(
             training_data={"data_source": "imdb"},
-            augmented_data="tests/fixtures/augmented_train_transformed.csv",
+            save_data_path="tests/fixtures/augmented_train_transformed.csv",
             custom_proportions=custom_proportions,
             export_mode="transformed",
         )

From b78767946ac4a9006f10353621e166d0d7cacd6c Mon Sep 17 00:00:00 2001
From: Rakshit Khajuria <rakshitraina1234@gmail.com>
Date: Wed, 2 Aug 2023 14:09:03 +0530
Subject: [PATCH 19/21] updated parameters in website

---
 docs/pages/docs/generate_augmentation.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/pages/docs/generate_augmentation.md b/docs/pages/docs/generate_augmentation.md
index e63f40153..a3c6dc79b 100644
--- a/docs/pages/docs/generate_augmentation.md
+++ b/docs/pages/docs/generate_augmentation.md
@@ -16,7 +16,7 @@ Several parameters are available:
 
 - **`training_data`**: (Required) Specifies the source of the original training data. It should be a dictionary containing the necessary information about the dataset.
 
-- **`augmented_data`**: (Required) Name of the file to store the augmented data. The augmented dataset will be saved in this file.
+- **`save_data_path`**: (Required) Name of the file to store the augmented data. The augmented dataset will be saved in this file.
 
 - **`custom_proportions`**: (Required) custom_proportions is a dictionary with augmentation on test type as key and proportion as value. The proportion is the percentage of the test cases that will be augmented with the given augmentation type.
 
@@ -37,7 +37,7 @@ data_kwargs = {
 
 h.augment(
     training_data = data_kwargs,
-    augmented_data ="augmented_conll03.conll",
+    save_data_path ="augmented_conll03.conll",
     custom_proportions=custom_proportions,
     export_mode="transformed")
 ```
@@ -90,7 +90,7 @@ data_kwargs = {
 
 harness.augment(
     training_data = data_kwargs,
-    augmented_data ="augmented_glue.csv",
+    save_data_path ="augmented_glue.csv",
     custom_proportions=custom_proportions,
     export_mode="add",
 )

From ccc50522a219d5aa2cec949dd02db9a418dc4a38 Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Wed, 2 Aug 2023 14:19:16 +0530
Subject: [PATCH 20/21] param updated in notebook

---
 demo/tutorials/misc/Augmentation_Control_Notebook.ipynb   | 6 +++---
 demo/tutorials/misc/Templatic_Augmentation_Notebook.ipynb | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/demo/tutorials/misc/Augmentation_Control_Notebook.ipynb b/demo/tutorials/misc/Augmentation_Control_Notebook.ipynb
index 46a25953b..ec4eac11c 100644
--- a/demo/tutorials/misc/Augmentation_Control_Notebook.ipynb
+++ b/demo/tutorials/misc/Augmentation_Control_Notebook.ipynb
@@ -1444,7 +1444,7 @@
         "1. `training_data` (dict): (Required) Specifies the source of the original training data. It should be a dictionary containing the necessary information about the dataset.\n",
         "    - Example: `{\"data_source\": \"conll03.conll\"}`\n",
         "\n",
-        "2. `augmented_data` (str): (Required) Name of the file to store the augmented data. The augmented dataset will be saved in this file.\n",
+        "2. `save_data_path` (str): (Required) Name of the file to store the augmented data. The augmented dataset will be saved in this file.\n",
         "    - Example: `augmented_conll03.conll`\n",
         "\n",
         "3. `custom_proportions` (dict): (Required) custom_proportions is a dictionary with augmentation on test type as key and proportion as value. The proportion is the percentage of the test cases that will be augmented with the given augmentation type.\n",
@@ -1489,7 +1489,7 @@
         "\n",
         "harness.augment(\n",
         "    training_data = data_kwargs,\n",
-        "    augmented_data =\"augmented_conll03.conll\",\n",
+        "    save_data_path =\"augmented_conll03.conll\",\n",
         "    custom_proportions=custom_proportions,\n",
         "    export_mode=\"transformed\")"
       ]
@@ -3159,7 +3159,7 @@
         "\n",
         "harness.augment(\n",
         "    training_data = data_kwargs,\n",
-        "    augmented_data =\"augmented_glue.csv\",\n",
+        "    save_data_path =\"augmented_glue.csv\",\n",
         "    custom_proportions=custom_proportions,\n",
         "    export_mode=\"add\",\n",
         ")"
diff --git a/demo/tutorials/misc/Templatic_Augmentation_Notebook.ipynb b/demo/tutorials/misc/Templatic_Augmentation_Notebook.ipynb
index 1bf1dddfd..06265ec17 100644
--- a/demo/tutorials/misc/Templatic_Augmentation_Notebook.ipynb
+++ b/demo/tutorials/misc/Templatic_Augmentation_Notebook.ipynb
@@ -1451,7 +1451,7 @@
         "The `.augment()` function takes the following parameters:\n",
         "\n",
         "- `training_data` (dict): (Required) Specifies the source of the original training data. It should be a dictionary containing the necessary information about the dataset.\n",
-        "- `augmented_data` (str): (Required) Name of the file to store the augmented data. The augmented dataset will be saved in this file.\n",
+        "- `save_data_path` (str): (Required) Name of the file to store the augmented data. The augmented dataset will be saved in this file.\n",
         "- `templates` (list): List of templates(string) or conll file to be used for augmentation."
       ]
     },
@@ -1482,7 +1482,7 @@
         "\n",
         "harness.augment(\n",
         "    training_data=data_kwargs,\n",
-        "    augmented_data='augmented_conll03.conll',\n",
+        "    save_data_path='augmented_conll03.conll',\n",
         "    templates=[\"The {ORG} company is located in {LOC}\", \"The {ORG} company is located in {LOC} and is owned by {PER}\"],\n",
         "    )"
       ]

From c9dfa98c7871d6d01607899e6e6cf5e9820f11c2 Mon Sep 17 00:00:00 2001
From: Prikshit7766 <prikshitsharma8024@gmail.com>
Date: Wed, 2 Aug 2023 15:44:05 +0530
Subject: [PATCH 21/21] param updated in test_augmentation.py

---
 tests/test_augmentation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py
index 41fd92c68..4961a7bca 100644
--- a/tests/test_augmentation.py
+++ b/tests/test_augmentation.py
@@ -156,7 +156,7 @@ def test_templatic_augmentation(self):
         )
         self.assertIsInstance(generator, TemplaticAugment)
         generator.fix(
-            save_data_path={"data_source": "tests/fixtures/train.conll"},
+            training_data={"data_source": "tests/fixtures/train.conll"},
             output_path="tests/fixtures/augmentated_train.conll",
         )
         is_file_exist = pl.Path("tests/fixtures/augmentated_train.conll").is_file()