Skip to content

Commit

Permalink
Merge pull request #682 from JohnSnowLabs/add-unittests-for-fairness-…
Browse files Browse the repository at this point in the history
…classes

Pytest for fairness class
  • Loading branch information
JulesBelveze committed Jul 31, 2023
2 parents d9621ed + 224d6e6 commit 250c05a
Show file tree
Hide file tree
Showing 3 changed files with 194 additions and 15 deletions.
4 changes: 2 additions & 2 deletions langtest/transform/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -832,9 +832,9 @@ def transform(self) -> List[Sample]:
lambda x: x.split("-")[-1] if isinstance(x, str) else x
)
y_true = y_true.dropna()
params["test_name"] = test_name

transformed_samples = self.supported_tests[test_name].transform(
y_true, params
test_name, y_true, params
)

for sample in transformed_samples:
Expand Down
54 changes: 41 additions & 13 deletions langtest/transform/fairness.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,26 @@ class MinGenderF1Score(BaseFairness):
Transforms the input data into an output based on the minimum F1 score.
"""

alias_name = "min_gender_f1_score"
alias_name = ["min_gender_f1_score"]

@staticmethod
def transform(data: List[Sample], params: Dict) -> List[MinScoreSample]:
@classmethod
def transform(
cls, test: str, data: List[Sample], params: Dict
) -> List[MinScoreSample]:
"""Computes the minimum F1 score for the given data.
Args:
test (str): name of the test
data (List[Sample]): The input data to be transformed.
params (Dict): parameters for tests configuration
params (Dict): parameters for tests configuration.
Returns:
List[MinScoreSample]: The transformed data based on the minimum F1 score.
"""

assert (
test in cls.alias_name
), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"

if isinstance(params["min_score"], dict):
min_scores = params["min_score"]
elif isinstance(params["min_score"], float):
Expand Down Expand Up @@ -163,18 +171,24 @@ class MaxGenderF1Score(BaseFairness):
Transforms the input data into an output based on the maximum F1 score.
"""

alias_name = "max_gender_f1_score"
alias_name = ["max_gender_f1_score"]

@staticmethod
def transform(data: List[Sample], params: Dict) -> List[MaxScoreSample]:
@classmethod
def transform(
cls, test: str, data: List[Sample], params: Dict
) -> List[MaxScoreSample]:
"""Computes the maximum F1 score for the given data.
Args:
test (str): name of the test.
data (List[Sample]): The input data to be transformed.
params (Dict): parameters for tests configuration
Returns:
List[MaxScoreSample]: The transformed data based on the maximum F1 score.
"""
assert (
test in cls.alias_name
), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"
if isinstance(params["max_score"], dict):
max_scores = params["max_score"]
elif isinstance(params["max_score"], float):
Expand Down Expand Up @@ -250,16 +264,23 @@ class MinGenderRougeScore(BaseFairness):
]
supported_tasks = ["question-answering", "summarization"]

@staticmethod
def transform(data: List[Sample], params: Dict) -> List[MinScoreSample]:
@classmethod
def transform(
cls, test: str, data: List[Sample], params: Dict
) -> List[MinScoreSample]:
"""Computes the min rouge score for the given data.
Args:
test (str): name of the test.
data (List[Sample]): The input data to be transformed.
params (Dict): parameters for tests configuration
Returns:
List[MinScoreSample]: The transformed data based on the minimum F1 score.
"""
assert (
test in cls.alias_name
), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"

if isinstance(params["min_score"], dict):
min_scores = params["min_score"]
elif isinstance(params["min_score"], float):
Expand All @@ -274,7 +295,7 @@ def transform(data: List[Sample], params: Dict) -> List[MinScoreSample]:
sample = MinScoreSample(
original=None,
category="fairness",
test_type=params["test_name"],
test_type=test,
test_case=key,
expected_results=MinScoreOutput(min_score=val),
)
Expand Down Expand Up @@ -343,16 +364,23 @@ class MaxGenderRougeScore(BaseFairness):
]
supported_tasks = ["question-answering", "summarization"]

@staticmethod
def transform(data: List[Sample], params: Dict) -> List[MaxScoreSample]:
@classmethod
def transform(
cls, test: str, data: List[Sample], params: Dict
) -> List[MaxScoreSample]:
"""Computes the rouge score for the given data.
Args:
test (str): name of the test.
data (List[Sample]): The input data to be transformed.
params (Dict): parameters for tests configuration
Returns:
List[MaxScoreSample]: The transformed data based on the rouge score.
"""
assert (
test in cls.alias_name
), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"

if isinstance(params["max_score"], dict):
max_scores = params["max_score"]
elif isinstance(params["max_score"], float):
Expand All @@ -367,7 +395,7 @@ def transform(data: List[Sample], params: Dict) -> List[MaxScoreSample]:
sample = MaxScoreSample(
original=None,
category="fairness",
test_type=params["test_name"],
test_type=test,
test_case=key,
expected_results=MaxScoreOutput(max_score=val),
)
Expand Down
151 changes: 151 additions & 0 deletions tests/test_fairness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import pytest

from langtest.transform.fairness import (
BaseFairness,
MinGenderF1Score,
MaxGenderF1Score,
MinGenderRougeScore,
MaxGenderRougeScore,
)
from langtest.utils.custom_types import SequenceLabel, Span
from langtest.utils.custom_types.output import (
NEROutput,
NERPrediction,
SequenceClassificationOutput,
TranslationOutput,
)
from langtest.utils.custom_types.sample import (
MinScoreQASample,
MaxScoreQASample,
MaxScoreSample,
MinScoreSample,
NERSample,
QASample,
SequenceClassificationSample,
SummarizationSample,
ToxicitySample,
TranslationSample,
)


class Testfairness:
    """Test suite for the transformation step of the fairness test classes.

    Verifies that each fairness test class can transform sample input data for
    every task it supports and that it produces valid min/max score samples.

    The classes exercised are MinGenderF1Score, MaxGenderF1Score,
    MinGenderRougeScore, and MaxGenderRougeScore (the docstring previously
    listed unrelated bias classes).

    Attributes:
        fairness_config (Dict): per-test parameters, keyed by test alias name,
            passed as the ``params`` argument to each ``transform`` call.
    """

    fairness_config = {
        "min_gender_f1_score": {"min_score": 0.66},
        "max_gender_f1_score": {"max_score": 0.60},
        "min_gender_rouge1_score": {"min_score": 0.66},
        "min_gender_rouge2_score": {"min_score": 0.60},
        "min_gender_rougeL_score": {"min_score": 0.66},
        "min_gender_rougeLsum_score": {"min_score": 0.66},
        "max_gender_rouge1_score": {"max_score": 0.66},
        "max_gender_rouge2_score": {"max_score": 0.60},
        "max_gender_rougeL_score": {"max_score": 0.66},
        "max_gender_rougeLsum_score": {"max_score": 0.66},
    }

    @pytest.fixture
    def sample_data(self):
        """Fixture providing sample data for the fairness transformation tests.

        Returns:
            dict: sample instances keyed by task name, covering every task
                declared in the tested classes' ``supported_tasks``.
        """
        return {
            "text-classification": [
                SequenceClassificationSample(
                    original="The last good ernest movie, and the best at that. how can you not laugh at least once at this movie. the last line is a classic, as is ernest's gangster impressions, his best moment on film. this has his best lines and is a crowning achievement among the brainless screwball comedies.",
                    expected_results=SequenceClassificationOutput(
                        predictions=[SequenceLabel(label="Positive", score=1.0)]
                    ),
                ),
                SequenceClassificationSample(
                    original="After my 6 year old daughter began taking riding lessons I started looking for horse movies for her. I had always heard of National Velvet but had never seen it. Boy am I glad I bought it! It's become a favorite of mine, my 6 year old AND my 2 year old. It's a shame movies like this aren't made anymore.",
                    expected_results=SequenceClassificationOutput(
                        predictions=[SequenceLabel(label="Positive", score=1.0)]
                    ),
                ),
            ],
            "ner": [
                NERSample(
                    original="Attendance : 3,000",
                    expected_results=NEROutput(
                        predictions=[
                            NERPrediction(
                                entity="CARDINAL",
                                span=Span(start=13, end=18, word="3,000"),
                            )
                        ]
                    ),
                ),
                NERSample(
                    original="I do not love KFC",
                    expected_results=NEROutput(
                        predictions=[
                            NERPrediction(
                                entity="PROD", span=Span(start=14, end=17, word="KFC")
                            )
                        ]
                    ),
                ),
            ],
            "question-answering": [
                QASample(
                    original_question="What is John Snow Labs?",
                    original_context="John Snow Labs is a healthcare company specializing in accelerating progress in data science.",
                    expected_results="A healthcare company specializing in accelerating progress in data science. ",
                )
            ],
            "summarization": [
                SummarizationSample(
                    original="John Snow Labs is a healthcare company specializing in accelerating progress in data "
                    "science.",
                    expected_results="JSL is a data science company",
                )
            ],
        }

    @pytest.mark.parametrize(
        "fairness",
        [
            MinGenderF1Score,
            MaxGenderF1Score,
            MinGenderRougeScore,
            MaxGenderRougeScore,
        ],
    )
    def test_transform(self, fairness: BaseFairness, sample_data) -> None:
        """Test case for fairness classes.

        Runs every alias of the given fairness class against every task it
        supports and validates the type of each transformed result.

        Args:
            fairness (Type[BaseFairness]): The fairness class to be tested.
            sample_data (dict): Sample instances keyed by task name.

        Raises:
            AssertionError: If the transformation or the final result is invalid.
        """
        for alias in fairness.alias_name:
            for task in fairness.supported_tasks:
                transform_results = fairness.transform(
                    alias, sample_data[task], self.fairness_config[alias]
                )
                assert isinstance(transform_results, list)

                # Validate every produced sample. The original zipped against
                # the fixture dict (its 4 keys), silently truncating the check
                # to at most 4 results and discarding the key.
                for result in transform_results:
                    assert isinstance(result, (MinScoreSample, MaxScoreSample))

0 comments on commit 250c05a

Please sign in to comment.