diff --git a/langtest/transform/__init__.py b/langtest/transform/__init__.py
index a4c29abb7..61d493e31 100644
--- a/langtest/transform/__init__.py
+++ b/langtest/transform/__init__.py
@@ -842,9 +842,9 @@ def transform(self) -> List[Sample]:
                 lambda x: x.split("-")[-1] if isinstance(x, str) else x
             )
             y_true = y_true.dropna()
-            params["test_name"] = test_name
+
             transformed_samples = self.supported_tests[test_name].transform(
-                y_true, params
+                test_name, y_true, params
             )
             end_time = time.time_ns()
             for sample in transformed_samples:
diff --git a/langtest/transform/fairness.py b/langtest/transform/fairness.py
index 645bd2697..f34de1e98 100644
--- a/langtest/transform/fairness.py
+++ b/langtest/transform/fairness.py
@@ -85,18 +85,26 @@ class MinGenderF1Score(BaseFairness):
             Transforms the input data into an output based on the minimum F1 score.
     """
 
-    alias_name = "min_gender_f1_score"
+    alias_name = ["min_gender_f1_score"]
 
-    @staticmethod
-    def transform(data: List[Sample], params: Dict) -> List[MinScoreSample]:
+    @classmethod
+    def transform(
+        cls, test: str, data: List[Sample], params: Dict
+    ) -> List[MinScoreSample]:
         """Computes the minimum F1 score for the given data.
         Args:
+            test (str): name of the test
             data (List[Sample]): The input data to be transformed.
-            params (Dict): parameters for tests configuration
+            params (Dict): parameters for tests configuration.
 
         Returns:
             List[MinScoreSample]: The transformed data based on the minimum F1 score.
         """
+
+        assert (
+            test in cls.alias_name
+        ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"
+
         if isinstance(params["min_score"], dict):
             min_scores = params["min_score"]
         elif isinstance(params["min_score"], float):
@@ -163,18 +171,24 @@ class MaxGenderF1Score(BaseFairness):
             Transforms the input data into an output based on the maximum F1 score.
     """
 
-    alias_name = "max_gender_f1_score"
+    alias_name = ["max_gender_f1_score"]
 
-    @staticmethod
-    def transform(data: List[Sample], params: Dict) -> List[MaxScoreSample]:
+    @classmethod
+    def transform(
+        cls, test: str, data: List[Sample], params: Dict
+    ) -> List[MaxScoreSample]:
         """Computes the maximum F1 score for the given data.
         Args:
+            test (str): name of the test.
             data (List[Sample]): The input data to be transformed.
             params (Dict): parameters for tests configuration
 
         Returns:
             List[MaxScoreSample]: The transformed data based on the maximum F1 score.
         """
+        assert (
+            test in cls.alias_name
+        ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"
         if isinstance(params["max_score"], dict):
             max_scores = params["max_score"]
         elif isinstance(params["max_score"], float):
@@ -250,16 +264,23 @@ class MinGenderRougeScore(BaseFairness):
     ]
     supported_tasks = ["question-answering", "summarization"]
 
-    @staticmethod
-    def transform(data: List[Sample], params: Dict) -> List[MinScoreSample]:
+    @classmethod
+    def transform(
+        cls, test: str, data: List[Sample], params: Dict
+    ) -> List[MinScoreSample]:
         """Computes the min rouge score for the given data.
         Args:
+            test (str): name of the test.
             data (List[Sample]): The input data to be transformed.
             params (Dict): parameters for tests configuration
 
         Returns:
             List[MinScoreSample]: The transformed data based on the minimum F1 score.
""" + assert ( + test in cls.alias_name + ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'" + if isinstance(params["min_score"], dict): min_scores = params["min_score"] elif isinstance(params["min_score"], float): @@ -274,7 +295,7 @@ def transform(data: List[Sample], params: Dict) -> List[MinScoreSample]: sample = MinScoreSample( original=None, category="fairness", - test_type=params["test_name"], + test_type=test, test_case=key, expected_results=MinScoreOutput(min_score=val), ) @@ -343,16 +364,23 @@ class MaxGenderRougeScore(BaseFairness): ] supported_tasks = ["question-answering", "summarization"] - @staticmethod - def transform(data: List[Sample], params: Dict) -> List[MaxScoreSample]: + @classmethod + def transform( + cls, test: str, data: List[Sample], params: Dict + ) -> List[MaxScoreSample]: """Computes the rouge score for the given data. Args: + test (str): name of the test. data (List[Sample]): The input data to be transformed. params (Dict): parameters for tests configuration Returns: List[MaxScoreSample]: The transformed data based on the rouge score. """ + assert ( + test in cls.alias_name + ), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'" + if isinstance(params["max_score"], dict): max_scores = params["max_score"] elif isinstance(params["max_score"], float): @@ -367,7 +395,7 @@ def transform(data: List[Sample], params: Dict) -> List[MaxScoreSample]: sample = MaxScoreSample( original=None, category="fairness", - test_type=params["test_name"], + test_type=test, test_case=key, expected_results=MaxScoreOutput(max_score=val), ) diff --git a/tests/test_fairness.py b/tests/test_fairness.py new file mode 100644 index 000000000..f2e17abf7 --- /dev/null +++ b/tests/test_fairness.py @@ -0,0 +1,151 @@ +import pytest + +from langtest.transform.fairness import ( + BaseFairness, + MinGenderF1Score, + MaxGenderF1Score, + MinGenderRougeScore, + MaxGenderRougeScore, +) +from langtest.utils.custom_types import SequenceLabel, Span +from langtest.utils.custom_types.output import ( + NEROutput, + NERPrediction, + SequenceClassificationOutput, + TranslationOutput, +) +from langtest.utils.custom_types.sample import ( + MinScoreQASample, + MaxScoreQASample, + MaxScoreSample, + MinScoreSample, + NERSample, + QASample, + SequenceClassificationSample, + SummarizationSample, + ToxicitySample, + TranslationSample, +) + + +class Testfairness: + """A test suite for evaluating the transformation process of various fairnesss. + + This test suite ensures that the fairnesss can successfully transform input data + and produce valid results. + + The fairnesss tested include Genderfairness, Ethnicityfairness, + Religionfairness, and CountryEconomicfairness. + + Attributes: + fairness_config (Dict) + """ + + fairness_config = { + "min_gender_f1_score": {"min_score": 0.66}, + "max_gender_f1_score": {"max_score": 0.60}, + "min_gender_rouge1_score": {"min_score": 0.66}, + "min_gender_rouge2_score": {"min_score": 0.60}, + "min_gender_rougeL_score": {"min_score": 0.66}, + "min_gender_rougeLsum_score": {"min_score": 0.66}, + "max_gender_rouge1_score": {"max_score": 0.66}, + "max_gender_rouge2_score": {"max_score": 0.60}, + "max_gender_rougeL_score": {"max_score": 0.66}, + "max_gender_rougeLsum_score": {"max_score": 0.66}, + } + + @pytest.fixture + def sample_data(self): + """A fixture providing sample data for the fairness transformation tests. + + Returns: + list: A list containing sample SequenceClassificationSample instances. 
+ """ + return { + "text-classification": [ + SequenceClassificationSample( + original="The last good ernest movie, and the best at that. how can you not laugh at least once at this movie. the last line is a classic, as is ernest's gangster impressions, his best moment on film. this has his best lines and is a crowning achievement among the brainless screwball comedies.", + expected_results=SequenceClassificationOutput( + predictions=[SequenceLabel(label="Positive", score=1.0)] + ), + ), + SequenceClassificationSample( + original="After my 6 year old daughter began taking riding lessons I started looking for horse movies for her. I had always heard of National Velvet but had never seen it. Boy am I glad I bought it! It's become a favorite of mine, my 6 year old AND my 2 year old. It's a shame movies like this aren't made anymore.", + expected_results=SequenceClassificationOutput( + predictions=[SequenceLabel(label="Positive", score=1.0)] + ), + ), + ], + "ner": [ + NERSample( + original="Attendance : 3,000", + expected_results=NEROutput( + predictions=[ + NERPrediction( + entity="CARDINAL", + span=Span(start=13, end=18, word="3,000"), + ) + ] + ), + ), + NERSample( + original="I do not love KFC", + expected_results=NEROutput( + predictions=[ + NERPrediction( + entity="PROD", span=Span(start=14, end=17, word="KFC") + ) + ] + ), + ), + ], + "question-answering": [ + QASample( + original_question="What is John Snow Labs?", + original_context="John Snow Labs is a healthcare company specializing in accelerating progress in data science.", + expected_results="A healthcare company specializing in accelerating progress in data science. ", + ) + ], + "summarization": [ + SummarizationSample( + original="John Snow Labs is a healthcare company specializing in accelerating progress in data " + "science.", + expected_results="JSL is a data science company", + ) + ], + } + + @pytest.mark.parametrize( + "fairness", + [ + MinGenderF1Score, + MaxGenderF1Score, + MinGenderRougeScore, + MaxGenderRougeScore, + ], + ) + def test_transform(self, fairness: BaseFairness, sample_data) -> None: + """ + Test case for fairness classes. + + Args: + fairness (Type[fairness]): The fairness class to be tested. + sample_data (List]): A list containing sample instances. + + Returns: + None + + Raises: + AssertionError: If the transformation or the final result is invalid. + """ + for alias in fairness.alias_name: + for task in fairness.supported_tasks: + transform_results = fairness.transform( + alias, sample_data[task], self.fairness_config[alias] + ) + assert isinstance(transform_results, list) + + for _, result in zip(sample_data, transform_results): + assert isinstance(result, MaxScoreSample) or isinstance( + result, MinScoreSample + )