Skip to content

Commit

Permalink
Merge pull request #682 from JohnSnowLabs/add-unittests-for-fairness-…
Browse files Browse the repository at this point in the history
…classes

Pytest for fairness class
  • Loading branch information
JulesBelveze committed Jul 31, 2023
2 parents d9621ed + 224d6e6 commit 250c05a
Show file tree
Hide file tree
Showing 3 changed files with 194 additions and 15 deletions.
4 changes: 2 additions & 2 deletions langtest/transform/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -832,9 +832,9 @@ def transform(self) -> List[Sample]:
lambda x: x.split("-")[-1] if isinstance(x, str) else x
)
y_true = y_true.dropna()
params["test_name"] = test_name

transformed_samples = self.supported_tests[test_name].transform(
y_true, params
test_name, y_true, params
)

for sample in transformed_samples:
Expand Down
54 changes: 41 additions & 13 deletions langtest/transform/fairness.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,26 @@ class MinGenderF1Score(BaseFairness):
Transforms the input data into an output based on the minimum F1 score.
"""

alias_name = "min_gender_f1_score"
alias_name = ["min_gender_f1_score"]

@staticmethod
def transform(data: List[Sample], params: Dict) -> List[MinScoreSample]:
@classmethod
def transform(
cls, test: str, data: List[Sample], params: Dict
) -> List[MinScoreSample]:
"""Computes the minimum F1 score for the given data.
Args:
test (str): name of the test
data (List[Sample]): The input data to be transformed.
params (Dict): parameters for tests configuration
params (Dict): parameters for tests configuration.
Returns:
List[MinScoreSample]: The transformed data based on the minimum F1 score.
"""

assert (
test in cls.alias_name
), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"

if isinstance(params["min_score"], dict):
min_scores = params["min_score"]
elif isinstance(params["min_score"], float):
Expand Down Expand Up @@ -163,18 +171,24 @@ class MaxGenderF1Score(BaseFairness):
Transforms the input data into an output based on the maximum F1 score.
"""

alias_name = "max_gender_f1_score"
alias_name = ["max_gender_f1_score"]

@staticmethod
def transform(data: List[Sample], params: Dict) -> List[MaxScoreSample]:
@classmethod
def transform(
cls, test: str, data: List[Sample], params: Dict
) -> List[MaxScoreSample]:
"""Computes the maximum F1 score for the given data.
Args:
test (str): name of the test.
data (List[Sample]): The input data to be transformed.
params (Dict): parameters for tests configuration
Returns:
List[MaxScoreSample]: The transformed data based on the maximum F1 score.
"""
assert (
test in cls.alias_name
), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"
if isinstance(params["max_score"], dict):
max_scores = params["max_score"]
elif isinstance(params["max_score"], float):
Expand Down Expand Up @@ -250,16 +264,23 @@ class MinGenderRougeScore(BaseFairness):
]
supported_tasks = ["question-answering", "summarization"]

@staticmethod
def transform(data: List[Sample], params: Dict) -> List[MinScoreSample]:
@classmethod
def transform(
cls, test: str, data: List[Sample], params: Dict
) -> List[MinScoreSample]:
"""Computes the min rouge score for the given data.
Args:
test (str): name of the test.
data (List[Sample]): The input data to be transformed.
params (Dict): parameters for tests configuration
Returns:
List[MinScoreSample]: The transformed data based on the minimum F1 score.
"""
assert (
test in cls.alias_name
), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"

if isinstance(params["min_score"], dict):
min_scores = params["min_score"]
elif isinstance(params["min_score"], float):
Expand All @@ -274,7 +295,7 @@ def transform(data: List[Sample], params: Dict) -> List[MinScoreSample]:
sample = MinScoreSample(
original=None,
category="fairness",
test_type=params["test_name"],
test_type=test,
test_case=key,
expected_results=MinScoreOutput(min_score=val),
)
Expand Down Expand Up @@ -343,16 +364,23 @@ class MaxGenderRougeScore(BaseFairness):
]
supported_tasks = ["question-answering", "summarization"]

@staticmethod
def transform(data: List[Sample], params: Dict) -> List[MaxScoreSample]:
@classmethod
def transform(
cls, test: str, data: List[Sample], params: Dict
) -> List[MaxScoreSample]:
"""Computes the rouge score for the given data.
Args:
test (str): name of the test.
data (List[Sample]): The input data to be transformed.
params (Dict): parameters for tests configuration
Returns:
List[MaxScoreSample]: The transformed data based on the rouge score.
"""
assert (
test in cls.alias_name
), f"Parameter 'test' should be in: {cls.alias_name}, got '{test}'"

if isinstance(params["max_score"], dict):
max_scores = params["max_score"]
elif isinstance(params["max_score"], float):
Expand All @@ -367,7 +395,7 @@ def transform(data: List[Sample], params: Dict) -> List[MaxScoreSample]:
sample = MaxScoreSample(
original=None,
category="fairness",
test_type=params["test_name"],
test_type=test,
test_case=key,
expected_results=MaxScoreOutput(max_score=val),
)
Expand Down
151 changes: 151 additions & 0 deletions tests/test_fairness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import pytest

from langtest.transform.fairness import (
BaseFairness,
MinGenderF1Score,
MaxGenderF1Score,
MinGenderRougeScore,
MaxGenderRougeScore,
)
from langtest.utils.custom_types import SequenceLabel, Span
from langtest.utils.custom_types.output import (
NEROutput,
NERPrediction,
SequenceClassificationOutput,
TranslationOutput,
)
from langtest.utils.custom_types.sample import (
MinScoreQASample,
MaxScoreQASample,
MaxScoreSample,
MinScoreSample,
NERSample,
QASample,
SequenceClassificationSample,
SummarizationSample,
ToxicitySample,
TranslationSample,
)


class Testfairness:
    """Test suite for the transformation step of the fairness test classes.

    Verifies that each fairness test class can transform sample input data for
    every task it supports and that it produces valid min/max score samples.

    The classes exercised are MinGenderF1Score, MaxGenderF1Score,
    MinGenderRougeScore, and MaxGenderRougeScore (the docstring previously
    listed unrelated bias classes).

    Attributes:
        fairness_config (Dict): per-test parameters, keyed by test alias name,
            passed as the ``params`` argument to each ``transform`` call.
    """

    fairness_config = {
        "min_gender_f1_score": {"min_score": 0.66},
        "max_gender_f1_score": {"max_score": 0.60},
        "min_gender_rouge1_score": {"min_score": 0.66},
        "min_gender_rouge2_score": {"min_score": 0.60},
        "min_gender_rougeL_score": {"min_score": 0.66},
        "min_gender_rougeLsum_score": {"min_score": 0.66},
        "max_gender_rouge1_score": {"max_score": 0.66},
        "max_gender_rouge2_score": {"max_score": 0.60},
        "max_gender_rougeL_score": {"max_score": 0.66},
        "max_gender_rougeLsum_score": {"max_score": 0.66},
    }

    @pytest.fixture
    def sample_data(self):
        """Fixture providing sample data for the fairness transformation tests.

        Returns:
            dict: sample instances keyed by task name, covering every task
                declared in the tested classes' ``supported_tasks``.
        """
        return {
            "text-classification": [
                SequenceClassificationSample(
                    original="The last good ernest movie, and the best at that. how can you not laugh at least once at this movie. the last line is a classic, as is ernest's gangster impressions, his best moment on film. this has his best lines and is a crowning achievement among the brainless screwball comedies.",
                    expected_results=SequenceClassificationOutput(
                        predictions=[SequenceLabel(label="Positive", score=1.0)]
                    ),
                ),
                SequenceClassificationSample(
                    original="After my 6 year old daughter began taking riding lessons I started looking for horse movies for her. I had always heard of National Velvet but had never seen it. Boy am I glad I bought it! It's become a favorite of mine, my 6 year old AND my 2 year old. It's a shame movies like this aren't made anymore.",
                    expected_results=SequenceClassificationOutput(
                        predictions=[SequenceLabel(label="Positive", score=1.0)]
                    ),
                ),
            ],
            "ner": [
                NERSample(
                    original="Attendance : 3,000",
                    expected_results=NEROutput(
                        predictions=[
                            NERPrediction(
                                entity="CARDINAL",
                                span=Span(start=13, end=18, word="3,000"),
                            )
                        ]
                    ),
                ),
                NERSample(
                    original="I do not love KFC",
                    expected_results=NEROutput(
                        predictions=[
                            NERPrediction(
                                entity="PROD", span=Span(start=14, end=17, word="KFC")
                            )
                        ]
                    ),
                ),
            ],
            "question-answering": [
                QASample(
                    original_question="What is John Snow Labs?",
                    original_context="John Snow Labs is a healthcare company specializing in accelerating progress in data science.",
                    expected_results="A healthcare company specializing in accelerating progress in data science. ",
                )
            ],
            "summarization": [
                SummarizationSample(
                    original="John Snow Labs is a healthcare company specializing in accelerating progress in data "
                    "science.",
                    expected_results="JSL is a data science company",
                )
            ],
        }

    @pytest.mark.parametrize(
        "fairness",
        [
            MinGenderF1Score,
            MaxGenderF1Score,
            MinGenderRougeScore,
            MaxGenderRougeScore,
        ],
    )
    def test_transform(self, fairness: BaseFairness, sample_data) -> None:
        """Test case for fairness classes.

        Runs every alias of the given fairness class against every task it
        supports and validates the type of each transformed result.

        Args:
            fairness (Type[BaseFairness]): The fairness class to be tested.
            sample_data (dict): Sample instances keyed by task name.

        Raises:
            AssertionError: If the transformation or the final result is invalid.
        """
        for alias in fairness.alias_name:
            for task in fairness.supported_tasks:
                transform_results = fairness.transform(
                    alias, sample_data[task], self.fairness_config[alias]
                )
                assert isinstance(transform_results, list)

                # Validate every produced sample. The original zipped against
                # the fixture dict (its 4 keys), silently truncating the check
                # to at most 4 results and discarding the key.
                for result in transform_results:
                    assert isinstance(result, (MinScoreSample, MaxScoreSample))

0 comments on commit 250c05a

Please sign in to comment.