From 92b58e40f366824d57be5a78c3f80fe16ac7fcd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Dvo=C5=99=C3=A1k?= Date: Fri, 10 Nov 2023 14:23:38 +0100 Subject: [PATCH] feat(moderation): add support for more models (#228) --- src/genai/schemas/descriptions.py | 12 ++++++++---- src/genai/schemas/generate_params.py | 24 +++++++++++++++++++----- src/genai/schemas/responses.py | 18 ++++++++++++++++-- 3 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/genai/schemas/descriptions.py b/src/genai/schemas/descriptions.py index 5a6d89e4..87500138 100644 --- a/src/genai/schemas/descriptions.py +++ b/src/genai/schemas/descriptions.py @@ -48,10 +48,14 @@ class Descriptions: # Params.Moderations MODERATIONS = "Leverages various models to detect hate speech in the provided inputs and generated outputs." - HAP = "Mechanism for detecting hate/abuse/profanity on a sentence level." - HAP_INPUT = "Enable/Disable HAP detection on the provided input." - HAP_OUTPUT = "Enable/Disable HAP detection on the generated output." - HAP_THRESHOLD = "The number from interval <0, 1> that causes the sentence to be flagged (default is 0.75)." + MODERATION_HAP = "Mechanism for detecting hate/abuse/profanity on a sentence level." + MODERATION_STIGMA = "Mechanism for detecting stigma-based discrimination (i.e., discrimination against an individual or group based on an attribute or characteristic that is devalued in a particular social context)." # noqa + MODERATION_IMPLICIT_HATE = "Mechanism for detecting implicit hate (i.e., subtle hatred against a specific individual or group of individuals)." # noqa + MODERATION_TYPE_INPUT = "Enable/Disable detection on the provided input." + MODERATION_TYPE_OUTPUT = "Enable/Disable detection on the generated output." + MODERATION_TYPE_THRESHOLD = ( + "The number from interval <0, 1> that causes the sentence to be flagged (default is 0.75)." + ) # Params.Chat CONVERSATION_ID = "ID of the conversation. Always empty for the first request. Pass only in case you want to join existing conversation." diff --git a/src/genai/schemas/generate_params.py b/src/genai/schemas/generate_params.py index 76a54ec8..7f1d2e19 100644 --- a/src/genai/schemas/generate_params.py +++ b/src/genai/schemas/generate_params.py @@ -41,16 +41,30 @@ def __init__(self, *args, **kwargs): # Link to doc : https://workbench.res.ibm.com/docs/api-reference#generate -class HAPOptions(BaseModel): - input: bool = Field(description=tx.HAP_INPUT, default=True) - output: bool = Field(description=tx.HAP_OUTPUT, default=True) - threshold: float = Field(description=tx.HAP_THRESHOLD, ge=0, le=1, multiple_of=0.01, default=0.75) +class ModerationTypeOptions(BaseModel): + input: bool = Field(description=tx.MODERATION_TYPE_INPUT, default=True) + output: bool = Field(description=tx.MODERATION_TYPE_OUTPUT, default=True) + threshold: float = Field(description=tx.MODERATION_TYPE_THRESHOLD, ge=0, le=1, multiple_of=0.01, default=0.75) + + +class HAPOptions(ModerationTypeOptions): + pass + + +class StigmaOptions(ModerationTypeOptions): + pass + + +class ImplicitHateOptions(ModerationTypeOptions): + pass class ModerationsOptions(BaseModel): model_config = ConfigDict(extra="allow", populate_by_name=True) - hap: Union[bool, HAPOptions] = Field(description=tx.HAP, default=False) + hap: Union[bool, HAPOptions] = Field(description=tx.MODERATION_HAP, default=False) + stigma: Union[bool, StigmaOptions] = Field(description=tx.MODERATION_STIGMA, default=False) + implicit_hate: Union[bool, ImplicitHateOptions] = Field(description=tx.MODERATION_IMPLICIT_HATE, default=False) class GenerateParams(BaseModel): diff --git a/src/genai/schemas/responses.py b/src/genai/schemas/responses.py index f21e982a..540c1bf9 100644 --- a/src/genai/schemas/responses.py +++ b/src/genai/schemas/responses.py @@ -66,15 +66,29 @@ class TextPosition(GenAiResponseModel): end: int -class HAPResult(GenAiResponseModel): +class ModerationTypeResult(GenAiResponseModel): flagged: bool score: float success: bool position: TextPosition +class HAPResult(ModerationTypeResult): + pass + + +class StigmaResult(ModerationTypeResult): + pass + + +class ImplicitHateResult(ModerationTypeResult): + pass + + class ModerationResult(GenAiResponseModel): - hap: List[HAPResult] + hap: Optional[List[HAPResult]] = None + stigma: Optional[List[StigmaResult]] = None + implicit_hate: Optional[List[ImplicitHateResult]] = None class GeneratedToken(GenAiResponseModel):