@mark.parametrize(
    argnames=["given_percent", "given_new_value", "expected"],
    argvalues=[
        # percent == 0.0: the result is exactly the new value
        (0.0, 0.3, 0.3),  # left boundary
        # percent == 1.0: nothing is left to extend, the result stays 1.0
        (1.0, 0.3, 1.0),  # right boundary
        # 0.1 + 0.8 * (1 - 0.1) == 0.82
        (0.1, 0.8, 0.82),  # middle points
        # 0.8 + 0.1 * (1 - 0.8) == 0.82 (same result with the arguments swapped)
        (0.8, 0.1, 0.82),  # middle points
    ],
)
def test_extend_mean(given_percent: float, given_new_value: float, expected: float):
    """Check ``extend_mean`` at both boundaries of ``percent`` and at interior points.

    The comparison uses :func:`math.isclose` because the interior cases involve
    floating point arithmetic that is not exactly representable.
    """
    assert isclose(
        expected,
        extend_mean(percent=given_percent, new_value=given_new_value),
    )
class InvitationLetterSenderRelation(Enum):
    """Relation between the applicant and the person who sent the invitation letter.

    Each member's value is the lowercase string identifier of that relation.
    Members are used as the keys of ``INVITATION_LETTER_SENDER_IMPORTANCE``.
    """

    CHILD = "child"  # from your child(ren)
    SIBLING = "sibling"  # from your sibling(s)
    PARENT = "parent"  # from your parent(s)
    F2 = "f2"  # from your second-degree family (aunt, uncle, etc)
    F3 = "f3"  # from your third-degree family (children of "f2")
    FRIEND = "friend"  # from your friend (assuming non-legendary person)
    SPOUSE = "spouse"  # from your spouse (you should have had "family" visa already)
    PRO_UNRELATED = "pro_unrelated"  # professional yet not related to your career
    PRO_RELATED = "pro_related"  # professional and aligned with your career


# Manually assigned importance weight for every sender relation; each enum
# member has exactly one entry and every weight lies in [0, 1].
# NOTE(review): these look like raw, hand-tuned weights - they do not sum to 1,
# so any consumer that expects normalized importances must normalize them first.
# NOTE(review): presumably a larger weight means a stronger invitation letter -
# confirm with the domain experts who provided the numbers.
INVITATION_LETTER_SENDER_IMPORTANCE: Dict[InvitationLetterSenderRelation, float] = {
    InvitationLetterSenderRelation.CHILD: 1.0,
    InvitationLetterSenderRelation.SIBLING: 0.98,
    InvitationLetterSenderRelation.PARENT: 0.75,
    InvitationLetterSenderRelation.F2: 0.5,
    InvitationLetterSenderRelation.F3: 0.15,
    InvitationLetterSenderRelation.FRIEND: 0.1,
    InvitationLetterSenderRelation.SPOUSE: 0.1,
    InvitationLetterSenderRelation.PRO_UNRELATED: 0.1,
    InvitationLetterSenderRelation.PRO_RELATED: 0.35,
}
class ParameterBuilderBase:
    """Base class for integrating one manually specified parameter into model outputs.

    The output of an ML model (such as XGBoost) sometimes needs precise, hand-crafted
    adjustment for parameters the model never saw. Each such parameter is described
    by a subclass of this class, which acts as an "operator" on the model outputs.

    Workflow:

    1. Construct the operator with the parameter ``name``, its possible ``responses``
       (response -> importance) and the ``feature_category`` it affects.
    2. Call :meth:`set_response` with the actual response; this stores
       ``self.response`` and ``self.importance``.
    3. Call the ``*_modifier`` hooks (:meth:`potential_modifier`,
       :meth:`probability_modifier`) to recompute the corresponding model output.

    The ``*_modifier`` hooks raise ``NotImplementedError`` here; subclasses implement
    the ones that apply to them. Matching variables to hooks (e.g. passing a
    probability to ``probability_modifier``) is the caller's job and is not
    implemented in this class.
    """

    def __init__(
        self,
        name: str,
        responses: Dict[str, float],
        # Quoted so the union is not evaluated at definition time; an unquoted
        # ``X | Y`` between classes raises ``TypeError`` before Python 3.10.
        feature_category: "FeatureCategories | List[FeatureCategories]",
    ) -> None:
        """Initializes a parameter to be built manually

        Args:
            name (str): The name of the parameter (e.g., features in a decision tree)
            responses (Dict[str, float]): Possible values of this parameter (keys)
                with their importance (values). Importances are expected to be in
                ``[0, 1]`` and normalized (summing to ``1``); neither property is
                validated at construction.
            feature_category (:class:`vizard.data.constant.FeatureCategories`): Which
                category of features/parameters this parameter affects. The signature
                allows a list of categories, but only a single category is currently
                supported.

        Raises:
            NotImplementedError: if ``feature_category`` is not a single
                :class:`vizard.data.constant.FeatureCategories` instance.
            ValueError: if ``name`` is not a string.
        """
        self.name = name
        self.responses = responses
        self.feature_category = feature_category
        # type check
        self.__type_check()

        # values required for ``_modifier`` methods; filled by ``set_response``
        self.response: Optional[str] = None
        self.importance: Optional[float] = None

    def __type_check(self) -> None:
        """Validates the types of ``self.feature_category`` and ``self.name``

        Raises:
            NotImplementedError: if ``feature_category`` is not a single category.
            ValueError: if ``name`` is not a string.
        """
        if not isinstance(self.feature_category, FeatureCategories):
            raise NotImplementedError(
                "Currently, only assignment to a single category is implemented."
            )
        if not isinstance(self.name, str):
            raise ValueError("The name can only be string.")

    def _percent_check(self, percent: float) -> None:
        """Checks if the input variable is a percentage in [0, 1]

        Args:
            percent (float): A standardized value

        Raises:
            ValueError: if ``percent`` is not a ``float`` instance (``int`` and
                ``bool`` are rejected too), or if it is outside ``[0, 1]``
                (including ``NaN``).
        """

        if not isinstance(percent, float):
            raise ValueError(f"'{percent}' is not a float.")

        # A chained comparison is False for NaN, so NaN is rejected as well;
        # separate ``> 1.0`` / ``< 0.0`` tests would silently accept it.
        if not (0.0 <= percent <= 1.0):
            raise ValueError("Value should be in '0.0<=value<=1.0'")

    def _check_importance_set(self) -> None:
        """Checks if this operator is ready to be used by ``_modifier`` methods

        Raises:
            ValueError: if ``self.importance`` has not been set yet. In this case,
                :meth:`set_response` should be called prior to using this method.
        """

        if self.importance is None:
            raise ValueError(
                "operator must have a value for ``importance``. "
                "`self.set_response` method should be called prior to using this method."
            )

    def __get_importance(self, response: str, raw: bool = False) -> float:
        """Calculates the importance of the parameter based on the ``response`` given

        Note:
            :meth:`set_response` verifies the correctness of the ``response``
            provided before calling this method.

        Args:
            response (str): A string representing a possible value of this parameter
                which is one of the keys of :attr:`self.responses`.
            raw (bool): If True, return the raw importance provided initially by the
                user (residing in ``self.responses``); if False, return the
                normalized one. Defaults to False.

        Returns:
            float: The importance associated with ``response``.

        Raises:
            NotImplementedError: if ``raw`` is False, since normalization is not
                implemented yet.
        """
        if raw:
            return self.responses[response]
        raise NotImplementedError("Normalized importance is not yet implemented.")

    def set_response(self, response: str, raw: bool = False) -> float:
        """Sets the response to calculate ``self.importance`` used for ``_modifier`` s

        ``self.response`` and ``self.importance`` are only updated once the
        importance has been computed and validated, so a failing call leaves the
        previous state untouched.

        Args:
            response (str): A string representing a possible value of this parameter
                which is one of the keys of :attr:`self.responses`.
            raw (bool): If True, use the raw importance provided initially by the
                user (residing in ``self.responses``); if False, use the normalized
                one. Defaults to False.

        Returns:
            float: Returns the calculated importance

        Raises:
            ValueError: if ``response`` is not a key of ``self.responses``, or if the
                normalized importance is not a float in ``[0, 1]``.
            NotImplementedError: if ``raw`` is False, since normalized importance is
                not implemented yet.
        """
        # check if response is valid
        if response not in self.responses:
            raise ValueError(f"'{response}' is not valid.")

        importance = self.__get_importance(response=response, raw=raw)
        # check for range if raw=false
        if not raw:
            self._percent_check(percent=importance)

        # commit both attributes together, only after everything succeeded
        self.response = response
        self.importance = importance
        return importance

    def potential_modifier(self, potential: float) -> float:
        """Given an importance (e.g., XAI) recomputes ``potential`` by including this variable

        The value of ``importance`` is proportional to the whole value of ``potential``.
        E.g., if ``importance=0.3``, then when a new ``potential`` is computed,
        this new variable contributes ``30%`` to the overall value of ``potential``.

        Args:
            potential (float): the old potential value without the effect of this variable

        Raises:
            NotImplementedError: always; subclasses must implement this method.
        """
        raise NotImplementedError("Please extend this class and implement this method")

    def probability_modifier(self, probability: float) -> float:
        """Given an importance (e.g., XAI) recomputes ``probability`` by including this variable

        The value of ``importance`` is proportional to the whole value of ``probability``.
        E.g., if ``importance=0.3``, then when a new ``probability`` is computed,
        this new variable contributes ``30%`` to the overall value of ``probability``.

        Args:
            probability (float): the old probability value without the effect of this variable

        Raises:
            NotImplementedError: always; subclasses must implement this method.
        """
        raise NotImplementedError("Please extend this class and implement this method")


def extend_mean(percent: float, new_value: float) -> float:
    """Takes a percent and adds a new value while keeping the percent standardized

    ``new_value`` is applied only to the part of the range that ``percent`` has not
    covered yet: ``percent + new_value * (1 - percent)``. When both inputs are in
    ``[0, 1]`` the result is in ``[0, 1]`` too. The inputs are not validated.

    Args:
        percent (float): A number in range of ``[0, 1]``
        new_value (float): the value to be added to ``percent`` in range of ``[0, 1]``

    Returns:
        float: Newly standardized value of ``percent`` in range of ``[0, 1]``
    """

    new_percent: float = percent + new_value * (1 - percent)
    return new_percent