From 2f5a119b831b3e4ffa203c3ce82c35e26c4152ee Mon Sep 17 00:00:00 2001
From: Natalia <natalia1806@yandex-team.ru>
Date: Wed, 16 Aug 2023 14:35:07 +0300
Subject: [PATCH] updated the Multilabel and Pairwise documentation

---
 .../multilabel/binary_relevance.py            | 67 +++++++++--------
 .../aggregation/pairwise/bradley_terry.py     | 71 ++++++++++---------
 crowdkit/aggregation/pairwise/noisy_bt.py     | 62 +++++++++-------
 3 files changed, 112 insertions(+), 88 deletions(-)

diff --git a/crowdkit/aggregation/multilabel/binary_relevance.py b/crowdkit/aggregation/multilabel/binary_relevance.py
index be0242b9..5cfc1599 100644
--- a/crowdkit/aggregation/multilabel/binary_relevance.py
+++ b/crowdkit/aggregation/multilabel/binary_relevance.py
@@ -13,23 +13,35 @@
 
 @attr.s
 class BinaryRelevance(BaseClassificationAggregator):
-    r"""Simple aggregation algorithm for multi-label classification.
-
-    Binary Relevance is a straightforward approach for multi-label classification aggregation:
-    each label is treated as a class in binary classification problem and aggregated separately using
-    aggregation algorithms for classification, e.g. Majority Vote or Dawid Skene.
+    r"""The **Binary Relevance** algorithm is a simple aggregation algorithm for the multi-label classification.
+
+    Binary Relevance is a straightforward approach for the multi-label classification aggregation:
+    each label is represented as a class in the binary classification problem and aggregated separately using
+    aggregation algorithms for classification (e.g., Majority Vote or Dawid-Skene). Specifically,
+    for each class label $\lambda_j$, Binary Relevance derives a binary training set $D_j$ from the original
+    multi-label training set $D$ in the following way:
+    $$
+    D_j = \{(x^i, y_j^i) \mid 1 \leq i \leq m\}.
+    $$
+    In other words, each multi-label training example $(x^i, y^i)$ is transformed into a binary training example
+    based on its relevancy to $\lambda_j$.
 
     {% note info %}
 
-    If this method is used for single-label classification, the output of the BinaryRelevance method may differ
-    from the output of the basic aggregator used for its intended purpose, since each class generates a binary
+    If this method is used for the single-label classification, the output of the Binary Relevance method may differ
+    from the output of the basic aggregator used for its intended purpose since each class generates a binary
     classification task, and therefore it is considered separately. For example, some objects may not have labels.
 
     {% endnote %}
 
+    M-L. Zhang, Y-K. Li, X-Y. Liu, X. Geng. Binary Relevance for Multi-Label Learning: An Overview.
+    *Frontiers of Computer Science. Vol. 12*, 2 (2018), 191-202.
+
+    <http://palm.seu.edu.cn/zhangml/files/FCS'17.pdf>
+
     Args:
-        base_aggregator: Aggregator instance that will be used for each binary classification. All class parameters
-         will be copied, except for the results of previous fit.
+        base_aggregator: The aggregator instance that will be used for each binary classification. All class parameters
+         will be copied, except for the results of the previous fit.
 
     Examples:
         >>> import pandas as pd
@@ -48,15 +60,14 @@ class BinaryRelevance(BaseClassificationAggregator):
         >>> result = BinaryRelevance(DawidSkene(n_iter=10)).fit_predict(df)
 
     Attributes:
-        labels_ (typing.Optional[pandas.core.series.Series]): Tasks' labels.
-            A pandas.Series indexed by `task` such that `labels.loc[task]`
-            is the tasks' aggregated labels.
-
-        aggregators_ (dict[str, BaseClassificationAggregator]): Labels' aggregators matched to classes.
-            A dictionary that matches aggregators to classes.
-            The key is the class found in the source data,
-            and the value is the aggregator used for this class.
-            The set of keys is all the classes that are in the input data.
+        labels_ (typing.Optional[pandas.core.series.Series]): The task labels.
+            The `pandas.Series` data is indexed by `task` so that `labels.loc[task]` is a list of the task aggregated labels.
+
+        aggregators_ (dict[str, BaseClassificationAggregator]): The label aggregators matched to the classes.
+            It is represented as a dictionary that matches the aggregators to the classes.
+            The key is a class found in the source data,
+            and the value is an aggregator used for this class.
+            The set of keys is all the classes that are used in the input data.
     """
     base_aggregator: BaseClassificationAggregator = attr.ib(
         # validator=attr.validators.instance_of(BaseClassificationAggregator),
@@ -69,12 +80,12 @@ def _any_name_except_a_name_of_an_attribute(self, attribute: Any, value: Any) ->
             "Aggregator argument should be a classification aggregator"
 
     def fit(self, data: pd.DataFrame) -> 'BinaryRelevance':
-        """Fit the aggregators.
+        """Fits the model to the training data.
 
         Args:
-            data (DataFrame): Workers' labeling results.
-                A pandas.DataFrame containing `task`, `worker` and `label` columns.
-                'label' column should contain list of labels, e.g. ['tree', 'house', 'car']
+            data (DataFrame): The training dataset of workers' labeling results
+                which is represented as the `pandas.DataFrame` data containing `task`, `worker`, and `label` columns.
+                The `label` column should contain a list of labels (e.g., ['tree', 'house', 'car']).
 
         Returns:
             BinaryRelevance: self.
@@ -107,15 +118,15 @@ def fit(self, data: pd.DataFrame) -> 'BinaryRelevance':
         return self
 
     def fit_predict(self, data: pd.DataFrame) -> pd.Series:
-        """Fit the model and return aggregated results.
+        """Fits the model to the training data and returns the aggregated results.
 
          Args:
-             data (DataFrame): Workers' labeling results.
-                 A pandas.DataFrame containing `task`, `worker` and `label` columns.
+             data (DataFrame): The training dataset of workers' labeling results
+                which is represented as the `pandas.DataFrame` data containing `task`, `worker`, and `label` columns.
 
          Returns:
-             Series: Tasks' labels.
-                 A pandas.Series indexed by `task` such that `labels.loc[task]`
-                 is a list with the task's aggregated labels.
+             Series: Task labels.
+                 The `pandas.Series` data is indexed by `task` so that `labels.loc[task]`
+                 is a list of the task aggregated labels.
          """
         return self.fit(data).labels_
diff --git a/crowdkit/aggregation/pairwise/bradley_terry.py b/crowdkit/aggregation/pairwise/bradley_terry.py
index fbd979f0..8e50e17f 100644
--- a/crowdkit/aggregation/pairwise/bradley_terry.py
+++ b/crowdkit/aggregation/pairwise/bradley_terry.py
@@ -14,43 +14,45 @@
 
 @attr.s
 class BradleyTerry(BasePairwiseAggregator):
-    r"""Bradley-Terry model for pairwise comparisons.
-
-    The model implements the classic algorithm for aggregating pairwise comparisons.
-    The algorithm constructs an items' ranking based on pairwise comparisons. Given
-    a pair of two items $i$ and $j$, the probability of $i$ to be ranked higher is,
-    according to the Bradley-Terry's probabilistic model,
+    r"""The **Bradley-Terry model for paired comparisons** implements the classic algorithm
+    for aggregating paired comparisons. The algorithm constructs the ranking of items based on paired comparisons.
+    Given a pair of two items $i$ and $j$, the probability that $i$ is ranked higher than $j$,
+    according to the probabilistic Bradley-Terry model, is
     $$
-    P(i > j) = \frac{p_i}{p_i + p_j}.
+    P(i > j) = \frac{p_i}{p_i + p_j},
     $$
-    Here $\boldsymbol{p}$ is a vector of positive real-valued parameters that the algorithm optimizes. These
-    optimization process maximizes the log-likelihood of observed comparisons outcomes by the MM-algorithm:
+    where $\boldsymbol{p}$ is a vector of the positive real-valued parameters that the algorithm optimizes. The
+    optimization process maximizes the log-likelihood of the outcomes of the observed comparisons using the MM algorithm:
     $$
     L(\boldsymbol{p}) = \sum_{i=1}^n\sum_{j=1}^n[w_{ij}\ln p_i - w_{ij}\ln (p_i + p_j)],
     $$
-    where $w_{ij}$ denotes the number of comparisons of $i$ and $j$ "won" by $i$.
+    where $w_{ij}$ denotes the number of times item $i$ has beaten item $j$ and we assume $w_{ii} = 0$ by convention.
 
     {% note info %}
 
-    The Bradley-Terry model needs the comparisons graph to be **strongly connected**.
+    The Bradley-Terry model requires the comparison graph to be **strongly connected**.
 
     {% endnote %}
 
-    David R. Hunter.
-    MM algorithms for generalized Bradley-Terry models
-    *Ann. Statist.*, Vol. 32, 1 (2004): 384–406.
+    David R. Hunter. MM Algorithms for Generalized Bradley-Terry Models.
+    *Ann. Statist. Vol. 32*, 1 (2004), 384–406.
+
+    <https://projecteuclid.org/journals/annals-of-statistics/volume-32/issue-1/MM-algorithms-for-generalized-Bradley-Terry-models/10.1214/aos/1079120141.full>
+
+    R. A. Bradley, M. E. Terry. Rank Analysis of Incomplete Block Designs: I. The Method of Paired Comparisons.
+    *Biometrika. Vol. 39*, 3/4 (1952), 324–345.
 
-    Bradley, R. A. and Terry, M. E.
-    Rank analysis of incomplete block designs. I. The method of paired comparisons.
-    *Biometrika*, Vol. 39 (1952): 324–345.
+    <https://doi.org/10.2307/2334029>
 
     Args:
-        n_iter: A number of optimization iterations.
+        n_iter: The maximum number of optimization iterations.
+        tol: The tolerance stopping criterion for iterative methods with a variable number of steps.
+            The algorithm converges when the loss change is less than the `tol` parameter.
 
     Examples:
-        The Bradley-Terry model needs the data to be a `DataFrame` containing columns
-        `left`, `right`, and `label`. `left` and `right` contain identifiers of left and
-        right items respectively, `label` contains identifiers of items that won these
+        The Bradley-Terry model requires the `DataFrame` data containing columns
+        `left`, `right`, and `label`. `left` and `right` contain the identifiers of the left and
+        right items respectively, `label` contains the identifiers of the items that won these
         comparisons.
 
         >>> import pandas as pd
@@ -64,8 +66,9 @@ class BradleyTerry(BasePairwiseAggregator):
         >>> )
 
     Attributes:
-        scores_ (Series): 'Labels' scores.
-            A pandas.Series index by labels and holding corresponding label's scores
+        scores_ (Series): The label scores.
+            The `pandas.Series` data is indexed by `label` and contains the corresponding label scores.
+        loss_history_ (List[float]): A list of loss values during training.
     """
 
     n_iter: int = attr.ib()
@@ -74,10 +77,11 @@ class BradleyTerry(BasePairwiseAggregator):
     loss_history_: List[float] = attr.ib(init=False)
 
     def fit(self, data: pd.DataFrame) -> 'BradleyTerry':
-        """Args:
-            data (DataFrame): Workers' pairwise comparison results.
-                A pandas.DataFrame containing `worker`, `left`, `right`, and `label` columns'.
-                For each row `label` must be equal to either `left` column or `right` column.
+        """Fits the model to the training data.
+        Args:
+            data (DataFrame): The training dataset of workers' paired comparison results
+                which is represented as the `pandas.DataFrame` data containing `worker`, `left`, `right`, and `label` columns.
+                In each row, `label` must be equal to the value of either the `left` or `right` column.
 
         Returns:
             BradleyTerry: self.
@@ -126,14 +130,15 @@ def fit(self, data: pd.DataFrame) -> 'BradleyTerry':
         return self
 
     def fit_predict(self, data: pd.DataFrame) -> pd.Series:
-        """Args:
-            data (DataFrame): Workers' pairwise comparison results.
-                A pandas.DataFrame containing `worker`, `left`, `right`, and `label` columns'.
-                For each row `label` must be equal to either `left` column or `right` column.
+        """Fits the model to the training data and returns the aggregated results.
+        Args:
+            data (DataFrame): The training dataset of workers' paired comparison results
+                which is represented as the `pandas.DataFrame` data containing `worker`, `left`, `right`, and `label` columns.
+                In each row, `label` must be equal to the value of either the `left` or `right` column.
 
         Returns:
-            Series: 'Labels' scores.
-                A pandas.Series index by labels and holding corresponding label's scores
+            Series: The label scores.
+                The `pandas.Series` data is indexed by `label` and contains the corresponding label scores.
         """
         return self.fit(data).scores_
 
diff --git a/crowdkit/aggregation/pairwise/noisy_bt.py b/crowdkit/aggregation/pairwise/noisy_bt.py
index 57436bc9..a06a84e6 100644
--- a/crowdkit/aggregation/pairwise/noisy_bt.py
+++ b/crowdkit/aggregation/pairwise/noisy_bt.py
@@ -15,16 +15,22 @@
 
 @attr.s
 class NoisyBradleyTerry(BasePairwiseAggregator):
-    r"""Bradley-Terry model for pairwise comparisons with additional parameters.
-
-    This model is a modification of the [Bradley-Terry model](crowdkit.aggregation.pairwise.bradley_terry.BradleyTerry.md)
-    with parameters for workers' skills (reliability) and biases.
-
+    r"""The **Bradley-Terry model for paired comparisons with the additional parameters** is a modification
+    of the [Bradley-Terry model](crowdkit.aggregation.pairwise.bradley_terry.BradleyTerry.md)
+    with the parameters for the workers' skills (reliability) and biases.
+
+    Args:
+        n_iter: The maximum number of optimization iterations.
+        tol: The tolerance stopping criterion for iterative methods with a variable number of steps.
+            The algorithm converges when the loss change is less than the `tol` parameter.
+        random_state: The seed number for the random initialization.
+        regularization_ratio: The regularization ratio.
+
     Examples:
         The following example shows how to aggregate results of comparisons **grouped by some column**.
-        In the example the two questions `q1` and `q2` are used to group the labeled data.
-        Temporary data structure is created and the model is applied to it.
-        The results are splitted in two arrays, and each array contains scores for one of the initial groups.
+        In the example, two questions `q1` and `q2` are used to group the labeled data.
+        The temporary data structure is created and the model is applied to it.
+        The results are divided into two arrays, and each array contains scores for one of the initial groups.
 
         >>> import pandas as pd
         >>> from crowdkit.aggregation import NoisyBradleyTerry
@@ -39,9 +45,9 @@ class NoisyBradleyTerry(BasePairwiseAggregator):
         >>>     ],
         >>>     columns=['question', 'worker', 'left', 'right', 'label']
         >>> )
-        >>> # Append question to other columns. After that the data looks like:
-        >>> #   question worker     left    right    label
-        >>> # 0       q1     w1  (q1, a)  (q1, b)  (q1, a)
+        >>> # Append question to other columns. After that, the data looks like:
+        >>> #    question worker     left    right    label
+        >>> # 0        q1     w1  (q1, a)  (q1, b)  (q1, a)
         >>> for col in 'left', 'right', 'label':
         >>>     data[col] = list(zip(data['question'], data[col]))
         >>> result = NoisyBradleyTerry(n_iter=10).fit_predict(data)
@@ -51,12 +57,12 @@ class NoisyBradleyTerry(BasePairwiseAggregator):
         >>> print(result['q2']['b']) # Score for the item b in the q2 question
 
     Attributes:
-        scores_ (Series): 'Labels' scores.
-            A pandas.Series index by labels and holding corresponding label's scores
-        skills_ (Series): workers' skills.
-            A pandas.Series index by workers and holding corresponding worker's skill
-        biases_ (Series): Predicted biases for each worker. Indicates the probability of a worker to choose the left item..
-            A series of workers' biases indexed by workers
+        scores_ (Series): The label scores.
+            The `pandas.Series` data is indexed by `label` and contains the corresponding label scores.
+        skills_ (Series): The workers' skills. The `pandas.Series` data is indexed by `worker`
+            and has the corresponding worker skill.
+        biases_ (Series): The predicted biases for each worker. Indicates the probability that a worker chooses the left item.
+            The `pandas.Series` data is indexed by `worker` and has the corresponding worker bias.
     """
     n_iter: int = attr.ib(default=100)
     tol: float = attr.ib(default=1e-5)
@@ -68,10 +74,11 @@ class NoisyBradleyTerry(BasePairwiseAggregator):
     # scores_
 
     def fit(self, data: pd.DataFrame) -> 'NoisyBradleyTerry':
-        """Args:
-            data (DataFrame): Workers' pairwise comparison results.
-                A pandas.DataFrame containing `worker`, `left`, `right`, and `label` columns'.
-                For each row `label` must be equal to either `left` column or `right` column.
+        """Fits the model to the training data.
+        Args:
+            data (DataFrame): The training dataset of workers' paired comparison results
+                which is represented as the `pandas.DataFrame` data containing `worker`, `left`, `right`, and `label` columns.
+                In each row, `label` must be equal to the value of either the `left` or `right` column.
 
         Returns:
             NoisyBradleyTerry: self.
@@ -97,14 +104,15 @@ def fit(self, data: pd.DataFrame) -> 'NoisyBradleyTerry':
         return self
 
     def fit_predict(self, data: pd.DataFrame) -> pd.Series:
-        """Args:
-            data (DataFrame): Workers' pairwise comparison results.
-                A pandas.DataFrame containing `worker`, `left`, `right`, and `label` columns'.
-                For each row `label` must be equal to either `left` column or `right` column.
+        """Fits the model to the training data and returns the aggregated results.
+        Args:
+            data (DataFrame): The training dataset of workers' paired comparison results
+                which is represented as the `pandas.DataFrame` data containing `worker`, `left`, `right`, and `label` columns.
+                In each row, `label` must be equal to the value of either the `left` or `right` column.
 
         Returns:
-            Series: 'Labels' scores.
-                A pandas.Series index by labels and holding corresponding label's scores
+            Series: The label scores.
+                The `pandas.Series` data is indexed by `label` and contains the corresponding label scores.
         """
         return self.fit(data).scores_