remove 'moerror' option (#28, #29)

- Make 'll' and 'hl' the default, required options for asymmetric CIs (ref #28 and #29)
LSYS · Oct 22, 2022 · 2b09416 · 2b09416
1 parent 1bf2c10
commit 2b09416
Show file tree

Hide file tree

Showing 5 changed files with 22 additions and 139 deletions.
diff --git a/forestplot/arg_validators.py b/forestplot/arg_validators.py
@@ -10,7 +10,6 @@ def check_data(
     varlabel: str,
     groupvar: Optional[str] = None,
     group_order: Optional[Sequence] = None,
-    moerror: Optional[str] = None,
     ll: Optional[str] = None,
     hl: Optional[str] = None,
     annote: Optional[Union[Sequence[str], None]] = None,
@@ -33,33 +32,29 @@ def check_data(
         estimate (str)
                 Name of column containing the estimates (e.g. pearson correlation coefficient,
                 OR, regression estimates, etc.).
-    varlabel (str)
-        Name of column containing the variable label to be printed out.
-        moerror (str)
-                Name of column containing the margin of error in the confidence intervals.
-                Should be available if 'll' and 'hl' are left empty.
-    groupvar (str)
-        Name of column containing group of variables.
-    group_order (list-like)
-        List of groups by order to report in the figure.
+        varlabel (str)
+                Name of column containing the variable label to be printed out.
+        groupvar (str)
+                Name of column containing group of variables.
+        group_order (list-like)
+                List of groups by order to report in the figure.
         ll (str)
                 Name of column containing the lower limit of the confidence intervals.
-                Optional
         hl (str)
                 Name of column containing the upper limit of the confidence intervals.
         annote (list-like)
                 List of columns to add as additional annotation in the plot.
         annoteheaders (list-like)
                 List of table headers to use as column headers for the additional annotations.
-    rightannote (list-like)
-        List of columns to add as additional annotation on the right-hand side of the plot.
-    right_annoteheaders (list-like)
-        List of table headers to use as column headers for the additional annotations
-        on the right-hand side of the plot.
-    pval (str)
-        Name of column containing the p-values.
-    ylabel2 (str)
-        Title of the right-hand side y-axis.
+        rightannote (list-like)
+                List of columns to add as additional annotation on the right-hand side of the plot.
+        right_annoteheaders (list-like)
+                List of table headers to use as column headers for the additional annotations
+                on the right-hand side of the plot.
+        pval (str)
+                Name of column containing the p-values.
+        ylabel2 (str)
+                Title of the right-hand side y-axis.
 
         Returns
         -------
@@ -77,12 +72,6 @@ def check_data(
         except ValueError:
             raise TypeError("Estimates should be float or int")
 
-    if (moerror is not None) and (not ptypes.is_numeric_dtype(dataframe[moerror])):
-        try:
-            dataframe[moerror] = dataframe[moerror].astype(float)
-        except ValueError:
-            raise TypeError("Margin of error values should be float or int")
-
     if (ll is not None) and (not ptypes.is_numeric_dtype(dataframe[ll])):
         try:
             dataframe[ll] = dataframe[ll].astype(float)
@@ -95,36 +84,6 @@ def check_data(
         except ValueError:
             raise TypeError("CI higherlimit values should be float or int")
 
-    ##########################################################################
-    ## Check that either moerror or ll, hl are specified.
-    ## Create the missing data from what is available
-    ##########################################################################
-    if moerror is None:
-        try:
-            assert (ll is not None) & (hl is not None)
-        except Exception:
-            raise AssertionError(
-                'If "moerror" is not provided, then "ll" and "hl" must be provided.'
-            )
-
-    if (ll is None) or (hl is None):
-        try:
-            assert moerror is not None
-        except Exception:
-            raise AssertionError(
-                'If "ll, hl" is not provided, then "moerror" must be provided.'
-            )
-
-    # if moerror not there make it
-    if moerror is None:
-        dataframe["moerror"] = dataframe[estimate] - dataframe[ll]
-
-    # if ll, hl not there make it
-    if ll is None:
-        dataframe["ll"] = dataframe[estimate] - dataframe[moerror]
-    if hl is None:
-        dataframe["hl"] = dataframe[estimate] + dataframe[moerror]
-
     ##########################################################################
     ## Check that the annotations and headers specified are list-like
     ##########################################################################

diff --git a/forestplot/plot.py b/forestplot/plot.py
@@ -47,7 +47,6 @@ def forestplot(
     dataframe: pd.core.frame.DataFrame,
     estimate: str,
     varlabel: str,
-    moerror: Optional[str] = None,
     ll: Optional[str] = None,
     hl: Optional[str] = None,
     form_ci_report: bool = True,
@@ -88,9 +87,6 @@ def forestplot(
             OR, regression estimates, etc.).
     varlabel (str)
             Name of column containing the variable label to be printed out.
-    moerror (str)
-            Name of column containing the margin of error in the confidence intervals.
-            Should be available if 'll' and 'hl' are left empty.
     ll (str)
             Name of column containing the lower limit of the confidence intervals.
     hl (str)
@@ -156,7 +152,6 @@ def forestplot(
         dataframe=_local_df,
         estimate=estimate,
         varlabel=varlabel,
-        moerror=moerror,
         pval=pval,
         ll=ll,
         hl=hl,
@@ -176,7 +171,6 @@ def forestplot(
             dataframe=_local_df,
             estimate=estimate,
             varlabel=varlabel,
-            moerror=moerror,
             ll=ll,
             hl=hl,
             form_ci_report=form_ci_report,
@@ -199,7 +193,6 @@ def forestplot(
         dataframe=_local_df,
         yticklabel="yticklabel",
         estimate=estimate,
-        moerror=moerror,
         groupvar=groupvar,
         annoteheaders=annoteheaders,
         rightannote=rightannote,
@@ -224,7 +217,6 @@ def _preprocess_dataframe(
     dataframe: pd.core.frame.DataFrame,
     estimate: str,
     varlabel: str,
-    moerror: Optional[str],
     ll: Optional[str] = None,
     hl: Optional[str] = None,
     form_ci_report: Optional[bool] = False,
@@ -277,7 +269,6 @@ def _preprocess_dataframe(
         dataframe = form_est_ci(
             dataframe=dataframe,
             estimate=estimate,
-            moerror=moerror,
             ll=ll,
             hl=hl,
             decimal_precision=decimal_precision,
@@ -329,7 +320,6 @@ def _make_forestplot(
     dataframe: pd.core.frame.DataFrame,
     yticklabel: str,
     estimate: str,
-    moerror: str,
     groupvar: str,
     pval: str,
     xticks: Optional[Union[list, range]],
@@ -356,8 +346,6 @@ def _make_forestplot(
             Matplotlib Axes object.
     """
     _, ax = plt.subplots(figsize=figsize, facecolor="white")
-    if moerror is None:
-        moerror = "moerror"
     ax = draw_ci(
         dataframe=dataframe,
         estimate=estimate,

diff --git a/forestplot/text_utils.py b/forestplot/text_utils.py
@@ -8,7 +8,6 @@
 def form_est_ci(
     dataframe: pd.core.frame.DataFrame,
     estimate: str,
-    moerror: Union[str, None],
     ll: str,
     hl: str,
     decimal_precision: int,
@@ -27,15 +26,10 @@ def form_est_ci(
     estimate (str)
             Name of column containing the estimates (e.g. pearson correlation coefficient,
             OR, regression estimates, etc.).
-    moerror (str)
-            Name of column containing the margin of error in the confidence intervals.
-            Should be available if 'll' and 'hl' are left empty.
     ll (str)
             Name of column containing the lower limit of the confidence intervals.
-            Optional
     hl (str)
             Name of column containing the upper limit of the confidence intervals.
-            Optional
     decimal_precision (int)
             Precision of 2 means we go from '0.1234' -> '0.12'.
     caps (iterable)

diff --git a/tests/test_arg_validators.py b/tests/test_arg_validators.py
@@ -22,18 +22,8 @@ def test_check_data():
     assert str(excinfo.value) == "Estimates should be float or int"
 
     # Assert that conversion for numeric estimate stored as string works
-    _df = pd.DataFrame({"estimate": numeric_as_string, "varlabel": string, "moerror": numeric})
-    check_data(dataframe=_df, estimate="estimate", varlabel="varlabel", moerror="moerror")
-
-    # Assert that assertion for numeric type for moerror works
-    _df = pd.DataFrame({"estimate": numeric, "moerror": string})
-    with pytest.raises(TypeError) as excinfo:
-        check_data(dataframe=_df, estimate="estimate", varlabel="estimate", moerror="moerror")
-    assert str(excinfo.value) == "Margin of error values should be float or int"
-
-    # Assert that conversion for numeric moerror stored as string works
-    _df = pd.DataFrame({"estimate": numeric_as_string, "moerror": numeric_as_string})
-    check_data(dataframe=_df, estimate="estimate", varlabel="estimate", moerror="moerror")
+    _df = pd.DataFrame({"estimate": numeric_as_string, "varlabel": string})
+    check_data(dataframe=_df, estimate="estimate", varlabel="varlabel")
 
     # Assert that assertion for numeric type for ll works
     _df = pd.DataFrame({"estimate": numeric, "ll": string})
@@ -67,53 +57,16 @@ def test_check_data():
     )
     check_data(dataframe=_df, estimate="estimate", varlabel="estimate", ll="ll", hl="hl")
 
-    # Assert assertion that either moerror or (ll and hl) is specified works
-    with pytest.raises(AssertionError) as excinfo:
-        check_data(dataframe=_df, estimate="estimate", varlabel="estimate")
-    assert (
-        str(excinfo.value)
-        == 'If "moerror" is not provided, then "ll" and "hl" must be provided.'
-    )
-
-    ##########################################################################
-    ## Check that column creation works
-    ##########################################################################
-    # Assert moerror is created if ll and hl specified
-    _df = pd.DataFrame(
-        {
-            "estimate": numeric_as_string,
-            "ll": numeric_as_string,
-            "hl": numeric_as_string,
-        }
-    )
-    processed_df = check_data(
-        dataframe=_df, estimate="estimate", varlabel="estimate", ll="ll", hl="hl"
-    )
-    assert "moerror" in processed_df
-
-    # Assert ll and hl is created if only moerror specified
-    _df = pd.DataFrame(
-        {
-            "estimate": numeric_as_string,
-            "moerror": numeric_as_string,
-        }
-    )
-    processed_df = check_data(
-        dataframe=_df, estimate="estimate", varlabel="estimate", moerror="moerror"
-    )
-    assert set(["ll", "hl"]).issubset(processed_df.columns)
-
     ##########################################################################
     ## Check annote
     ##########################################################################
     # Assert assertion that annote and annoteheader is same length works
-    _df = pd.DataFrame({"estimate": numeric_as_string, "moerror": numeric})
+    _df = pd.DataFrame({"estimate": numeric_as_string})
     with pytest.raises(ValueError) as excinfo:
         check_data(
             dataframe=_df,
             estimate="estimate",
             varlabel="estimate",
-            moerror="moerror",
             annote=["col1", "col2"],
             annoteheaders=["header1"],
         )
@@ -124,8 +77,7 @@ def test_check_data():
         dataframe=_df,
         estimate="estimate",
         varlabel="estimate",
-        moerror="moerror",
-        annote=["moerror"],
+        annote=["estimate"],
     )
 
     # Raise error if annote cannot be found in dataframe columns
@@ -134,7 +86,6 @@ def test_check_data():
             dataframe=_df,
             estimate="estimate",
             varlabel="estimate",
-            moerror="moerror",
             annote=["dummy"],
         )
     assert str(excinfo.value) == "the field dummy is not found in dataframe."
@@ -145,21 +96,19 @@ def test_check_data():
         dataframe=_df,
         estimate="estimate",
         varlabel="moerror",
-        moerror="moerror",
         annote=["ci_range"],
     )
 
     ##########################################################################
     ## Check rightannote
     ##########################################################################
     # Assert assertion that rightannote and right_annoteheaders is same length works
-    _df = pd.DataFrame({"estimate": numeric_as_string, "moerror": numeric})
+    _df = pd.DataFrame({"estimate": numeric_as_string})
     with pytest.raises(ValueError) as excinfo:
         check_data(
             dataframe=_df,
             estimate="estimate",
             varlabel="estimate",
-            moerror="moerror",
             rightannote=["col1", "col2"],
             right_annoteheaders=["header1"],
         )
@@ -170,8 +119,7 @@ def test_check_data():
         dataframe=_df,
         estimate="estimate",
         varlabel="estimate",
-        moerror="moerror",
-        rightannote=["moerror"],
+        rightannote=["estimate"],
     )
 
     # Raise error if rightannote cannot be found in dataframe columns
@@ -180,7 +128,6 @@ def test_check_data():
             dataframe=_df,
             estimate="estimate",
             varlabel="estimate",
-            moerror="moerror",
             rightannote=["dummy"],
         )
     assert str(excinfo.value) == "the field dummy is not found in dataframe."
@@ -191,7 +138,6 @@ def test_check_data():
         dataframe=_df,
         estimate="estimate",
         varlabel="moerror",
-        moerror="moerror",
         rightannote=["ci_range"],
     )
 
@@ -203,7 +149,6 @@ def test_check_data():
             dataframe=_df,
             estimate="estimate",
             varlabel="estimate",
-            moerror="moerror",
             right_annoteheaders=["header1"],
         )
     assert (
@@ -216,7 +161,6 @@ def test_check_data():
             dataframe=_df,
             estimate="estimate",
             varlabel="estimate",
-            moerror="moerror",
             annoteheaders=["header1"],
         )
     assert (

diff --git a/tests/test_text_utils.py b/tests/test_text_utils.py
@@ -164,9 +164,7 @@ def test_form_est_ci():
             "est_ci": ["1.00(1.00 to 1.00)", "2.00(2.00 to 2.00)"],
         }
     )
-    result_df = form_est_ci(
-        _df, estimate="estimate", moerror=None, ll="ll", hl="hl", decimal_precision=2
-    )
+    result_df = form_est_ci(_df, estimate="estimate", ll="ll", hl="hl", decimal_precision=2)
     assert_frame_equal(result_df, correct_df)