Skip to content

Commit

Permalink
remove 'moerror' option (#28, #29)
Browse files Browse the repository at this point in the history
- Make 'll' and 'hl' the default, required options for specifying confidence intervals, which allows asymmetric CIs (ref #28 & #29)
  • Loading branch information
LSYS committed Oct 22, 2022
1 parent 1bf2c10 commit 2b09416
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 139 deletions.
71 changes: 15 additions & 56 deletions forestplot/arg_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ def check_data(
varlabel: str,
groupvar: Optional[str] = None,
group_order: Optional[Sequence] = None,
moerror: Optional[str] = None,
ll: Optional[str] = None,
hl: Optional[str] = None,
annote: Optional[Union[Sequence[str], None]] = None,
Expand All @@ -33,33 +32,29 @@ def check_data(
estimate (str)
Name of column containing the estimates (e.g. pearson correlation coefficient,
OR, regression estimates, etc.).
varlabel (str)
Name of column containing the variable label to be printed out.
moerror (str)
Name of column containing the margin of error in the confidence intervals.
Should be available if 'll' and 'hl' are left empty.
groupvar (str)
Name of column containing group of variables.
group_order (list-like)
List of groups by order to report in the figure.
varlabel (str)
Name of column containing the variable label to be printed out.
groupvar (str)
Name of column containing group of variables.
group_order (list-like)
List of groups by order to report in the figure.
ll (str)
Name of column containing the lower limit of the confidence intervals.
Optional
hl (str)
Name of column containing the upper limit of the confidence intervals.
annote (list-like)
List of columns to add as additional annotation in the plot.
annoteheaders (list-like)
List of table headers to use as column headers for the additional annotations.
rightannote (list-like)
List of columns to add as additional annotation on the right-hand side of the plot.
right_annoteheaders (list-like)
List of table headers to use as column headers for the additional annotations
on the right-hand side of the plot.
pval (str)
Name of column containing the p-values.
ylabel2 (str)
Title of the right-hand side y-axis.
rightannote (list-like)
List of columns to add as additional annotation on the right-hand side of the plot.
right_annoteheaders (list-like)
List of table headers to use as column headers for the additional annotations
on the right-hand side of the plot.
pval (str)
Name of column containing the p-values.
ylabel2 (str)
Title of the right-hand side y-axis.
Returns
-------
Expand All @@ -77,12 +72,6 @@ def check_data(
except ValueError:
raise TypeError("Estimates should be float or int")

if (moerror is not None) and (not ptypes.is_numeric_dtype(dataframe[moerror])):
try:
dataframe[moerror] = dataframe[moerror].astype(float)
except ValueError:
raise TypeError("Margin of error values should be float or int")

if (ll is not None) and (not ptypes.is_numeric_dtype(dataframe[ll])):
try:
dataframe[ll] = dataframe[ll].astype(float)
Expand All @@ -95,36 +84,6 @@ def check_data(
except ValueError:
raise TypeError("CI higherlimit values should be float or int")

##########################################################################
## Check that either moerror or ll, hl are specified.
## Create the missing data from what is available
##########################################################################
if moerror is None:
try:
assert (ll is not None) & (hl is not None)
except Exception:
raise AssertionError(
'If "moerror" is not provided, then "ll" and "hl" must be provided.'
)

if (ll is None) or (hl is None):
try:
assert moerror is not None
except Exception:
raise AssertionError(
'If "ll, hl" is not provided, then "moerror" must be provided.'
)

# if moerror not there make it
if moerror is None:
dataframe["moerror"] = dataframe[estimate] - dataframe[ll]

# if ll, hl not there make it
if ll is None:
dataframe["ll"] = dataframe[estimate] - dataframe[moerror]
if hl is None:
dataframe["hl"] = dataframe[estimate] + dataframe[moerror]

##########################################################################
## Check that the annotations and headers specified are list-like
##########################################################################
Expand Down
12 changes: 0 additions & 12 deletions forestplot/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def forestplot(
dataframe: pd.core.frame.DataFrame,
estimate: str,
varlabel: str,
moerror: Optional[str] = None,
ll: Optional[str] = None,
hl: Optional[str] = None,
form_ci_report: bool = True,
Expand Down Expand Up @@ -88,9 +87,6 @@ def forestplot(
OR, regression estimates, etc.).
varlabel (str)
Name of column containing the variable label to be printed out.
moerror (str)
Name of column containing the margin of error in the confidence intervals.
Should be available if 'll' and 'hl' are left empty.
ll (str)
Name of column containing the lower limit of the confidence intervals.
hl (str)
Expand Down Expand Up @@ -156,7 +152,6 @@ def forestplot(
dataframe=_local_df,
estimate=estimate,
varlabel=varlabel,
moerror=moerror,
pval=pval,
ll=ll,
hl=hl,
Expand All @@ -176,7 +171,6 @@ def forestplot(
dataframe=_local_df,
estimate=estimate,
varlabel=varlabel,
moerror=moerror,
ll=ll,
hl=hl,
form_ci_report=form_ci_report,
Expand All @@ -199,7 +193,6 @@ def forestplot(
dataframe=_local_df,
yticklabel="yticklabel",
estimate=estimate,
moerror=moerror,
groupvar=groupvar,
annoteheaders=annoteheaders,
rightannote=rightannote,
Expand All @@ -224,7 +217,6 @@ def _preprocess_dataframe(
dataframe: pd.core.frame.DataFrame,
estimate: str,
varlabel: str,
moerror: Optional[str],
ll: Optional[str] = None,
hl: Optional[str] = None,
form_ci_report: Optional[bool] = False,
Expand Down Expand Up @@ -277,7 +269,6 @@ def _preprocess_dataframe(
dataframe = form_est_ci(
dataframe=dataframe,
estimate=estimate,
moerror=moerror,
ll=ll,
hl=hl,
decimal_precision=decimal_precision,
Expand Down Expand Up @@ -329,7 +320,6 @@ def _make_forestplot(
dataframe: pd.core.frame.DataFrame,
yticklabel: str,
estimate: str,
moerror: str,
groupvar: str,
pval: str,
xticks: Optional[Union[list, range]],
Expand All @@ -356,8 +346,6 @@ def _make_forestplot(
Matplotlib Axes object.
"""
_, ax = plt.subplots(figsize=figsize, facecolor="white")
if moerror is None:
moerror = "moerror"
ax = draw_ci(
dataframe=dataframe,
estimate=estimate,
Expand Down
6 changes: 0 additions & 6 deletions forestplot/text_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
def form_est_ci(
dataframe: pd.core.frame.DataFrame,
estimate: str,
moerror: Union[str, None],
ll: str,
hl: str,
decimal_precision: int,
Expand All @@ -27,15 +26,10 @@ def form_est_ci(
estimate (str)
Name of column containing the estimates (e.g. pearson correlation coefficient,
OR, regression estimates, etc.).
moerror (str)
Name of column containing the margin of error in the confidence intervals.
Should be available if 'll' and 'hl' are left empty.
ll (str)
Name of column containing the lower limit of the confidence intervals.
Optional
hl (str)
Name of column containing the upper limit of the confidence intervals.
Optional
decimal_precision (int)
Precision of 2 means we go from '0.1234' -> '0.12'.
caps (iterable)
Expand Down
68 changes: 6 additions & 62 deletions tests/test_arg_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,8 @@ def test_check_data():
assert str(excinfo.value) == "Estimates should be float or int"

# Assert that conversion for numeric estimate stored as string works
_df = pd.DataFrame({"estimate": numeric_as_string, "varlabel": string, "moerror": numeric})
check_data(dataframe=_df, estimate="estimate", varlabel="varlabel", moerror="moerror")

# Assert that assertion for numeric type for moerror works
_df = pd.DataFrame({"estimate": numeric, "moerror": string})
with pytest.raises(TypeError) as excinfo:
check_data(dataframe=_df, estimate="estimate", varlabel="estimate", moerror="moerror")
assert str(excinfo.value) == "Margin of error values should be float or int"

# Assert that conversion for numeric moerror stored as string works
_df = pd.DataFrame({"estimate": numeric_as_string, "moerror": numeric_as_string})
check_data(dataframe=_df, estimate="estimate", varlabel="estimate", moerror="moerror")
_df = pd.DataFrame({"estimate": numeric_as_string, "varlabel": string})
check_data(dataframe=_df, estimate="estimate", varlabel="varlabel")

# Assert that assertion for numeric type for ll works
_df = pd.DataFrame({"estimate": numeric, "ll": string})
Expand Down Expand Up @@ -67,53 +57,16 @@ def test_check_data():
)
check_data(dataframe=_df, estimate="estimate", varlabel="estimate", ll="ll", hl="hl")

# Assert assertion that either moerror or (ll and hl) is specified works
with pytest.raises(AssertionError) as excinfo:
check_data(dataframe=_df, estimate="estimate", varlabel="estimate")
assert (
str(excinfo.value)
== 'If "moerror" is not provided, then "ll" and "hl" must be provided.'
)

##########################################################################
## Check that column creation works
##########################################################################
# Assert moerror is created if ll and hl specified
_df = pd.DataFrame(
{
"estimate": numeric_as_string,
"ll": numeric_as_string,
"hl": numeric_as_string,
}
)
processed_df = check_data(
dataframe=_df, estimate="estimate", varlabel="estimate", ll="ll", hl="hl"
)
assert "moerror" in processed_df

# Assert ll and hl is created if only moerror specified
_df = pd.DataFrame(
{
"estimate": numeric_as_string,
"moerror": numeric_as_string,
}
)
processed_df = check_data(
dataframe=_df, estimate="estimate", varlabel="estimate", moerror="moerror"
)
assert set(["ll", "hl"]).issubset(processed_df.columns)

##########################################################################
## Check annote
##########################################################################
# Assert that the check that annote and annoteheaders have the same length works
_df = pd.DataFrame({"estimate": numeric_as_string, "moerror": numeric})
_df = pd.DataFrame({"estimate": numeric_as_string})
with pytest.raises(ValueError) as excinfo:
check_data(
dataframe=_df,
estimate="estimate",
varlabel="estimate",
moerror="moerror",
annote=["col1", "col2"],
annoteheaders=["header1"],
)
Expand All @@ -124,8 +77,7 @@ def test_check_data():
dataframe=_df,
estimate="estimate",
varlabel="estimate",
moerror="moerror",
annote=["moerror"],
annote=["estimate"],
)

# Raise error if annote cannot be found in dataframe columns
Expand All @@ -134,7 +86,6 @@ def test_check_data():
dataframe=_df,
estimate="estimate",
varlabel="estimate",
moerror="moerror",
annote=["dummy"],
)
assert str(excinfo.value) == "the field dummy is not found in dataframe."
Expand All @@ -145,21 +96,19 @@ def test_check_data():
dataframe=_df,
estimate="estimate",
varlabel="moerror",
moerror="moerror",
annote=["ci_range"],
)

##########################################################################
## Check rightannote
##########################################################################
# Assert that the check that rightannote and right_annoteheaders have the same length works
_df = pd.DataFrame({"estimate": numeric_as_string, "moerror": numeric})
_df = pd.DataFrame({"estimate": numeric_as_string})
with pytest.raises(ValueError) as excinfo:
check_data(
dataframe=_df,
estimate="estimate",
varlabel="estimate",
moerror="moerror",
rightannote=["col1", "col2"],
right_annoteheaders=["header1"],
)
Expand All @@ -170,8 +119,7 @@ def test_check_data():
dataframe=_df,
estimate="estimate",
varlabel="estimate",
moerror="moerror",
rightannote=["moerror"],
rightannote=["estimate"],
)

# Raise error if rightannote cannot be found in dataframe columns
Expand All @@ -180,7 +128,6 @@ def test_check_data():
dataframe=_df,
estimate="estimate",
varlabel="estimate",
moerror="moerror",
rightannote=["dummy"],
)
assert str(excinfo.value) == "the field dummy is not found in dataframe."
Expand All @@ -191,7 +138,6 @@ def test_check_data():
dataframe=_df,
estimate="estimate",
varlabel="moerror",
moerror="moerror",
rightannote=["ci_range"],
)

Expand All @@ -203,7 +149,6 @@ def test_check_data():
dataframe=_df,
estimate="estimate",
varlabel="estimate",
moerror="moerror",
right_annoteheaders=["header1"],
)
assert (
Expand All @@ -216,7 +161,6 @@ def test_check_data():
dataframe=_df,
estimate="estimate",
varlabel="estimate",
moerror="moerror",
annoteheaders=["header1"],
)
assert (
Expand Down
4 changes: 1 addition & 3 deletions tests/test_text_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,7 @@ def test_form_est_ci():
"est_ci": ["1.00(1.00 to 1.00)", "2.00(2.00 to 2.00)"],
}
)
result_df = form_est_ci(
_df, estimate="estimate", moerror=None, ll="ll", hl="hl", decimal_precision=2
)
result_df = form_est_ci(_df, estimate="estimate", ll="ll", hl="hl", decimal_precision=2)
assert_frame_equal(result_df, correct_df)


Expand Down

0 comments on commit 2b09416

Please sign in to comment.