Skip to content

Commit

Permalink
Merge pull request #183 from AI-SDC/updating_pivot_table
Browse files Browse the repository at this point in the history
updating pivot_table
  • Loading branch information
mahaalbashir committed Oct 31, 2023
2 parents ac19e10 + eddc20e commit 7a4fadb
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 156 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Expand Up @@ -3,6 +3,15 @@
## Development

Changes:
* Update table suppression when totals are true for pivot table ([#165](https://github.com/AI-SDC/ACRO/pull/165))
* Fix the problem of shape mismatch when there are two columns and the aggfunc is count or sum ([#167](https://github.com/AI-SDC/ACRO/pull/167))
* Remove all files and folders created during testing ([#168](https://github.com/AI-SDC/ACRO/pull/168))
* Create an example notebook with simple examples of acro ([#170](https://github.com/AI-SDC/ACRO/pull/170))
* Add support for histogram ([#176](https://github.com/AI-SDC/ACRO/pull/176))
* Add inherited members from acro_tables and acro_regression to the sphinx docs ([#177](https://github.com/AI-SDC/ACRO/pull/177))
* Update the R help function ([#178](https://github.com/AI-SDC/ACRO/pull/178))
* Update the finalise function to check the provided folder name and ask for a new one if it already exists ([#179](https://github.com/AI-SDC/ACRO/pull/179))
* Add histogram and survival analysis to R ([#182](https://github.com/AI-SDC/ACRO/pull/182))

## Version 0.4.3 (Sep 22, 2023)

Expand Down
68 changes: 47 additions & 21 deletions acro/acro_tables.py
Expand Up @@ -137,27 +137,7 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals
normalize,
)
# delete empty rows and columns from table
deleted_rows = []
deleted_cols = []
# define empty columns and rows using boolean masks
empty_cols_mask = table.sum(axis=0) == 0
empty_rows_mask = table.sum(axis=1) == 0

deleted_cols = list(table.columns[empty_cols_mask])
table = table.loc[:, ~empty_cols_mask]
deleted_rows = list(table.index[empty_rows_mask])
table = table.loc[~empty_rows_mask, :]

# create a message with the deleted column's names
comments = []
if deleted_cols:
msg_cols = ", ".join(str(col) for col in deleted_cols)
comments.append(f"Empty columns: {msg_cols} were deleted.")
if deleted_rows:
msg_rows = ", ".join(str(row) for row in deleted_rows)
comments.append(f"Empty rows: {msg_rows} were deleted.")
if comments:
logger.info(" ".join(comments))
table, comments = delete_empty_rows_columns(table)

masks = create_crosstab_masks(
index,
Expand Down Expand Up @@ -244,6 +224,9 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals
(hierarchical indexes) on the index and columns of the result
DataFrame.
To provide consistent behaviour with different aggregation functions,
'empty' rows or columns -i.e. that are all NaN or 0 (count,sum) are removed.
Parameters
----------
data : DataFrame
Expand Down Expand Up @@ -307,6 +290,9 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals
sort,
)

# delete empty rows and columns from table
table, comments = delete_empty_rows_columns(table)

# suppression masks to apply based on the following checks
masks: dict[str, DataFrame] = {}

Expand Down Expand Up @@ -387,6 +373,7 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals
summary=summary,
outcome=outcome,
output=[table],
comments=comments,
)
return table

Expand Down Expand Up @@ -837,6 +824,45 @@ def create_crosstab_masks( # pylint: disable=too-many-arguments,too-many-locals
return masks


def delete_empty_rows_columns(table: DataFrame) -> tuple[DataFrame, list[str]]:
    """Delete empty rows and columns from a table.

    A row or column is treated as empty when every one of its cells is
    either NaN or equal to 0. Emptiness is judged cell by cell rather than
    from the row/column total, so a row or column whose non-zero values
    happen to cancel out (e.g. 5 and -5) is kept.

    Parameters
    ----------
    table : DataFrame
        The table where the empty rows and columns will be deleted from.

    Returns
    -------
    DataFrame
        The resulting table where the empty columns and rows were deleted.
    list[str]
        Comments naming the deleted columns and rows; the list is empty
        when nothing was deleted.
    """
    # a cell carries no information when it is missing or exactly zero
    blank_cells = table.isna() | (table == 0)
    # both masks are computed on the original table, before anything is dropped
    empty_cols_mask = blank_cells.all(axis=0)
    empty_rows_mask = blank_cells.all(axis=1)

    deleted_cols = list(table.columns[empty_cols_mask])
    table = table.loc[:, ~empty_cols_mask]
    deleted_rows = list(table.index[empty_rows_mask])
    table = table.loc[~empty_rows_mask, :]

    # create a message with the names of the deleted columns and rows
    comments = []
    if deleted_cols:
        msg_cols = ", ".join(str(col) for col in deleted_cols)
        comments.append(f"Empty columns: {msg_cols} were deleted.")
    if deleted_rows:
        msg_rows = ", ".join(str(row) for row in deleted_rows)
        comments.append(f"Empty rows: {msg_rows} were deleted.")
    if comments:
        logger.info(" ".join(comments))
    return (table, comments)


def rounded_survival_table(survival_table):
"""Calculates the rounded surival function."""
death_censored = (
Expand Down
135 changes: 0 additions & 135 deletions notebooks/test.ipynb
Expand Up @@ -1657,141 +1657,6 @@
"table"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "3f016823",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>year</th>\n",
" <th>2010</th>\n",
" <th>2011</th>\n",
" <th>2012</th>\n",
" <th>2013</th>\n",
" <th>2014</th>\n",
" <th>2015</th>\n",
" <th>All</th>\n",
" </tr>\n",
" <tr>\n",
" <th>grant_type</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>138906688.0</td>\n",
" <td>127533696.0</td>\n",
" <td>171878704.0</td>\n",
" <td>203357200.0</td>\n",
" <td>206222208.0</td>\n",
" <td>133601200.0</td>\n",
" <td>9.814997e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>N</th>\n",
" <td>0.0</td>\n",
" <td>7192804.0</td>\n",
" <td>7779685.0</td>\n",
" <td>8728330.0</td>\n",
" <td>7858697.0</td>\n",
" <td>8501187.0</td>\n",
" <td>4.006070e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R</th>\n",
" <td>504137056.0</td>\n",
" <td>532464704.0</td>\n",
" <td>480105472.0</td>\n",
" <td>511361408.0</td>\n",
" <td>554594176.0</td>\n",
" <td>551457280.0</td>\n",
" <td>3.134120e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R/G</th>\n",
" <td>46544000.0</td>\n",
" <td>128380000.0</td>\n",
" <td>134480000.0</td>\n",
" <td>134125000.0</td>\n",
" <td>142766000.0</td>\n",
" <td>146228992.0</td>\n",
" <td>7.325240e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All</th>\n",
" <td>689587776.0</td>\n",
" <td>795571264.0</td>\n",
" <td>794243904.0</td>\n",
" <td>857571968.0</td>\n",
" <td>911441088.0</td>\n",
" <td>839788672.0</td>\n",
" <td>4.888204e+09</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"year 2010 2011 2012 2013 2014 \\\n",
"grant_type \n",
"G 138906688.0 127533696.0 171878704.0 203357200.0 206222208.0 \n",
"N 0.0 7192804.0 7779685.0 8728330.0 7858697.0 \n",
"R 504137056.0 532464704.0 480105472.0 511361408.0 554594176.0 \n",
"R/G 46544000.0 128380000.0 134480000.0 134125000.0 142766000.0 \n",
"All 689587776.0 795571264.0 794243904.0 857571968.0 911441088.0 \n",
"\n",
"year 2015 All \n",
"grant_type \n",
"G 133601200.0 9.814997e+08 \n",
"N 8501187.0 4.006070e+07 \n",
"R 551457280.0 3.134120e+09 \n",
"R/G 146228992.0 7.325240e+08 \n",
"All 839788672.0 4.888204e+09 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"table = pd.crosstab(\n",
" index=df[\"grant_type\"],\n",
" columns=df[\"year\"],\n",
" values=df[\"inc_grants\"],\n",
" aggfunc=\"sum\",\n",
" margins=True,\n",
")\n",
"table"
]
},
{
"cell_type": "code",
"execution_count": 18,
Expand Down
33 changes: 33 additions & 0 deletions test/test_initial.py
Expand Up @@ -169,6 +169,39 @@ def test_pivot_table_cols(data, acro):
shutil.rmtree(PATH)


def test_pivot_table_with_aggfunc_sum(data, acro):
    """Check the comments recorded for sum pivot tables with empty cells.

    Two pivot tables are built with aggfunc "sum": one with the two-level
    grouping as columns and one with it as the index. Each output is
    expected to carry a single comment naming the deleted empty
    columns or rows.
    """
    acro = ACRO(suppress=False)
    # same query twice, with the roles of index and columns swapped
    layouts = (
        ("year", ["grant_type", "survivor"]),
        (["grant_type", "survivor"], "year"),
    )
    for row_keys, col_keys in layouts:
        _ = acro.pivot_table(
            data,
            index=row_keys,
            columns=col_keys,
            values="inc_grants",
            aggfunc="sum",
        )
    acro.add_exception("output_0", "Let me have it")
    acro.add_exception("output_1", "I need this output")
    results: Records = acro.finalise(PATH)
    first_output = results.get_index(0)
    second_output = results.get_index(1)
    expected_first = [
        "Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted."
    ]
    expected_second = [
        "Empty rows: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted."
    ]
    assert first_output.comments == expected_first
    assert second_output.comments == expected_second
    shutil.rmtree(PATH)


def test_ols(data, acro):
"""Ordinary Least Squares test."""
new_df = data[["inc_activity", "inc_grants", "inc_donations", "total_costs"]]
Expand Down

0 comments on commit 7a4fadb

Please sign in to comment.