Skip to content

Commit

Permalink
Melting with not present column does not produce error (pandas-dev#23575
Browse files Browse the repository at this point in the history
)
  • Loading branch information
michaelsilverstein authored and Pingviinituutti committed Feb 28, 2019
1 parent a396e79 commit 88f9b80
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1442,6 +1442,7 @@ Reshaping
- Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue:`22796`)
- Bug in :func:`merge_asof` where confusing error message raised when attempting to merge with missing values (:issue:`23189`)
- Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have a :class:`MultiIndex` for columns (:issue:`23033`).
- Bug in :func:`pandas.melt` where passing column names that are not present in the ``DataFrame`` did not produce an error; a ``KeyError`` is now raised (:issue:`23575`)
- Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`)
- Bug in ``Series`` construction when passing no data and ``dtype=str`` (:issue:`22477`)

Expand Down
19 changes: 19 additions & 0 deletions pandas/core/reshape/melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pandas import compat
from pandas.core.arrays import Categorical
from pandas.core.frame import _shared_docs
from pandas.core.indexes.base import Index
from pandas.core.reshape.concat import concat
from pandas.core.tools.numeric import to_numeric

Expand All @@ -24,6 +25,12 @@
def melt(frame, id_vars=None, value_vars=None, var_name=None,
value_name='value', col_level=None):
# TODO: what about the existing index?
# If the columns are a MultiIndex, gather the labels from all levels so that
# the presence of `id_vars` and `value_vars` can be checked
if isinstance(frame.columns, ABCMultiIndex):
cols = [x for c in frame.columns for x in c]
else:
cols = list(frame.columns)
if id_vars is not None:
if not is_list_like(id_vars):
id_vars = [id_vars]
Expand All @@ -32,7 +39,13 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
raise ValueError('id_vars must be a list of tuples when columns'
' are a MultiIndex')
else:
# Check that `id_vars` are in frame
id_vars = list(id_vars)
missing = Index(np.ravel(id_vars)).difference(cols)
if not missing.empty:
raise KeyError("The following 'id_vars' are not present"
" in the DataFrame: {missing}"
"".format(missing=list(missing)))
else:
id_vars = []

Expand All @@ -45,6 +58,12 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
' columns are a MultiIndex')
else:
value_vars = list(value_vars)
# Check that `value_vars` are in frame
missing = Index(np.ravel(value_vars)).difference(cols)
if not missing.empty:
raise KeyError("The following 'value_vars' are not present in"
" the DataFrame: {missing}"
"".format(missing=list(missing)))
frame = frame.loc[:, id_vars + value_vars]
else:
frame = frame.copy()
Expand Down
51 changes: 51 additions & 0 deletions pandas/tests/reshape/test_melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,14 @@ def test_vars_work_with_multiindex(self):
result = self.df1.melt(id_vars=[('A', 'a')], value_vars=[('B', 'b')])
tm.assert_frame_equal(result, expected)

def test_single_vars_work_with_multiindex(self):
    # Melt on a single column level: with ``col_level=0`` bare labels
    # ('A', 'B') are passed instead of (level0, level1) tuples.
    melted = self.df1.melt(['A'], ['B'], col_level=0)

    # 'A' is kept as the identifier column; 'CAP' holds the melted label
    # (presumably the name of column level 0 in the ``df1`` fixture --
    # confirm against the class setup) and 'value' the melted data.
    id_column = {0: 1.067683, 1: -1.321405, 2: -0.807333}
    variable_column = {0: 'B', 1: 'B', 2: 'B'}
    value_column = {0: -1.110463, 1: 0.368915, 2: 0.08298}
    wanted = DataFrame({
        'A': id_column,
        'CAP': variable_column,
        'value': value_column})

    tm.assert_frame_equal(melted, wanted)

def test_tuple_vars_fail_with_multiindex(self):
# melt should fail with an informative error message if
# the columns have a MultiIndex and a tuple is passed
Expand Down Expand Up @@ -233,6 +241,49 @@ def test_pandas_dtypes(self, col):
expected.columns = ['klass', 'col', 'attribute', 'value']
tm.assert_frame_equal(result, expected)

def test_melt_missing_columns_raises(self):
    # GH-23575
    # Melting with `id_vars` / `value_vars` labels that are absent from the
    # frame's columns must raise a KeyError naming the missing labels.
    template = "The following '{Var}' are not present in the DataFrame: {Col}"

    # Flat-column frame; the values are irrelevant, only the labels matter
    flat = pd.DataFrame(np.random.randn(5, 4), columns=list('abcd'))

    # Same data under two-level columns: ('A', 'a'), ('B', 'b'), ...
    multi = flat.copy()
    multi.columns = [list('ABCD'), list('abcd')]

    # (frame, offending argument, regex-escaped missing labels,
    #  positional melt args, keyword melt args)
    cases = [
        # Missing `value_vars` column name
        (flat, 'value_vars', "\\['C'\\]",
         (['a', 'b'], ['C', 'd']), {}),
        # Missing `id_vars` column name
        (flat, 'id_vars', "\\['A'\\]",
         (['A', 'b'], ['c', 'd']), {}),
        # Multiple missing
        (flat, 'id_vars', "\\['not_here', 'or_there'\\]",
         (['a', 'b', 'not_here', 'or_there'], ['c', 'd']), {}),
        # Multiindex melt fails if column is missing from multilevel melt
        (multi, 'id_vars', "\\['E'\\]",
         ([('E', 'a')], [('B', 'b')]), {}),
        # Multiindex fails if column is missing from single level melt
        (multi, 'value_vars', "\\['F'\\]",
         (['A'], ['F']), {'col_level': 0}),
    ]

    for frame, which, missing, args, kwargs in cases:
        pattern = template.format(Var=which, Col=missing)
        with pytest.raises(KeyError, match=pattern):
            frame.melt(*args, **kwargs)


class TestLreshape(object):

Expand Down

0 comments on commit 88f9b80

Please sign in to comment.