Skip to content

Commit

Permalink
BUG: fixed merging with empty frame containing an Int64 column (pandas-dev#25183)
Browse files Browse the repository at this point in the history
  • Loading branch information
josham authored and Pingviinituutti committed Feb 28, 2019
1 parent 2c49951 commit 4cbed72
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 1 deletion.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ Bug Fixes
**Other**

- Bug in :meth:`Series.is_unique` where single occurrences of ``NaN`` were not considered unique (:issue:`25180`)
-
- Bug in :func:`merge` when merging an empty ``DataFrame`` with an ``Int64`` column or a non-empty ``DataFrame`` with an ``Int64`` column that is all ``NaN`` (:issue:`25183`)
-

.. _whatsnew_0.242.contributors:
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
pass
elif getattr(self.block, 'is_sparse', False):
pass
elif getattr(self.block, 'is_extension', False):
pass
else:
missing_arr = np.empty(self.shape, dtype=empty_dtype)
missing_arr.fill(fill_value)
Expand Down
78 changes: 78 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,54 @@ def get_test_data(ngroups=NGROUPS, n=N):
return arr


def get_series():
    """
    Build the list of single-element Series used to parametrize the
    merge tests, one per dtype flavour.

    In order: plain ``int64``, nullable ``Int64``, float, string, bool,
    a naive timestamp and a timestamp localized to ``US/Eastern``.
    """
    naive_stamp = pd.Timestamp('2018-01-01')
    eastern_stamp = pd.Timestamp('2018-01-01', tz='US/Eastern')

    # The two integer flavours need an explicit dtype; every other entry
    # relies on pandas' own inference from the single value.
    series = [pd.Series([1], dtype=int_dtype)
              for int_dtype in ('int64', 'Int64')]
    series.extend(
        pd.Series([value])
        for value in (1.23, 'foo', True, naive_stamp, eastern_stamp)
    )
    return series


def get_series_na():
    """
    Build the list of single-element Series whose only value is missing.

    In order: ``NaN`` stored as ``Int64``, as ``float`` and as ``object``,
    followed by a Series holding ``NaT``.
    """
    nan_backed = [pd.Series([np.nan], dtype=dtype_name)
                  for dtype_name in ('Int64', 'float', 'object')]
    nat_backed = pd.Series([pd.NaT])
    return nan_backed + [nat_backed]


@pytest.fixture(
    params=get_series(),
    ids=lambda ser: ser.dtype.name,
)
def series_of_dtype(request):
    """
    Parametrized fixture yielding, one at a time, each Series produced
    by ``get_series``; the test id is the name of the Series' dtype.
    """
    return request.param


@pytest.fixture(
    params=get_series(),
    ids=lambda ser: ser.dtype.name,
)
def series_of_dtype2(request):
    """
    Second, identically parametrized copy of the ``series_of_dtype``
    fixture, so that a single test can request two dtype parameters
    at once.
    """
    return request.param


@pytest.fixture(
    params=get_series_na(),
    ids=lambda ser: ser.dtype.name,
)
def series_of_dtype_all_na(request):
    """
    Parametrized fixture yielding, one at a time, each all-NA Series
    produced by ``get_series_na``; the test id is the name of the
    Series' dtype.
    """
    return request.param


class TestMerge(object):

def setup_method(self, method):
Expand Down Expand Up @@ -428,6 +476,36 @@ def check2(exp, kwarg):
check1(exp_in, kwarg)
check2(exp_out, kwarg)

def test_merge_empty_frame(self, series_of_dtype, series_of_dtype2):
    """
    Merge an empty slice of a two-column frame with the full frame on
    ``key`` and check that the (empty) result carries the original
    column dtypes.
    """
    # GH 25183
    df = pd.DataFrame(
        {'key': series_of_dtype, 'value': series_of_dtype2},
        columns=['key', 'value'])
    key_dtype = df.dtypes['key']
    value_dtype = df.dtypes['value']

    # Expected result has no rows; only column order and dtypes matter.
    expected = pd.DataFrame(
        {
            'value_x': pd.Series(dtype=value_dtype),
            'key': pd.Series(dtype=key_dtype),
            'value_y': pd.Series(dtype=value_dtype),
        },
        columns=['value_x', 'key', 'value_y'])

    # df[:0] keeps the schema of df but drops every row.
    actual = df[:0].merge(df, on='key')
    assert_frame_equal(actual, expected)

def test_merge_all_na_column(self, series_of_dtype,
                             series_of_dtype_all_na):
    """
    Merge two identical frames on ``key`` where the ``value`` column is
    entirely NA, and check that both suffixed value columns come back
    unchanged.
    """
    # GH 25183
    def build_frame():
        # Left and right operands are constructed from the same inputs.
        return pd.DataFrame(
            {'key': series_of_dtype, 'value': series_of_dtype_all_na},
            columns=['key', 'value'])

    df_left = build_frame()
    df_right = build_frame()

    result_columns = ['key', 'value_x', 'value_y']
    expected = pd.DataFrame(
        {
            'key': series_of_dtype,
            'value_x': series_of_dtype_all_na,
            'value_y': series_of_dtype_all_na,
        },
        columns=result_columns)

    actual = df_left.merge(df_right, on='key')
    assert_frame_equal(actual, expected)

def test_merge_nosort(self):
# #2098, anything to do?

Expand Down

0 comments on commit 4cbed72

Please sign in to comment.