Skip to content

Commit

Permalink
BUG: On Join, with a list containing MultiIndexes check uniqueness of…
Browse files Browse the repository at this point in the history
… index to join (pandas-dev#57676)
  • Loading branch information
Dacops committed May 8, 2024
1 parent 59f6a33 commit 611cc94
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 3 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ Other
- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow using the ``tan`` function. (:issue:`55091`)
- Bug in :meth:`DataFrame.join` when joining a list of frames containing a :class:`MultiIndex`, where the decision to concat or merge did not check the uniqueness of the individual index levels (:issue:`57676`)
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
- Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would raise a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
Expand Down
14 changes: 13 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10679,7 +10679,19 @@ def join(
# "Iterable[Union[DataFrame, Series]]" due to the if statements
frames = [cast("DataFrame | Series", self)] + list(other)

can_concat = all(df.index.is_unique for df in frames)
# We might need to get indexes out of MultiIndexes, checking only the
# common indexes between the inserted frames
indexes = (set(df.index.names) for df in frames)
common_indexes = set.intersection(*indexes)

if not common_indexes:
raise ValueError("cannot join with no overlapping index names")

can_concat = False
for idx in common_indexes:
can_concat = all(
df.index.get_level_values(idx).is_unique for df in frames
)

# join indexes only using concat
if can_concat:
Expand Down
20 changes: 18 additions & 2 deletions pandas/tests/frame/methods/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,6 @@ def test_suffix_on_list_join():

# check proper errors are raised
msg = "Suffixes not supported when joining multiple DataFrames"
with pytest.raises(ValueError, match=msg):
first.join([second], lsuffix="y")
with pytest.raises(ValueError, match=msg):
first.join([second, third], rsuffix="x")
with pytest.raises(ValueError, match=msg):
Expand Down Expand Up @@ -562,3 +560,21 @@ def test_frame_join_tzaware(self):

tm.assert_index_equal(result.index, expected)
assert result.index.tz.zone == "US/Central"

def test_join_lists_index_with_multiindex(self):
    """Joining a one-element list must match joining that frame directly.

    The calling frame is indexed by a two-level MultiIndex named
    ("x", "y"); the frame being joined is indexed by a plain Index named
    "y", so "y" is the only index name the two frames share.
    """
    # Frame to be joined: single-level index "y" and a categorical column.
    single_level = Index(["a", "b", "c"], name="y")
    right = DataFrame({"cat": pd.Categorical(["a", "v", "d"])}, index=single_level)

    # Calling frame: each "y" label occurs once under each "x" value, so
    # the "y" level on its own contains repeated labels.
    pairs = [(outer, inner) for outer in (0, 1) for inner in ("a", "b", "c")]
    two_level = MultiIndex.from_tuples(pairs, names=("x", "y"))
    left = DataFrame({"foo": np.arange(6)}, index=two_level)

    joined_via_list = left.join([right])
    joined_via_frame = left.join(right)

    tm.assert_frame_equal(joined_via_list, joined_via_frame)

0 comments on commit 611cc94

Please sign in to comment.