BUG: On Join, with a list containing MultiIndexes check uniqueness of index to join (pandas-dev#57676)
Dacops · May 8, 2024 · 611cc94 · 611cc94
1 parent 59f6a33
commit 611cc94
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 3 deletions.
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -477,6 +477,7 @@ Other
 - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
+- Bug in :meth:`DataFrame.join` when joining a list of objects containing a :class:`MultiIndex`, where the decision to concat or merge did not check the uniqueness of the individual index levels being joined on (:issue:`57676`)
 - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
 - Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
 - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -10679,7 +10679,19 @@ def join(
             # "Iterable[Union[DataFrame, Series]]" due to the if statements
             frames = [cast("DataFrame | Series", self)] + list(other)
 
-            can_concat = all(df.index.is_unique for df in frames)
+            # Frames may carry a MultiIndex, so check uniqueness per index level,
+            # considering only the level names shared by every frame being joined
+            indexes = (set(df.index.names) for df in frames)
+            common_indexes = set.intersection(*indexes)
+
+            if not common_indexes:
+                raise ValueError("cannot join with no overlapping index names")
+
+            can_concat = False
+            for idx in common_indexes:
+                can_concat = all(
+                    df.index.get_level_values(idx).is_unique for df in frames
+                )
 
             # join indexes only using concat
             if can_concat:

diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py
@@ -107,8 +107,6 @@ def test_suffix_on_list_join():
 
     # check proper errors are raised
     msg = "Suffixes not supported when joining multiple DataFrames"
-    with pytest.raises(ValueError, match=msg):
-        first.join([second], lsuffix="y")
     with pytest.raises(ValueError, match=msg):
         first.join([second, third], rsuffix="x")
     with pytest.raises(ValueError, match=msg):
@@ -562,3 +560,21 @@ def test_frame_join_tzaware(self):
 
         tm.assert_index_equal(result.index, expected)
         assert result.index.tz.zone == "US/Central"
+
+    def test_join_lists_index_with_multiindex(self):
+        test1 = DataFrame(
+            {"cat": pd.Categorical(["a", "v", "d"])},
+            index=Index(["a", "b", "c"], name="y"),
+        )
+        test2 = DataFrame(
+            {"foo": np.arange(6)},
+            index=MultiIndex.from_tuples(
+                [(0, "a"), (0, "b"), (0, "c"), (1, "a"), (1, "b"), (1, "c")],
+                names=("x", "y"),
+            ),
+        )
+
+        result = test2.join([test1])
+        expected = test2.join(test1)
+
+        tm.assert_frame_equal(result, expected)