diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 2cb1780b6be824..f46229feee2503 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1817,6 +1817,7 @@ Reshaping - Bug in :func:`DataFrame.unstack` where a ``ValueError`` was raised when unstacking timezone aware values (:issue:`18338`) - Bug in :func:`DataFrame.stack` where timezone aware values were converted to timezone naive values (:issue:`19420`) - Bug in :func:`merge_asof` where a ``TypeError`` was raised when ``by_col`` were timezone aware values (:issue:`21184`) +- Bug in :class:`DataFrame` construction where the ``ValueError`` message reported the shape of the passed values and the shape implied by the indices transposed (:issue:`20742`) .. _whatsnew_0240.bug_fixes.sparse: @@ -1854,6 +1855,7 @@ Other - Bug where C variables were declared with external linkage causing import errors if certain other C libraries were imported before Pandas. (:issue:`24113`) + .. _whatsnew_0.24.0.contributors: Contributors diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index ab033ff4c1c4bc..050c3d3e87fc61 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1674,7 +1674,15 @@ def create_block_manager_from_arrays(arrays, names, axes): def construction_error(tot_items, block_shape, axes, e=None): """ raise a helpful message about our construction """ passed = tuple(map(int, [tot_items] + list(block_shape))) - implied = tuple(map(int, [len(ax) for ax in axes])) + # Correcting the user facing error message during dataframe construction + if len(passed) <= 2: + passed = passed[::-1] + + implied = tuple(len(ax) for ax in axes) + # Correcting the user facing error message during dataframe construction + if len(implied) <= 2: + implied = implied[::-1] + if passed == implied and e is not None: raise e if block_shape[0] == 0: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c8b3f23db1492f..4e0143c368e105 100644 --- 
a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -386,25 +386,35 @@ def test_constructor_error_msgs(self): 'B': ['a', 'b', 'c']}) # wrong size ndarray, GH 3105 - msg = r"Shape of passed values is \(3, 4\), indices imply \(3, 3\)" + msg = r"Shape of passed values is \(4, 3\), indices imply \(3, 3\)" with pytest.raises(ValueError, match=msg): DataFrame(np.arange(12).reshape((4, 3)), columns=['foo', 'bar', 'baz'], index=pd.date_range('2000-01-01', periods=3)) + arr = np.array([[4, 5, 6]]) + msg = r"Shape of passed values is \(1, 3\), indices imply \(1, 4\)" + with pytest.raises(ValueError, match=msg): + DataFrame(index=[0], columns=range(0, 4), data=arr) + + arr = np.array([4, 5, 6]) + msg = r"Shape of passed values is \(3, 1\), indices imply \(1, 4\)" + with pytest.raises(ValueError, match=msg): + DataFrame(index=[0], columns=range(0, 4), data=arr) + # higher dim raise exception with pytest.raises(ValueError, match='Must pass 2-d input'): DataFrame(np.zeros((3, 3, 3)), columns=['A', 'B', 'C'], index=[1]) # wrong size axis labels msg = ("Shape of passed values " - r"is \(3, 2\), indices " - r"imply \(3, 1\)") + r"is \(2, 3\), indices " + r"imply \(1, 3\)") with pytest.raises(ValueError, match=msg): DataFrame(np.random.rand(2, 3), columns=['A', 'B', 'C'], index=[1]) msg = ("Shape of passed values " - r"is \(3, 2\), indices " + r"is \(2, 3\), indices " r"imply \(2, 2\)") with pytest.raises(ValueError, match=msg): DataFrame(np.random.rand(2, 3), columns=['A', 'B'], index=[1, 2]) @@ -638,10 +648,10 @@ def _check_basic_constructor(self, empty): assert frame.values.dtype == np.int64 # wrong size axis labels - msg = r'Shape of passed values is \(3, 2\), indices imply \(3, 1\)' + msg = r'Shape of passed values is \(2, 3\), indices imply \(1, 3\)' with pytest.raises(ValueError, match=msg): DataFrame(mat, columns=['A', 'B', 'C'], index=[1]) - msg = r'Shape of passed values is \(3, 2\), indices imply \(2, 2\)' + msg = r'Shape of passed 
values is \(2, 3\), indices imply \(2, 2\)' with pytest.raises(ValueError, match=msg): DataFrame(mat, columns=['A', 'B'], index=[1, 2]) @@ -1805,7 +1815,7 @@ def test_from_records_to_records(self): tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2)) # wrong length - msg = r'Shape of passed values is \(3, 2\), indices imply \(3, 1\)' + msg = r'Shape of passed values is \(2, 3\), indices imply \(1, 3\)' with pytest.raises(ValueError, match=msg): DataFrame.from_records(arr, index=index[:-1]) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 155083900f83a5..23c40276072d63 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -346,7 +346,7 @@ def test_frame_from_json_bad_data(self): json = StringIO('{"columns":["A","B"],' '"index":["2","3"],' '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}') - msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)" + msg = r"Shape of passed values is \(3, 2\), indices imply \(2, 2\)" with pytest.raises(ValueError, match=msg): read_json(json, orient="split")