[SPARK-48751][INFRA][PYTHON][TESTS] Re-balance `pyspark-pandas-connect` tests on GA

### What changes were proposed in this pull request?
This PR re-balances the `pyspark-pandas-connect` tests on GA (GitHub Actions).

### Why are the changes needed?
To bring the execution times of the `pyspark-pandas-connect-part[0-3]` test jobs to a roughly even level, avoiding long-tail jobs that inflate the overall GA run time (a rough balancing sketch follows the examples below).

Here are two recently observed examples:
- https://github.com/apache/spark/pull/47135/checks?check_run_id=26784966983
  <img width="311" alt="image" src="https://github.com/apache/spark/assets/15246973/45d627bc-f0e7-4a76-bfd5-edc6e821e427">

  Most parts take around `1 hour`, but `part2` took `1h 49m` and `part3` took `2h 16m`.

- https://github.com/panbingkun/spark/actions/runs/9693237300
  <img width="296" alt="image" src="https://github.com/apache/spark/assets/15246973/6837622a-3ff3-42d7-9725-e548c161277e">
  Most parts take around `1 hour`, but `part2` took `1h 47m` and `part3` took `2h 20m`.
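
The rebalancing itself is a hand-tuned move of test modules between the `pyspark-pandas-connect-part[0-3]` lists in `dev/sparktestsupport/modules.py` (see the diff below). For intuition, the goal is a classic multiway-partitioning problem; here is a minimal greedy sketch of the idea, where the module names and per-module minutes are hypothetical placeholders, not measured values:

```python
from collections import defaultdict

# Hypothetical durations in minutes, for illustration only; real numbers
# would come from observing GA run times as in the examples above.
durations = {
    "computation.test_parity_missing_data": 14,
    "groupby.test_parity_stat": 11,
    "indexes.test_parity_datetime": 9,
    "indexes.test_parity_append": 7,
    "diff_frames_ops.test_parity_groupby_shift": 6,
    "io.test_parity_dataframe_spark_io": 5,
}

NUM_PARTS = 4  # pyspark-pandas-connect-part[0-3]


def balance(durations, num_parts):
    """Greedy longest-processing-time: put the next-slowest module
    on the currently lightest part."""
    parts = defaultdict(list)
    totals = [0] * num_parts
    for module, minutes in sorted(durations.items(), key=lambda kv: -kv[1]):
        lightest = totals.index(min(totals))
        parts[lightest].append(module)
        totals[lightest] += minutes
    return parts, totals


parts, totals = balance(durations, NUM_PARTS)
for i, total in enumerate(totals):
    print(f"part{i}: {total} min -> {parts[i]}")
```

Placing the heaviest modules first keeps them from piling up in one part, which is exactly the long-tail symptom seen in the runs above.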

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Manually, by observing the run times of the `pyspark-pandas-connect-part[0-3]` jobs.
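
For reference, per-job durations of a run can also be pulled from the GitHub Actions REST API. This is a rough sketch, not part of the patch; it assumes `requests` is installed, uses the repository and run id from the second example link above, and assumes the part name appears in the job name:

```python
from datetime import datetime

import requests

# Repository and run id taken from the second example link above.
url = "https://api.github.com/repos/panbingkun/spark/actions/runs/9693237300/jobs"
jobs = requests.get(url, params={"per_page": 100}).json()["jobs"]

for job in jobs:
    # Skip unrelated jobs and jobs without a recorded completion time.
    if "pyspark-pandas-connect" not in job["name"] or not job["completed_at"]:
        continue
    started = datetime.fromisoformat(job["started_at"].rstrip("Z"))
    completed = datetime.fromisoformat(job["completed_at"].rstrip("Z"))
    print(f"{job['name']}: {completed - started}")
```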

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes apache#47137 from panbingkun/split_pyspark_tests_to_5.

Authored-by: panbingkun <panbingkun@baidu.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
panbingkun authored and HyukjinKwon committed Jun 30, 2024
1 parent 6bfeb09 commit f49418b
Showing 1 changed file with 21 additions and 21 deletions: dev/sparktestsupport/modules.py
@@ -1176,6 +1176,9 @@ def __hash__(self):
 "pyspark.pandas.tests.connect.indexes.test_parity_reindex",
 "pyspark.pandas.tests.connect.indexes.test_parity_rename",
 "pyspark.pandas.tests.connect.indexes.test_parity_reset_index",
+"pyspark.pandas.tests.connect.indexes.test_parity_datetime",
+"pyspark.pandas.tests.connect.indexes.test_parity_datetime_at",
+"pyspark.pandas.tests.connect.indexes.test_parity_datetime_between",
 "pyspark.pandas.tests.connect.computation.test_parity_any_all",
 "pyspark.pandas.tests.connect.computation.test_parity_apply_func",
 "pyspark.pandas.tests.connect.computation.test_parity_binary_ops",
@@ -1188,6 +1191,12 @@ def __hash__(self):
 "pyspark.pandas.tests.connect.computation.test_parity_describe",
 "pyspark.pandas.tests.connect.computation.test_parity_eval",
 "pyspark.pandas.tests.connect.computation.test_parity_melt",
+"pyspark.pandas.tests.connect.computation.test_parity_missing_data",
+"pyspark.pandas.tests.connect.groupby.test_parity_stat",
+"pyspark.pandas.tests.connect.groupby.test_parity_stat_adv",
+"pyspark.pandas.tests.connect.groupby.test_parity_stat_ddof",
+"pyspark.pandas.tests.connect.groupby.test_parity_stat_func",
+"pyspark.pandas.tests.connect.groupby.test_parity_stat_prod",
 ],
 excluded_python_implementations=[
 "PyPy" # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and
@@ -1253,6 +1262,18 @@ def __hash__(self):
 "pyspark.pandas.tests.connect.reshape.test_parity_get_dummies_object",
 "pyspark.pandas.tests.connect.reshape.test_parity_get_dummies_prefix",
 "pyspark.pandas.tests.connect.reshape.test_parity_merge_asof",
+"pyspark.pandas.tests.connect.indexes.test_parity_append",
+"pyspark.pandas.tests.connect.indexes.test_parity_intersection",
+"pyspark.pandas.tests.connect.indexes.test_parity_monotonic",
+"pyspark.pandas.tests.connect.indexes.test_parity_union",
+"pyspark.pandas.tests.connect.indexes.test_parity_datetime_ceil",
+"pyspark.pandas.tests.connect.indexes.test_parity_datetime_floor",
+"pyspark.pandas.tests.connect.indexes.test_parity_datetime_iso",
+"pyspark.pandas.tests.connect.indexes.test_parity_datetime_map",
+"pyspark.pandas.tests.connect.indexes.test_parity_datetime_property",
+"pyspark.pandas.tests.connect.indexes.test_parity_datetime_round",
+"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_shift",
+"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_transform",
 # fallback
 "pyspark.pandas.tests.connect.frame.test_parity_asfreq",
 "pyspark.pandas.tests.connect.frame.test_parity_asof",
@@ -1278,7 +1299,6 @@ def __hash__(self):
 "pyspark.pandas.tests.connect.computation.test_parity_pivot_table_multi_idx",
 "pyspark.pandas.tests.connect.computation.test_parity_pivot_table_multi_idx_adv",
 "pyspark.pandas.tests.connect.computation.test_parity_stats",
-"pyspark.pandas.tests.connect.computation.test_parity_missing_data",
 "pyspark.pandas.tests.connect.frame.test_parity_interpolate",
 "pyspark.pandas.tests.connect.frame.test_parity_interpolate_error",
 "pyspark.pandas.tests.connect.resample.test_parity_frame",
@@ -1351,24 +1371,6 @@ def __hash__(self):
 "pyspark.pandas.tests.connect.io.test_parity_dataframe_conversion",
 "pyspark.pandas.tests.connect.io.test_parity_dataframe_spark_io",
 "pyspark.pandas.tests.connect.io.test_parity_series_conversion",
-"pyspark.pandas.tests.connect.groupby.test_parity_stat",
-"pyspark.pandas.tests.connect.groupby.test_parity_stat_adv",
-"pyspark.pandas.tests.connect.groupby.test_parity_stat_ddof",
-"pyspark.pandas.tests.connect.groupby.test_parity_stat_func",
-"pyspark.pandas.tests.connect.groupby.test_parity_stat_prod",
-"pyspark.pandas.tests.connect.indexes.test_parity_append",
-"pyspark.pandas.tests.connect.indexes.test_parity_intersection",
-"pyspark.pandas.tests.connect.indexes.test_parity_monotonic",
-"pyspark.pandas.tests.connect.indexes.test_parity_union",
-"pyspark.pandas.tests.connect.indexes.test_parity_datetime",
-"pyspark.pandas.tests.connect.indexes.test_parity_datetime_at",
-"pyspark.pandas.tests.connect.indexes.test_parity_datetime_between",
-"pyspark.pandas.tests.connect.indexes.test_parity_datetime_ceil",
-"pyspark.pandas.tests.connect.indexes.test_parity_datetime_floor",
-"pyspark.pandas.tests.connect.indexes.test_parity_datetime_iso",
-"pyspark.pandas.tests.connect.indexes.test_parity_datetime_map",
-"pyspark.pandas.tests.connect.indexes.test_parity_datetime_property",
-"pyspark.pandas.tests.connect.indexes.test_parity_datetime_round",
 "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic",
 "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_ext",
 "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_ext_float",
@@ -1391,9 +1393,7 @@ def __hash__(self):
 "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_diff_len",
 "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_fillna",
 "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_filter",
-"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_shift",
 "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_split_apply_combine",
-"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_transform",
 "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding",
 "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding_adv",
 "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding_count",
