From 4ee37edf063aa7b3c0e970fcf769d2d2755938e3 Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Tue, 2 Jul 2024 19:26:18 +0900 Subject: [PATCH] [SPARK-48764][PYTHON] Filtering out IPython-related frames from user stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR proposes to fix the internal function `_capture_call_site` so that it filters out IPython-related frames from the user stack. ### Why are the changes needed? IPython-related frames unnecessarily pollute the user stack, which harms debuggability in IPython notebooks. For example, some garbage frames are recorded from `IPython` and `ipykernel`, such as: - `...lib/python3.9/site-packages/IPython/core/interactiveshell.py...` - `...lib/python3.9/site-packages/ipykernel/zmqshell.py...` ### Does this PR introduce _any_ user-facing change? No API changes, but the user stack from IPython will be cleaned up as shown below: **Before** Screenshot 2024-07-01 at 3 26 45 PM **After** Screenshot 2024-07-01 at 3 25 07 PM ### How was this patch tested? The existing CI should pass. ### Was this patch authored or co-authored using generative AI tooling? No Closes #47159 from itholic/ipython_followup. 
Authored-by: Haejoon Lee Signed-off-by: Hyukjin Kwon --- python/pyspark/errors/utils.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/python/pyspark/errors/utils.py b/python/pyspark/errors/utils.py index 9155bfb54abe8..89721d23c3858 100644 --- a/python/pyspark/errors/utils.py +++ b/python/pyspark/errors/utils.py @@ -175,9 +175,20 @@ def _capture_call_site(spark_session: "SparkSession", depth: int) -> str: # We try import here since IPython is not a required dependency try: - from IPython import get_ipython - - ipython = get_ipython() + import IPython + + # ipykernel is required for IPython + import ipykernel # type: ignore[import-not-found] + + ipython = IPython.get_ipython() + # Filtering out IPython related frames + ipy_root = os.path.dirname(IPython.__file__) + ipykernel_root = os.path.dirname(ipykernel.__file__) + selected_frames = [ + frame + for frame in selected_frames + if (ipy_root not in frame.filename) and (ipykernel_root not in frame.filename) + ] except ImportError: ipython = None