Skip to content

Commit

Permalink
Merge pull request #172 from jeffbrennan/fix_column_to_list_unit_tests
Browse files Browse the repository at this point in the history
handle pyspark < 3.3.0
  • Loading branch information
jeffbrennan committed Jan 21, 2024
2 parents 77113b2 + bb64705 commit cf06721
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions quinn/dataframe_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,16 @@ def column_to_list(df: DataFrame, col_name: str) -> list[Any]:
:rtype: List[Any]
"""
pyarrow_kv = ("spark.sql.execution.arrow.pyspark.enabled", "true")

if "pyspark" not in sys.modules:
raise ImportError

# DataFrame.sparkSession is not available in PySpark versions before 3.3.0,
# so fall back to collecting the column through the RDD API
if sys.modules["pyspark"].__version__ < "3.3.0":
return df.select(col_name).rdd.flatMap(lambda x: x).collect()

spark_config = df.sparkSession.sparkContext.getConf().getAll()

pyarrow_enabled: bool = pyarrow_kv in spark_config
pyarrow_valid = pyarrow_enabled and sys.modules["pyarrow"] >= "0.17.0"

Expand Down

0 comments on commit cf06721

Please sign in to comment.