From 267aaf9f1cdb133b54cfb09d411b1477f1d2f878 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 14 Mar 2016 14:07:40 -0700 Subject: [PATCH] Fix ClassNotFoundException errors in PySpark streaming tests. --- python/pyspark/streaming/tests.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/pyspark/streaming/tests.py b/python/pyspark/streaming/tests.py index f4bbb1b12872f..5766d1e0882d4 100644 --- a/python/pyspark/streaming/tests.py +++ b/python/pyspark/streaming/tests.py @@ -1636,7 +1636,13 @@ def search_kinesis_asl_assembly_jar(): jars = "%s,%s,%s,%s,%s" % (kafka_assembly_jar, flume_assembly_jar, mqtt_assembly_jar, mqtt_test_jar, kinesis_asl_assembly_jar) - os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars %s pyspark-shell" % jars + # We need to set userClassPathFirst here because the streaming data source classes are also + # loadable from the root classloader (because of SPARK_PREPEND_CLASSES) but their dependencies + # are only present in the data source assembly JARs. + os.environ["PYSPARK_SUBMIT_ARGS"] = " ".join([ + "--conf spark.driver.userClassPathFirst=true", + "--jars %s pyspark-shell" % jars, + ]) testcases = [BasicOperationTests, WindowFunctionTests, StreamingContextTests, CheckpointTests, KafkaStreamTests, FlumeStreamTests, FlumePollingStreamTests, MQTTStreamTests, StreamingListenerTests]