From de22f706d8bbe6d80a6ea2e9a5343b77e0695471 Mon Sep 17 00:00:00 2001 From: jbencook Date: Mon, 22 Dec 2014 20:16:42 -0600 Subject: [PATCH] [SPARK-4860][pyspark][sql] using sample() method from JavaSchemaRDD --- python/pyspark/sql.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index 94051990f8df1..9807a84a66f11 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -2095,9 +2095,8 @@ def sample(self, withReplacement, fraction, seed=None): """ assert fraction >= 0.0, "Negative fraction value: %s" % fraction seed = seed if seed is not None else random.randint(0, sys.maxint) - rdd = self._jschema_rdd.baseSchemaRDD().sample( - withReplacement, fraction, long(seed)) - return SchemaRDD(rdd.toJavaSchemaRDD(), self.sql_ctx) + rdd = self._jschema_rdd.sample(withReplacement, fraction, long(seed)) + return SchemaRDD(rdd, self.sql_ctx) def takeSample(self, withReplacement, num, seed=None): """Return a fixed-size sampled subset of this SchemaRDD.