[SPARK-4860][pyspark][sql] Call sample() directly on JavaSchemaRDD in SchemaRDD.sample() instead of going through baseSchemaRDD()

JasonMWhite · Dec 23, 2014 · de22f70 · de22f70
1 parent b916442
commit de22f70
Showing 1 changed file with 2 additions and 3 deletions.
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
@@ -2095,9 +2095,8 @@ def sample(self, withReplacement, fraction, seed=None):
         """
         assert fraction >= 0.0, "Negative fraction value: %s" % fraction
         seed = seed if seed is not None else random.randint(0, sys.maxint)
-        rdd = self._jschema_rdd.baseSchemaRDD().sample(
-            withReplacement, fraction, long(seed))
-        return SchemaRDD(rdd.toJavaSchemaRDD(), self.sql_ctx)
+        rdd = self._jschema_rdd.sample(withReplacement, fraction, long(seed))
+        return SchemaRDD(rdd, self.sql_ctx)
 
     def takeSample(self, withReplacement, num, seed=None):
         """Return a fixed-size sampled subset of this SchemaRDD.