In [3]:
import pandas as pd
import numpy as np
import pyspark
spark = pyspark.sql.SparkSession.builder.getOrCreate()

np.random.seed(13)

pandas_dataframe = pd.DataFrame({
    "n": np.random.randn(20),
    "group": np.random.choice(list("xyz"), 20),
    "abool": np.random.choice([True, False], 20),
})

In [4]:
df = spark.createDataFrame(pandas_dataframe)

In [5]:
df.show(3)

+--------------------+-----+-----+
|                   n|group|abool|
+--------------------+-----+-----+
|  -0.712390662050588|    z|false|
|   0.753766378659703|    x|false|
|-0.04450307833805...|    z|false|
+--------------------+-----+-----+
only showing top 3 rows



In [6]:
df.show(7)

+--------------------+-----+-----+
|                   n|group|abool|
+--------------------+-----+-----+
|  -0.712390662050588|    z|false|
|   0.753766378659703|    x|false|
|-0.04450307833805...|    z|false|
| 0.45181233874578974|    y|false|
|  1.3451017084510097|    z|false|
|  0.5323378882945463|    y|false|
|  1.3501878997225267|    z|false|
+--------------------+-----+-----+
only showing top 7 rows



In [7]:
df.describe().show()

+-------+-------------------+-----+
|summary|                  n|group|
+-------+-------------------+-----+
|  count|                 20|   20|
|   mean|0.36640264498852165| null|
| stddev| 0.8905322898155364| null|
|    min| -1.261605945319069|    x|
|    max| 2.1503829673811126|    z|
+-------+-------------------+-----+



In [8]:
df.select('n', 'abool').show(5)

+--------------------+-----+
|                   n|abool|
+--------------------+-----+
|  -0.712390662050588|false|
|   0.753766378659703|false|
|-0.04450307833805...|false|
| 0.45181233874578974|false|
|  1.3451017084510097|false|
+--------------------+-----+
only showing top 5 rows



In [9]:
df.select('group', 'abool').show(5)

+-----+-----+
|group|abool|
+-----+-----+
|    z|false|
|    x|false|
|    z|false|
|    y|false|
|    z|false|
+-----+-----+
only showing top 5 rows



In [10]:
df.select('group', df.abool.alias('a_boolean_value')).show(3)

+-----+---------------+
|group|a_boolean_value|
+-----+---------------+
|    z|          false|
|    x|          false|
|    z|          false|
+-----+---------------+
only showing top 3 rows



In [11]:
from pyspark.sql.functions import col

df.select('group', col('n').alias('a_numeric_value')).show(6)

+-----+--------------------+
|group|     a_numeric_value|
+-----+--------------------+
|    z|  -0.712390662050588|
|    x|   0.753766378659703|
|    z|-0.04450307833805...|
|    y| 0.45181233874578974|
|    z|  1.3451017084510097|
|    y|  0.5323378882945463|
+-----+--------------------+
only showing top 6 rows



In [12]:
df.select('n', df.n + 4).show()

+--------------------+------------------+
|                   n|           (n + 4)|
+--------------------+------------------+
|  -0.712390662050588|3.2876093379494122|
|   0.753766378659703| 4.753766378659703|
|-0.04450307833805...|3.9554969216619464|
| 0.45181233874578974|  4.45181233874579|
|  1.3451017084510097|5.3451017084510095|
|  0.5323378882945463| 4.532337888294546|
|  1.3501878997225267| 5.350187899722527|
|  0.8612113741693206|  4.86121137416932|
|  1.4786857374358966| 5.478685737435897|
| -1.0453771305385342| 2.954622869461466|
| -0.7889890249515489|3.2110109750484512|
|  -1.261605945319069| 2.738394054680931|
|  0.5628467852810314| 4.562846785281032|
|-0.24332625188556253|3.7566737481144377|
|  0.9137407048596775| 4.913740704859677|
| 0.31735092273633597| 4.317350922736336|
| 0.12730328020698067| 4.127303280206981|
|  2.1503829673811126| 6.150382967381113|
|  0.6062886568962988| 4.606288656896298|
|-0.02677164998644...|3.9732283500135592|
+--------------------+------------

In [13]:
df.select('n', df.n - 5).show()

+--------------------+-------------------+
|                   n|            (n - 5)|
+--------------------+-------------------+
|  -0.712390662050588| -5.712390662050588|
|   0.753766378659703| -4.246233621340297|
|-0.04450307833805...| -5.044503078338053|
| 0.45181233874578974|  -4.54818766125421|
|  1.3451017084510097|-3.6548982915489905|
|  0.5323378882945463| -4.467662111705454|
|  1.3501878997225267|-3.6498121002774733|
|  0.8612113741693206|  -4.13878862583068|
|  1.4786857374358966| -3.521314262564103|
| -1.0453771305385342| -6.045377130538534|
| -0.7889890249515489| -5.788989024951549|
|  -1.261605945319069| -6.261605945319069|
|  0.5628467852810314| -4.437153214718968|
|-0.24332625188556253| -5.243326251885563|
|  0.9137407048596775| -4.086259295140323|
| 0.31735092273633597| -4.682649077263664|
| 0.12730328020698067| -4.872696719793019|
|  2.1503829673811126|-2.8496170326188874|
|  0.6062886568962988| -4.393711343103702|
|-0.02677164998644...| -5.026771649986441|
+----------

In [14]:
df.select('n', df.n * 5).show()

+--------------------+--------------------+
|                   n|             (n * 5)|
+--------------------+--------------------+
|  -0.712390662050588| -3.5619533102529397|
|   0.753766378659703|  3.7688318932985148|
|-0.04450307833805...|-0.22251539169026727|
| 0.45181233874578974|   2.259061693728949|
|  1.3451017084510097|  6.7255085422550485|
|  0.5323378882945463|  2.6616894414727317|
|  1.3501878997225267|   6.750939498612634|
|  0.8612113741693206|   4.306056870846603|
|  1.4786857374358966|   7.393428687179483|
| -1.0453771305385342|  -5.226885652692671|
| -0.7889890249515489| -3.9449451247577443|
|  -1.261605945319069|  -6.308029726595345|
|  0.5628467852810314|   2.814233926405157|
|-0.24332625188556253| -1.2166312594278126|
|  0.9137407048596775|   4.568703524298387|
| 0.31735092273633597|    1.58675461368168|
| 0.12730328020698067|  0.6365164010349034|
|  2.1503829673811126|  10.751914836905563|
|  0.6062886568962988|  3.0314432844814942|
|-0.02677164998644...|-0.1338582

In [15]:
df = df.select('*', (df.n * -1).alias('n2'))
df.show(4)

+--------------------+-----+-----+--------------------+
|                   n|group|abool|                  n2|
+--------------------+-----+-----+--------------------+
|  -0.712390662050588|    z|false|   0.712390662050588|
|   0.753766378659703|    x|false|  -0.753766378659703|
|-0.04450307833805...|    z|false|0.044503078338053455|
| 0.45181233874578974|    y|false|-0.45181233874578974|
+--------------------+-----+-----+--------------------+
only showing top 4 rows



In [16]:
df = df.select('*', (df.n * df.n).alias('n3'))
df.show(5)

+--------------------+-----+-----+--------------------+--------------------+
|                   n|group|abool|                  n2|                  n3|
+--------------------+-----+-----+--------------------+--------------------+
|  -0.712390662050588|    z|false|   0.712390662050588|   0.507500455376875|
|   0.753766378659703|    x|false|  -0.753766378659703|  0.5681637535977627|
|-0.04450307833805...|    z|false|0.044503078338053455|0.001980523981562...|
| 0.45181233874578974|    y|false|-0.45181233874578974| 0.20413438944294027|
|  1.3451017084510097|    z|false| -1.3451017084510097|  1.8092986060778251|
+--------------------+-----+-----+--------------------+--------------------+
only showing top 5 rows



In [19]:
df.group + df.abool

Column<'(group + abool)'>

In [20]:
df.select(df.group + df.abool)

AnalysisException: cannot resolve '(CAST(group AS DOUBLE) + abool)' due to data type mismatch: differing types in '(CAST(group AS DOUBLE) + abool)' (double and boolean).;
'Project [unresolvedalias((cast(group#1 as double) + abool#2), Some(org.apache.spark.sql.Column$$Lambda$3094/0x000000080131f840@304a11e7))]
+- Project [n#0, group#1, abool#2, n2#241, (n#0 * n#0) AS n3#263]
   +- Project [n#0, group#1, abool#2, (n#0 * cast(-1 as double)) AS n2#241]
      +- LogicalRDD [n#0, group#1, abool#2], false


In [21]:
df.printSchema()

root
 |-- n: double (nullable = true)
 |-- group: string (nullable = true)
 |-- abool: boolean (nullable = true)
 |-- n2: double (nullable = true)
 |-- n3: double (nullable = true)



In [22]:
df.dtypes

[('n', 'double'),
 ('group', 'string'),
 ('abool', 'boolean'),
 ('n2', 'double'),
 ('n3', 'double')]

In [23]:
df.abool.cast('int')

Column<'CAST(abool AS INT)'>

In [24]:
df.select(df.abool.cast('int')).show()

+-----+
|abool|
+-----+
|    0|
|    0|
|    0|
|    0|
|    0|
|    0|
|    0|
|    0|
|    1|
|    1|
|    0|
|    0|
|    1|
|    1|
|    0|
|    0|
|    0|
|    1|
|    0|
|    1|
+-----+



In [25]:
df.select('abool', df.abool.cast('int')).show()

+-----+-----+
|abool|abool|
+-----+-----+
|false|    0|
|false|    0|
|false|    0|
|false|    0|
|false|    0|
|false|    0|
|false|    0|
|false|    0|
| true|    1|
| true|    1|
|false|    0|
|false|    0|
| true|    1|
| true|    1|
|false|    0|
|false|    0|
|false|    0|
| true|    1|
|false|    0|
| true|    1|
+-----+-----+



In [26]:
df.select('group', df.group.cast('int')).show()

+-----+-----+
|group|group|
+-----+-----+
|    z| null|
|    x| null|
|    z| null|
|    y| null|
|    z| null|
|    y| null|
|    z| null|
|    x| null|
|    z| null|
|    y| null|
|    x| null|
|    y| null|
|    y| null|
|    y| null|
|    y| null|
|    x| null|
|    z| null|
|    y| null|
|    x| null|
|    x| null|
+-----+-----+



In [27]:
df.select('n', df.n.cast('int')).show()

+--------------------+---+
|                   n|  n|
+--------------------+---+
|  -0.712390662050588|  0|
|   0.753766378659703|  0|
|-0.04450307833805...|  0|
| 0.45181233874578974|  0|
|  1.3451017084510097|  1|
|  0.5323378882945463|  0|
|  1.3501878997225267|  1|
|  0.8612113741693206|  0|
|  1.4786857374358966|  1|
| -1.0453771305385342| -1|
| -0.7889890249515489|  0|
|  -1.261605945319069| -1|
|  0.5628467852810314|  0|
|-0.24332625188556253|  0|
|  0.9137407048596775|  0|
| 0.31735092273633597|  0|
| 0.12730328020698067|  0|
|  2.1503829673811126|  2|
|  0.6062886568962988|  0|
|-0.02677164998644...|  0|
+--------------------+---+



In [28]:
df.select('abool', df.abool.cast('string')).show()

+-----+-----+
|abool|abool|
+-----+-----+
|false|false|
|false|false|
|false|false|
|false|false|
|false|false|
|false|false|
|false|false|
|false|false|
| true| true|
| true| true|
|false|false|
|false|false|
| true| true|
| true| true|
|false|false|
|false|false|
|false|false|
| true| true|
|false|false|
| true| true|
+-----+-----+



In [29]:
from pyspark.sql.functions import min, max, mean, lit, concat

In [30]:
df.select(max('n'), min('n'), mean('n')).show()

+------------------+------------------+-------------------+
|            max(n)|            min(n)|             avg(n)|
+------------------+------------------+-------------------+
|2.1503829673811126|-1.261605945319069|0.36640264498852165|
+------------------+------------------+-------------------+



In [31]:
df.select(concat(lit('Group: '), 'group'))

DataFrame[concat(Group: , group): string]

In [32]:
df.select(concat('group', lit(': '), 'n')).show()

+--------------------+
|concat(group, : , n)|
+--------------------+
|z: -0.71239066205...|
|x: 0.753766378659703|
|z: -0.04450307833...|
|y: 0.451812338745...|
|z: 1.345101708451...|
|y: 0.532337888294...|
|z: 1.350187899722...|
|x: 0.861211374169...|
|z: 1.478685737435...|
|y: -1.04537713053...|
|x: -0.78898902495...|
|y: -1.26160594531...|
|y: 0.562846785281...|
|y: -0.24332625188...|
|y: 0.913740704859...|
|x: 0.317350922736...|
|z: 0.127303280206...|
|y: 2.150382967381...|
|x: 0.606288656896...|
|x: -0.02677164998...|
+--------------------+



In [33]:
df.select(when(df.abool, 'It is true').otherwise('It is false')).show()

NameError: name 'when' is not defined

In [34]:
df.filter(df.group == 'y').show()

+--------------------+-----+-----+--------------------+-------------------+
|                   n|group|abool|                  n2|                 n3|
+--------------------+-----+-----+--------------------+-------------------+
| 0.45181233874578974|    y|false|-0.45181233874578974|0.20413438944294027|
|  0.5323378882945463|    y|false| -0.5323378882945463| 0.2833836273138969|
| -1.0453771305385342|    y| true|  1.0453771305385342| 1.0928133450529796|
|  -1.261605945319069|    y|false|   1.261605945319069|  1.591649561264422|
|  0.5628467852810314|    y| true| -0.5628467852810314|0.31679650370119145|
|-0.24332625188556253|    y| true| 0.24332625188556253|0.05920766485667622|
|  0.9137407048596775|    y|false| -0.9137407048596775| 0.8349220757174602|
|  2.1503829673811126|    y| true| -2.1503829673811126|  4.624146906402799|
+--------------------+-----+-----+--------------------+-------------------+



In [35]:
df.filter(~ df.abool).show()

+--------------------+-----+-----+--------------------+--------------------+
|                   n|group|abool|                  n2|                  n3|
+--------------------+-----+-----+--------------------+--------------------+
|  -0.712390662050588|    z|false|   0.712390662050588|   0.507500455376875|
|   0.753766378659703|    x|false|  -0.753766378659703|  0.5681637535977627|
|-0.04450307833805...|    z|false|0.044503078338053455|0.001980523981562...|
| 0.45181233874578974|    y|false|-0.45181233874578974| 0.20413438944294027|
|  1.3451017084510097|    z|false| -1.3451017084510097|  1.8092986060778251|
|  0.5323378882945463|    y|false| -0.5323378882945463|  0.2833836273138969|
|  1.3501878997225267|    z|false| -1.3501878997225267|  1.8230073645571279|
|  0.8612113741693206|    x|false| -0.8612113741693206|  0.7416850309986095|
| -0.7889890249515489|    x|false|  0.7889890249515489|  0.6225036814939958|
|  -1.261605945319069|    y|false|   1.261605945319069|   1.591649561264422|

In [36]:
df.filter(df.group != 'y').show()

+--------------------+-----+-----+--------------------+--------------------+
|                   n|group|abool|                  n2|                  n3|
+--------------------+-----+-----+--------------------+--------------------+
|  -0.712390662050588|    z|false|   0.712390662050588|   0.507500455376875|
|   0.753766378659703|    x|false|  -0.753766378659703|  0.5681637535977627|
|-0.04450307833805...|    z|false|0.044503078338053455|0.001980523981562...|
|  1.3451017084510097|    z|false| -1.3451017084510097|  1.8092986060778251|
|  1.3501878997225267|    z|false| -1.3501878997225267|  1.8230073645571279|
|  0.8612113741693206|    x|false| -0.8612113741693206|  0.7416850309986095|
|  1.4786857374358966|    z| true| -1.4786857374358966|  2.1865115100963415|
| -0.7889890249515489|    x|false|  0.7889890249515489|  0.6225036814939958|
| 0.31735092273633597|    x|false|-0.31735092273633597| 0.10071160816160388|
| 0.12730328020698067|    z|false|-0.12730328020698067|0.016206125151457036|

In [37]:
df.filter(df.n > 0).show()

+-------------------+-----+-----+--------------------+--------------------+
|                  n|group|abool|                  n2|                  n3|
+-------------------+-----+-----+--------------------+--------------------+
|  0.753766378659703|    x|false|  -0.753766378659703|  0.5681637535977627|
|0.45181233874578974|    y|false|-0.45181233874578974| 0.20413438944294027|
| 1.3451017084510097|    z|false| -1.3451017084510097|  1.8092986060778251|
| 0.5323378882945463|    y|false| -0.5323378882945463|  0.2833836273138969|
| 1.3501878997225267|    z|false| -1.3501878997225267|  1.8230073645571279|
| 0.8612113741693206|    x|false| -0.8612113741693206|  0.7416850309986095|
| 1.4786857374358966|    z| true| -1.4786857374358966|  2.1865115100963415|
| 0.5628467852810314|    y| true| -0.5628467852810314| 0.31679650370119145|
| 0.9137407048596775|    y|false| -0.9137407048596775|  0.8349220757174602|
|0.31735092273633597|    x|false|-0.31735092273633597| 0.10071160816160388|
|0.127303280

In [38]:
df.filter(df.abool & (df.group == 'z')).show()

+------------------+-----+-----+-------------------+------------------+
|                 n|group|abool|                 n2|                n3|
+------------------+-----+-----+-------------------+------------------+
|1.4786857374358966|    z| true|-1.4786857374358966|2.1865115100963415|
+------------------+-----+-----+-------------------+------------------+



In [39]:
df.filter(df.abool | (df.group == 'z')).show()

+--------------------+-----+-----+--------------------+--------------------+
|                   n|group|abool|                  n2|                  n3|
+--------------------+-----+-----+--------------------+--------------------+
|  -0.712390662050588|    z|false|   0.712390662050588|   0.507500455376875|
|-0.04450307833805...|    z|false|0.044503078338053455|0.001980523981562...|
|  1.3451017084510097|    z|false| -1.3451017084510097|  1.8092986060778251|
|  1.3501878997225267|    z|false| -1.3501878997225267|  1.8230073645571279|
|  1.4786857374358966|    z| true| -1.4786857374358966|  2.1865115100963415|
| -1.0453771305385342|    y| true|  1.0453771305385342|  1.0928133450529796|
|  0.5628467852810314|    y| true| -0.5628467852810314| 0.31679650370119145|
|-0.24332625188556253|    y| true| 0.24332625188556253| 0.05920766485667622|
| 0.12730328020698067|    z|false|-0.12730328020698067|0.016206125151457036|
|  2.1503829673811126|    y| true| -2.1503829673811126|   4.624146906402799|

In [40]:
df.filter(~ df.abool & (df.n < 1)).show()

+--------------------+-----+-----+--------------------+--------------------+
|                   n|group|abool|                  n2|                  n3|
+--------------------+-----+-----+--------------------+--------------------+
|  -0.712390662050588|    z|false|   0.712390662050588|   0.507500455376875|
|   0.753766378659703|    x|false|  -0.753766378659703|  0.5681637535977627|
|-0.04450307833805...|    z|false|0.044503078338053455|0.001980523981562...|
| 0.45181233874578974|    y|false|-0.45181233874578974| 0.20413438944294027|
|  0.5323378882945463|    y|false| -0.5323378882945463|  0.2833836273138969|
|  0.8612113741693206|    x|false| -0.8612113741693206|  0.7416850309986095|
| -0.7889890249515489|    x|false|  0.7889890249515489|  0.6225036814939958|
|  -1.261605945319069|    y|false|   1.261605945319069|   1.591649561264422|
|  0.9137407048596775|    y|false| -0.9137407048596775|  0.8349220757174602|
| 0.31735092273633597|    x|false|-0.31735092273633597| 0.10071160816160388|

In [41]:
df.filter(~ df.abool | (df.n < 1)).show()

+--------------------+-----+-----+--------------------+--------------------+
|                   n|group|abool|                  n2|                  n3|
+--------------------+-----+-----+--------------------+--------------------+
|  -0.712390662050588|    z|false|   0.712390662050588|   0.507500455376875|
|   0.753766378659703|    x|false|  -0.753766378659703|  0.5681637535977627|
|-0.04450307833805...|    z|false|0.044503078338053455|0.001980523981562...|
| 0.45181233874578974|    y|false|-0.45181233874578974| 0.20413438944294027|
|  1.3451017084510097|    z|false| -1.3451017084510097|  1.8092986060778251|
|  0.5323378882945463|    y|false| -0.5323378882945463|  0.2833836273138969|
|  1.3501878997225267|    z|false| -1.3501878997225267|  1.8230073645571279|
|  0.8612113741693206|    x|false| -0.8612113741693206|  0.7416850309986095|
| -1.0453771305385342|    y| true|  1.0453771305385342|  1.0928133450529796|
| -0.7889890249515489|    x|false|  0.7889890249515489|  0.6225036814939958|

In [42]:
df.sort('n').show()

+--------------------+-----+-----+--------------------+--------------------+
|                   n|group|abool|                  n2|                  n3|
+--------------------+-----+-----+--------------------+--------------------+
|  -1.261605945319069|    y|false|   1.261605945319069|   1.591649561264422|
| -1.0453771305385342|    y| true|  1.0453771305385342|  1.0928133450529796|
| -0.7889890249515489|    x|false|  0.7889890249515489|  0.6225036814939958|
|  -0.712390662050588|    z|false|   0.712390662050588|   0.507500455376875|
|-0.24332625188556253|    y| true| 0.24332625188556253| 0.05920766485667622|
|-0.04450307833805...|    z|false|0.044503078338053455|0.001980523981562...|
|-0.02677164998644...|    x| true|0.026771649986440726|7.167212429964917E-4|
| 0.12730328020698067|    z|false|-0.12730328020698067|0.016206125151457036|
| 0.31735092273633597|    x|false|-0.31735092273633597| 0.10071160816160388|
| 0.45181233874578974|    y|false|-0.45181233874578974| 0.20413438944294027|

In [43]:
from pyspark.sql.functions import asc, desc

In [44]:
df.sort(asc('group')).show()

+--------------------+-----+-----+--------------------+--------------------+
|                   n|group|abool|                  n2|                  n3|
+--------------------+-----+-----+--------------------+--------------------+
|  0.8612113741693206|    x|false| -0.8612113741693206|  0.7416850309986095|
|   0.753766378659703|    x|false|  -0.753766378659703|  0.5681637535977627|
| -0.7889890249515489|    x|false|  0.7889890249515489|  0.6225036814939958|
|-0.02677164998644...|    x| true|0.026771649986440726|7.167212429964917E-4|
|  0.6062886568962988|    x|false| -0.6062886568962988|   0.367585935481118|
| 0.31735092273633597|    x|false|-0.31735092273633597| 0.10071160816160388|
|-0.24332625188556253|    y| true| 0.24332625188556253| 0.05920766485667622|
| -1.0453771305385342|    y| true|  1.0453771305385342|  1.0928133450529796|
|  0.5323378882945463|    y|false| -0.5323378882945463|  0.2833836273138969|
|  0.9137407048596775|    y|false| -0.9137407048596775|  0.8349220757174602|

In [45]:
df.sort(desc('group')).show()

+--------------------+-----+-----+--------------------+--------------------+
|                   n|group|abool|                  n2|                  n3|
+--------------------+-----+-----+--------------------+--------------------+
|  1.3501878997225267|    z|false| -1.3501878997225267|  1.8230073645571279|
|  1.4786857374358966|    z| true| -1.4786857374358966|  2.1865115100963415|
| 0.12730328020698067|    z|false|-0.12730328020698067|0.016206125151457036|
|  1.3451017084510097|    z|false| -1.3451017084510097|  1.8092986060778251|
|-0.04450307833805...|    z|false|0.044503078338053455|0.001980523981562...|
|  -0.712390662050588|    z|false|   0.712390662050588|   0.507500455376875|
|  2.1503829673811126|    y| true| -2.1503829673811126|   4.624146906402799|
|  -1.261605945319069|    y|false|   1.261605945319069|   1.591649561264422|
|-0.24332625188556253|    y| true| 0.24332625188556253| 0.05920766485667622|
|  0.9137407048596775|    y|false| -0.9137407048596775|  0.8349220757174602|

In [46]:
df.sort('group', 'n').show()

+--------------------+-----+-----+--------------------+--------------------+
|                   n|group|abool|                  n2|                  n3|
+--------------------+-----+-----+--------------------+--------------------+
| -0.7889890249515489|    x|false|  0.7889890249515489|  0.6225036814939958|
|-0.02677164998644...|    x| true|0.026771649986440726|7.167212429964917E-4|
| 0.31735092273633597|    x|false|-0.31735092273633597| 0.10071160816160388|
|  0.6062886568962988|    x|false| -0.6062886568962988|   0.367585935481118|
|   0.753766378659703|    x|false|  -0.753766378659703|  0.5681637535977627|
|  0.8612113741693206|    x|false| -0.8612113741693206|  0.7416850309986095|
|  -1.261605945319069|    y|false|   1.261605945319069|   1.591649561264422|
| -1.0453771305385342|    y| true|  1.0453771305385342|  1.0928133450529796|
|-0.24332625188556253|    y| true| 0.24332625188556253| 0.05920766485667622|
| 0.45181233874578974|    y|false|-0.45181233874578974| 0.20413438944294027|

In [47]:
df.sort('abool', 'group', 'n').show()

+--------------------+-----+-----+--------------------+--------------------+
|                   n|group|abool|                  n2|                  n3|
+--------------------+-----+-----+--------------------+--------------------+
| -0.7889890249515489|    x|false|  0.7889890249515489|  0.6225036814939958|
| 0.31735092273633597|    x|false|-0.31735092273633597| 0.10071160816160388|
|  0.6062886568962988|    x|false| -0.6062886568962988|   0.367585935481118|
|   0.753766378659703|    x|false|  -0.753766378659703|  0.5681637535977627|
|  0.8612113741693206|    x|false| -0.8612113741693206|  0.7416850309986095|
|  -1.261605945319069|    y|false|   1.261605945319069|   1.591649561264422|
| 0.45181233874578974|    y|false|-0.45181233874578974| 0.20413438944294027|
|  0.5323378882945463|    y|false| -0.5323378882945463|  0.2833836273138969|
|  0.9137407048596775|    y|false| -0.9137407048596775|  0.8349220757174602|
|  -0.712390662050588|    z|false|   0.712390662050588|   0.507500455376875|