### Create Spark Session

In [1]:
from pyspark.sql import SparkSession


# Build the notebook's SparkSession, or reuse one that already exists
# (getOrCreate), registering it under the application name "My App".
spark = (
    SparkSession.builder
    .appName("My App")
    .getOrCreate()
)


### Load CSV data into dataframe

In [2]:
# Configure a CSV reader: the first row supplies the column names and
# Spark infers each column's type from the data.
csv_reader = (
    spark.read
    .format("csv")
    .option("header", "true")
    .option("inferSchema", "true")
)

# Path is relative to the notebook's working directory.
df = csv_reader.load("../data/homes.csv")

# Print the leading rows as a quick sanity check on the load.
df.show()


+-----+-----+------+-----+----+-----+----+-----+------+
| Sell| List|Living|Rooms|Beds|Baths| Age|Acres| Taxes|
+-----+-----+------+-----+----+-----+----+-----+------+
|142.0|160.0|  28.0| 10.0| 5.0|  3.0|60.0| 0.28|3167.0|
|175.0|180.0|  18.0|  8.0| 4.0|  1.0|12.0| 0.43|4033.0|
|129.0|132.0|  13.0|  6.0| 3.0|  1.0|41.0| 0.33|1471.0|
|138.0|140.0|  17.0|  7.0| 3.0|  1.0|22.0| 0.46|3204.0|
|232.0|240.0|  25.0|  8.0| 4.0|  3.0| 5.0| 2.05|3613.0|
|135.0|140.0|  18.0|  7.0| 4.0|  3.0| 9.0| 0.57|3028.0|
|150.0|160.0|  20.0|  8.0| 4.0|  3.0|18.0|  4.0|3131.0|
|207.0|225.0|  22.0|  8.0| 4.0|  2.0|16.0| 2.22|5158.0|
|271.0|285.0|  30.0| 10.0| 5.0|  2.0|30.0| 0.53|5702.0|
| 89.0| 90.0|  10.0|  5.0| 3.0|  1.0|43.0|  0.3|2054.0|
|153.0|157.0|  22.0|  8.0| 3.0|  3.0|18.0| 0.38|4127.0|
| 87.0| 90.0|  16.0|  7.0| 3.0|  1.0|50.0| 0.65|1445.0|
|234.0|238.0|  25.0|  8.0| 4.0|  2.0| 2.0| 1.61|2087.0|
|106.0|116.0|  20.0|  8.0| 4.0|  1.0|13.0| 0.22|2818.0|
|175.0|180.0|  22.0|  8.0| 4.0|  2.0|15.0| 2.06|

### Check dataframe schema

In [3]:
# Bare last expression: the notebook displays the StructType that Spark
# inferred from the CSV (each field's name, data type and nullability).
df.schema


StructType([StructField('Sell', DoubleType(), True), StructField('List', DoubleType(), True), StructField('Living', DoubleType(), True), StructField('Rooms', DoubleType(), True), StructField('Beds', DoubleType(), True), StructField('Baths', DoubleType(), True), StructField('Age', DoubleType(), True), StructField('Acres', DoubleType(), True), StructField('Taxes', DoubleType(), True)])

### Transform dataframe: filter rows with `where`

In [4]:
from pyspark.sql.functions import col


# Three interchangeable ways to express the same row filter (Beds >= 6);
# each one prints the same result table.

# 1. Column expression built with the col() function.
beds_via_col = df.where(col("Beds") >= 6)
beds_via_col.show()

# 2. SQL expression passed as a string.
beds_via_sql = df.where('Beds >= 6')
beds_via_sql.show()

# 3. Column accessed as an attribute of the dataframe.
beds_via_attr = df.where(df.Beds >= 6)
beds_via_attr.show()


+-----+-----+------+-----+----+-----+----+-----+------+
| Sell| List|Living|Rooms|Beds|Baths| Age|Acres| Taxes|
+-----+-----+------+-----+----+-----+----+-----+------+
|265.0|270.0|  36.0| 10.0| 6.0|  3.0|33.0|  1.2|5853.0|
+-----+-----+------+-----+----+-----+----+-----+------+

+-----+-----+------+-----+----+-----+----+-----+------+
| Sell| List|Living|Rooms|Beds|Baths| Age|Acres| Taxes|
+-----+-----+------+-----+----+-----+----+-----+------+
|265.0|270.0|  36.0| 10.0| 6.0|  3.0|33.0|  1.2|5853.0|
+-----+-----+------+-----+----+-----+----+-----+------+

+-----+-----+------+-----+----+-----+----+-----+------+
| Sell| List|Living|Rooms|Beds|Baths| Age|Acres| Taxes|
+-----+-----+------+-----+----+-----+----+-----+------+
|265.0|270.0|  36.0| 10.0| 6.0|  3.0|33.0|  1.2|5853.0|
+-----+-----+------+-----+----+-----+----+-----+------+



### Stop Spark session

In [7]:
# Shut down the SparkSession created in the first cell. Run this last:
# cells that use `spark` or `df` need a live session.
spark.stop()