# PySpark: Read a Parquet file into a DataFrame

In [0]:
# Load the house-price Parquet file into a DataFrame, then print its schema
# and preview the first five rows.
df_par = spark.read.format("parquet").load("dbfs:/FileStore/parquet/House_price.parquet")
df_par.printSchema()
df_par.show(n=5)

root
 |-- price: long (nullable = true)
 |-- area: long (nullable = true)
 |-- bedrooms: long (nullable = true)
 |-- bathrooms: long (nullable = true)
 |-- stories: long (nullable = true)
 |-- mainroad: string (nullable = true)
 |-- guestroom: string (nullable = true)
 |-- basement: string (nullable = true)
 |-- hotwaterheating: string (nullable = true)
 |-- airconditioning: string (nullable = true)
 |-- parking: long (nullable = true)
 |-- prefarea: string (nullable = true)
 |-- furnishingstatus: string (nullable = true)

+--------+----+--------+---------+-------+--------+---------+--------+---------------+---------------+-------+--------+----------------+
|   price|area|bedrooms|bathrooms|stories|mainroad|guestroom|basement|hotwaterheating|airconditioning|parking|prefarea|furnishingstatus|
+--------+----+--------+---------+-------+--------+---------+--------+---------------+---------------+-------+--------+----------------+
|13300000|7420|       4|        2|      3|     yes|       no

# PySpark: Write a DataFrame to Parquet format

In [0]:
# Write the DataFrame out as Parquet, replacing anything already at the target path.
# Equivalent keyword form: df_par.write.parquet(<path>, mode="overwrite")
df_par.write.mode("overwrite").parquet("dbfs:/FileStore/parquet/writeData")

In [0]:
# List the distinct values of the column used as the partition key in the cells below.
(
    df_par
    .select("furnishingstatus")
    .distinct()
    .show()
)

+----------------+
|furnishingstatus|
+----------------+
|  semi-furnished|
|     unfurnished|
|       furnished|
+----------------+



In [0]:
# Write the data partitioned by `furnishingstatus`, replacing any previous output
# at this path. Each distinct value gets its own `furnishingstatus=<value>` subdirectory.
# Equivalent builder form:
#   df_par.write.partitionBy("furnishingstatus").mode("overwrite").parquet(<path>)
df_par.write.parquet(
    "dbfs:/FileStore/parquet/writeData-1",
    mode="overwrite",
    partitionBy="furnishingstatus",
)

In [0]:
# Write the data partitioned by two columns: `furnishingstatus` first, then `prefarea`.
# mode="overwrite" keeps this cell idempotent: with mode="append", every re-run of the
# notebook would add another full copy of the rows to writeData2.
# Switch back to mode="append" only when adding genuinely new data to the existing output.
df_par.write.parquet(
    "dbfs:/FileStore/parquet/writeData2",
    mode="overwrite",
    partitionBy=["furnishingstatus", "prefarea"],
)

## Retrieving data from a partitioned Parquet file

In [0]:
# Read a single partition directory (furnishingstatus=furnished) of the partitioned output.
# Note: when the partition directory is read directly, the `furnishingstatus` column
# is not part of the resulting DataFrame (see the output header below).
parDF2 = spark.read.format("parquet").load(
    "dbfs:/FileStore/parquet/writeData-1/furnishingstatus=furnished"
)
parDF2.show(truncate=False)

+--------+-----+--------+---------+-------+--------+---------+--------+---------------+---------------+-------+--------+
|price   |area |bedrooms|bathrooms|stories|mainroad|guestroom|basement|hotwaterheating|airconditioning|parking|prefarea|
+--------+-----+--------+---------+-------+--------+---------+--------+---------------+---------------+-------+--------+
|13300000|7420 |4       |2        |3      |yes     |no       |no      |no             |yes            |2      |yes     |
|12250000|8960 |4       |4        |4      |yes     |no       |no      |no             |yes            |3      |no      |
|12215000|7500 |4       |2        |2      |yes     |no       |yes     |no             |yes            |3      |yes     |
|11410000|7420 |4       |1        |2      |yes     |yes      |yes     |no             |yes            |2      |no      |
|9870000 |8100 |4       |1        |2      |yes     |yes      |yes     |no             |yes            |2      |yes     |
|9800000 |13200|3       |1      

## Creating a temporary view on a partitioned Parquet file

In [0]:
# Register one partition directory of the partitioned Parquet output as a
# temporary view named `home`, then query it with SQL.
# CREATE OR REPLACE keeps the cell re-runnable: a plain CREATE TEMPORARY VIEW
# fails when `home` already exists in the current Spark session.
spark.sql(
    """
    CREATE OR REPLACE TEMPORARY VIEW home
    USING parquet
    OPTIONS (path "dbfs:/FileStore/parquet/writeData-1/furnishingstatus=semi-furnished")
    """
)
spark.sql("SELECT * FROM home").show()

+--------+-----+--------+---------+-------+--------+---------+--------+---------------+---------------+-------+--------+
|   price| area|bedrooms|bathrooms|stories|mainroad|guestroom|basement|hotwaterheating|airconditioning|parking|prefarea|
+--------+-----+--------+---------+-------+--------+---------+--------+---------------+---------------+-------+--------+
|12250000| 9960|       3|        2|      2|     yes|       no|     yes|             no|             no|      2|     yes|
|10850000| 7500|       3|        3|      1|     yes|       no|     yes|             no|            yes|      2|     yes|
|10150000| 8580|       4|        3|      4|     yes|       no|      no|             no|            yes|      2|     yes|
| 9681000| 6000|       4|        3|      2|     yes|      yes|     yes|            yes|             no|      2|      no|
| 9310000| 6550|       4|        2|      2|     yes|       no|      no|             no|            yes|      1|     yes|
| 9240000| 7800|       3|       