## Session Initialization

In [1]:
import pyspark
from pyspark import SparkContext
from pathlib import Path

# Reuse the active SparkContext/SparkSession when this cell is re-run in the
# same kernel; a bare SparkContext() raises if a context is already running.
sc = SparkContext.getOrCreate()
# The builder is the documented entry point and attaches to the context above.
spark = pyspark.sql.SparkSession.builder.getOrCreate()

## Read JSON file

In [2]:
# The input file is resolved relative to the parent of the current working
# directory, so the notebook must be launched from its own folder.
path = Path.cwd().parent.joinpath("Files", "bookcontents.json")
file = str(path)  # handed to the reader as a plain string

# No schema is supplied, so Spark derives it from the JSON content.
bookDF = spark.read.json(file)

## Schema

In [3]:
# Print each column's name, inferred type, and nullability as a tree.
bookDF.printSchema()

root
 |-- Chapter: long (nullable = true)
 |-- Name: string (nullable = true)
 |-- Page: long (nullable = true)



## Action

In [4]:
# Action: runs the read and prints the rows as a text table.
# Long string values are shortened with "..." in this display.
bookDF.show()

+-------+--------------------+----+
|Chapter|                Name|Page|
+-------+--------------------+----+
|      1|        Introduction|  11|
|      2|Basic Engineering...|  19|
|      3|Advanced Engineer...|  28|
|      4|     Hands On Course|  60|
|      5|        Case Studies|  62|
|      6|Best Practices Cl...|  73|
|      7|130+ Data Sources...|  77|
|      8|1001 Interview Qu...|  82|
|      9|Recommended Books...|  87|
+-------+--------------------+----+



## Transformations

In [5]:
# Transformations are lazy: this only describes the query.
# Keep even-numbered chapters, reorder the columns with Page first,
# and sort so the highest page number comes first.
df1 = (
    bookDF
    .select("Page", "Chapter", "Name")
    .where("Chapter % 2 = 0")
    .orderBy("Page", ascending=False)
)

# The action below is what actually executes the query.
df1.show()

+----+-------+--------------------+
|Page|Chapter|                Name|
+----+-------+--------------------+
|  82|      8|1001 Interview Qu...|
|  73|      6|Best Practices Cl...|
|  60|      4|     Hands On Course|
|  19|      2|Basic Engineering...|
+----+-------+--------------------+



## Chaining -> Query

In [6]:
# Build and run the query in a single chained expression:
# rows whose Page is even, sorted by Chapter in ascending order.
(
    bookDF
    .select("Chapter", "Name", "Page")
    .where("Page % 2 = 0")
    .orderBy("Chapter")
    .show()
)

+-------+--------------------+----+
|Chapter|                Name|Page|
+-------+--------------------+----+
|      3|Advanced Engineer...|  28|
|      4|     Hands On Course|  60|
|      5|        Case Studies|  62|
|      8|1001 Interview Qu...|  82|
+-------+--------------------+----+



## Save File

In [None]:
# Write df1 as JSON part files into the "df1" directory (relative to the
# current working directory). The default save mode errors out when the
# target already exists, which breaks a second run of the notebook, so
# replace any previous output instead.
df1.write.mode("overwrite").json("df1")