In [1]:
# import required module
from pyspark.sql import SparkSession
from pyspark.sql.functions import *


In [2]:
# Obtain the SparkSession used throughout this notebook, registered under
# the application name "sparksql" (getOrCreate reuses a session if one exists).

spark = (
    SparkSession.builder
    .appName("sparksql")
    .getOrCreate()
)

In [3]:
# Print the version of the Spark runtime backing this session
# (the version in use here, which is not necessarily the latest release).

print(spark.version)

3.3.2


In [4]:
# Load "operations_management.csv" into the DataFrame `data`.
# header='true'      -> the first row supplies the column names
# inferSchema='true' -> Spark derives each column's type from the file contents

data = (
    spark.read.format('csv')
    .options(inferSchema='true', header='true', path='operations_management.csv')
    .load()
)

In [5]:
# Print the column names, inferred types and nullability of `data`.

data.printSchema()

root
 |-- description: string (nullable = true)
 |-- industry: string (nullable = true)
 |-- level: integer (nullable = true)
 |-- size: string (nullable = true)
 |-- line_code: string (nullable = true)
 |-- value: integer (nullable = true)



In [6]:
# Build `data_2` from `data`:
#   1. keep only the "industry" and "value" columns,
#   2. keep rows whose value exceeds 200 and whose industry is not "total",
#   3. sort the result by "value", largest first.

data_2 = (
    data.select("industry", "value")
    .where((col("industry") != "total") & (col("value") > 200))
    .orderBy(col("value").desc())
)

In [7]:
 # Print the schema of `data_2` to confirm it holds only the two selected columns.

data_2.printSchema()

root
 |-- industry: string (nullable = true)
 |-- value: integer (nullable = true)



In [8]:
# Show the first 5 rows of `data_2`; because it is sorted by "value" in
# descending order, these are the five largest values.

data_2.show(5)

+--------------------+-----+
|            industry|value|
+--------------------+-----+
|        Construction| 6030|
|        Construction| 5904|
|        Construction| 5229|
|Accommodation & f...| 5058|
|        Construction| 4965|
+--------------------+-----+
only showing top 5 rows



In [9]:
# Register `data_2` (the filtered, sorted DataFrame) as a temporary view so it
# can be queried with Spark SQL.
# NOTE(review): the view is named "data", the same as the Python variable that
# holds the unfiltered DataFrame, yet it exposes the rows of `data_2` — consider
# a more distinctive view name to avoid confusion.

data_2.createOrReplaceTempView("data")

In [10]:
# Query the "data" temporary view with Spark SQL and display the first 20 rows.
# The view is backed by `data_2`, so its rows already satisfy this predicate;
# the WHERE clause is kept so the statement spells out the full selection itself.
#   - 'total' is written with single quotes, the standard SQL string literal;
#     a double-quoted token is parsed as an identifier (a column name) when
#     double-quoted identifiers are enabled, which would break the comparison.
#   - ORDER BY states the descending sort explicitly instead of relying on the
#     order of the underlying view, which SQL does not guarantee.

spark.sql("""
    SELECT industry, value
    FROM data
    WHERE value > 200
      AND industry != 'total'
    ORDER BY value DESC
""").show()

+--------------------+-----+
|            industry|value|
+--------------------+-----+
|        Construction| 6030|
|        Construction| 5904|
|        Construction| 5229|
|Accommodation & f...| 5058|
|        Construction| 4965|
|        Construction| 4959|
|Accommodation & f...| 4950|
|        Construction| 4686|
|        Construction| 4668|
|        Construction| 4665|
|       Manufacturing| 4662|
|       Manufacturing| 4632|
|        Construction| 4575|
|        Construction| 4566|
|Professional, sci...| 4476|
|Professional, sci...| 4470|
|        Retail trade| 4434|
|        Retail trade| 4434|
|Accommodation & f...| 4251|
|Accommodation & f...| 4176|
+--------------------+-----+
only showing top 20 rows

