In [59]:
import sys
from pyspark.sql import SparkSession
import pyspark.sql.functions as sparkFun

#Name under which this job is registered with Spark
appName = "DataFrame Example"

#Folder the input CSV files are read from
input_folder = "data/in"

#Folder under which the results are written
output_folder = "data/out"

#Obtain the Spark session for this application (created if none is active yet)
spark = SparkSession.builder.appName(appName).getOrCreate()

#Load the CSV dataset found in the input folder into a DataFrame.
#  header:      take the column names from the first line of the dataset files
#  inferSchema: let Spark work out the column data types from the data itself
weather_df = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv(input_folder)
)


#Show 10 rows without truncating lines.
#review content might be a multi-line string.
#weather_df.show(10, False)

#Show dataset schema/structure with field names and types
#weather_df.printSchema()

#-----------------------------------

#For every station and calendar day, compute the minimum, average and maximum
#solar radiation, then list the days with the highest average first.

#The day is the part of "Measurement Timestamp" before the first space
day_col = sparkFun.split(weather_df["Measurement Timestamp"], " ")[0]

#Define the average column's name once so the alias and the sort key below
#always match exactly, regardless of the spark.sql.caseSensitive setting
avg_col_name = "Average Solar radiation"

result_df = (
    weather_df.select("Station Name", "Measurement Timestamp", "Solar Radiation")
    .withColumn("day", day_col)
    .groupBy("Station Name", "day")
    .agg(
        sparkFun.min("Solar Radiation").alias("Min Solar radiation"),
        sparkFun.avg("Solar Radiation").alias(avg_col_name),
        sparkFun.max("Solar Radiation").alias("Max Solar radiation"),
    )
    .orderBy(avg_col_name, ascending=False)
)

#Print the first 10 rows without truncating long values
result_df.show(10, False)

#coalesce(1) reduces the result to a single partition before it is written as CSV;
#"overwrite" mode replaces any output already present at the target path
result_df.coalesce(1).write.mode("overwrite").format("csv").save(output_folder+"/ex7-4")

#Stop Spark session
spark.stop()

+---------------------------+----------+-------------------+-----------------------+-------------------+
|Station Name               |day       |Min Solar radiation|Average Solar radiation|Max Solar radiation|
+---------------------------+----------+-------------------+-----------------------+-------------------+
|63rd Street Weather Station|05/06/2023|544                |544.0                  |544                |
|Foster Weather Station     |08/11/2020|394                |504.15384615384613     |821                |
|63rd Street Weather Station|05/19/2016|3                  |470.64285714285717     |910                |
|Foster Weather Station     |08/12/2020|410                |463.0                  |819                |
|Oak Street Weather Station |07/07/2018|-3                 |454.25                 |892                |
|Oak Street Weather Station |05/21/2016|0                  |436.6                  |861                |
|Oak Street Weather Station |05/19/2016|0              