In [2]:
from pyspark import SparkContext, SparkConf

# Step 1: Set up SparkContext
# Only one SparkContext may be active per process, so calling the constructor
# while a context is still alive (e.g. re-running this cell after a failure)
# raises ValueError. getOrCreate() reuses the active context if there is one
# and otherwise builds a new one from `conf`.
# NOTE: when an existing context is reused, `conf` is not applied to it.
conf = SparkConf().setAppName("MinMaxTemperature").setMaster("local[*]")
sc = SparkContext.getOrCreate(conf=conf)

# Step 2: Sample readings as (location, temperature) pairs.
# Record order is kept exactly as in the original sample set.
data = [
    ("New York", 30), ("Los Angeles", 25),
    ("New York", 28), ("Los Angeles", 22),
    ("New York", 35), ("Los Angeles", 20),
    ("Chicago", 15), ("Chicago", 10), ("Chicago", 12),
]

# Step 3: Create an RDD from the data
rdd = sc.parallelize(data)

# Step 4: Calculate both min and max temperatures for each location
# Each reading is first turned into a (min, max) pair of itself, then pairs
# sharing a location are merged. reduceByKey merges values locally on each
# partition before the shuffle, whereas groupByKey moves every individual
# reading across the shuffle just to reduce it afterwards.
# The result has the same shape as before: (location, (min_temp, max_temp)).
min_max_temp_rdd = rdd.mapValues(lambda temp: (temp, temp)).reduceByKey(
    lambda left, right: (min(left[0], right[0]), max(left[1], right[1]))
)

# Step 5: Collect the results and display
# collect() is the action that actually triggers the Spark job, so a failure
# in the pipeline surfaces here. The finally clause makes sure the context is
# stopped even then, instead of being left running for the rest of the session.
try:
    result = min_max_temp_rdd.collect()
    print("Minimum and Maximum Temperatures by Location:")
    for location, (min_temp, max_temp) in result:
        print(f"{location}: Min = {min_temp}°C, Max = {max_temp}°C")
finally:
    # Stop the SparkContext
    sc.stop()

Minimum and Maximum Temperatures by Location:
New York: Min = 28°C, Max = 35°C
Los Angeles: Min = 20°C, Max = 25°C
Chicago: Min = 10°C, Max = 15°C
