In [None]:
pip install pyspark

Collecting pyspark
  Downloading pyspark-3.5.2.tar.gz (317.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.3/317.3 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.5.2-py2.py3-none-any.whl size=317812365 sha256=0efeec1ebbc01e7b6c2bdd154cf67a0fa6f958b65a61f037037e834bb9cad4d4
  Stored in directory: /root/.cache/pip/wheels/34/34/bd/03944534c44b677cd5859f248090daa9fb27b3c8f8e5f49574
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.5.2


In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import avg,sum,col,pow

# Create the Spark session for this notebook, or reuse one that already exists.
spark = (
    SparkSession.builder
    .appName("weather_data")
    .getOrCreate()
)

# Load the weather CSV: the first row supplies the column names and Spark
# infers each column's type from the data.
weather_df = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv("/content/weather_data.csv")
)

In [18]:
# 1. Average temperature per city.
# Group the rows by city and take the mean of temperature_c within each group.
avg_temperature = (
    weather_df
    .groupBy("city")
    .agg(avg(col("temperature_c")).alias("avg_temperature"))
)
print("Average temperature for each city: ")
avg_temperature.show()

# 2. Days with a temperature below freezing.
# Keep only the rows whose temperature_c is strictly below zero.
freezing_days = weather_df.where(weather_df["temperature_c"] < 0)
print("Days with temperature below freezing: ")
freezing_days.show()

# 3. City with the highest wind speed.
# Sort by wind_speed_kph, largest first, and keep only the top row.
# NOTE(review): no date filter is applied, so this is the single windiest row
# of the whole dataset rather than of one chosen day — confirm the intent.
highest_wind_speed = weather_df.orderBy("wind_speed_kph", ascending=False).limit(1)
print("City with the highest wind speed on a specific day: ")
highest_wind_speed.show()

# 4. Total number of rainy days.
# Counts every row whose condition is exactly "Rain".
rainy_days_count = (
    weather_df
    .where(col("condition") == "Rain")
    .count()
)
print(f"Total number of rainy days: {rainy_days_count}")

# 5. Average humidity per weather condition.
# Group the rows by condition and take the mean of humidity within each group.
avg_humidity = (
    weather_df
    .groupBy("condition")
    .agg(avg(col("humidity")).alias("avg_humidity"))
)
print("Average humidity for each weather condition: ")
avg_humidity.show()

# 6. Find the Hottest Day in Each City
# A global sort followed by limit(1) yields a single row for the whole
# dataset, so the maximum is computed per city instead.
#
# Step 1: highest temperature_c per city. The dict form of agg() is used
# because no Spark `max` function is imported in this notebook; it names its
# output column "max(temperature_c)", which is renamed back to
# "temperature_c" so that it can serve as a join key.
city_max_temperature = (
    weather_df
    .groupBy("city")
    .agg({"temperature_c": "max"})
    .withColumnRenamed("max(temperature_c)", "temperature_c")
)

# Step 2: keep the original rows that match their city's maximum. A left_semi
# join returns only weather_df's columns; if several days tie for the maximum
# in a city, each of them is kept.
hottest_day = (
    weather_df
    .join(city_max_temperature, on=["city", "temperature_c"], how="left_semi")
    .orderBy("city")
)
print("Hottest day in each city: ")
hottest_day.show()

# 7. Cities that experienced snow.
# Filter to rows whose condition is exactly "Snow", then list each city once.
snow_cities = (
    weather_df
    .where(col("condition") == "Snow")
    .select("city")
    .distinct()
)
print("Cities that experienced snow: ")
snow_cities.show()

# 8. Average wind speed on sunny days.
# Restrict to rows whose condition is exactly "Sunny" and average
# wind_speed_kph over all of them (a single-row result).
avg_wind_speed = (
    weather_df
    .where(col("condition") == "Sunny")
    .agg(avg(col("wind_speed_kph")).alias("avg_wind_speed"))
)
print("Average wind speed for days when the condition was sunny: ")
avg_wind_speed.show()

# 9. Coldest day across all cities.
# Sort by temperature_c, smallest first, and keep only the top row.
coldest_day = weather_df.sort(weather_df["temperature_c"].asc()).limit(1)
print("Coldest day across all cities: ")
coldest_day.show()

# 10. Add a wind_chill column.
# wind_chill = 13.12 + 0.6215*T - 11.37*V^0.16 + 0.3965*T*V^0.16,
# with T = temperature_c and V = wind_speed_kph.
# `pow` here is the Spark column function imported at the top of the notebook,
# not Python's builtin, so the whole expression is evaluated per row by Spark.
temperature = col("temperature_c")
wind_factor = pow(col("wind_speed_kph"), 0.16)
data_with_wind_chill = weather_df.withColumn(
    "wind_chill",
    13.12 + 0.6215 * temperature - 11.37 * wind_factor + 0.3965 * temperature * wind_factor,
)
print("New dataset after adding column")
data_with_wind_chill.show()

Average temperature for each city: 
+-----------+-------------------+
|       city|    avg_temperature|
+-----------+-------------------+
|Los Angeles| 17.666666666666668|
|    Chicago|-2.6666666666666665|
|   New York|  4.666666666666667|
+-----------+-------------------+

Days with temperature below freezing: 
+----------+-------+-------------+--------+--------------+---------+
|      date|   city|temperature_c|humidity|wind_speed_kph|condition|
+----------+-------+-------------+--------+--------------+---------+
|2023-01-01|Chicago|           -2|      75|            25|     Snow|
|2023-01-02|Chicago|           -5|      80|            30|     Snow|
|2023-01-03|Chicago|           -1|      70|            18|   Cloudy|
+----------+-------+-------------+--------+--------------+---------+

City with the highest wind speed on a specific day: 
+----------+-------+-------------+--------+--------------+---------+
|      date|   city|temperature_c|humidity|wind_speed_kph|condition|
+----------