# Weather Data Analysis

In [92]:
import pandas as pd

# Loading and cleaning the data
weather = pd.read_csv("weatherAUS.csv")
weather["Date"] = pd.to_datetime(weather["Date"])
weather = weather.dropna(subset=["MinTemp", "MaxTemp", "Rainfall", "Location"])

weather.head(10)


Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RISK_MM,RainTomorrow
0,2008-12-01,Albury,13.4,22.9,0.6,,,W,44.0,W,...,22.0,1007.7,1007.1,8.0,,16.9,21.8,No,0.0,No
1,2008-12-02,Albury,7.4,25.1,0.0,,,WNW,44.0,NNW,...,25.0,1010.6,1007.8,,,17.2,24.3,No,0.0,No
2,2008-12-03,Albury,12.9,25.7,0.0,,,WSW,46.0,W,...,30.0,1007.6,1008.7,,2.0,21.0,23.2,No,0.0,No
3,2008-12-04,Albury,9.2,28.0,0.0,,,NE,24.0,SE,...,16.0,1017.6,1012.8,,,18.1,26.5,No,1.0,No
4,2008-12-05,Albury,17.5,32.3,1.0,,,W,41.0,ENE,...,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,No,0.2,No
5,2008-12-06,Albury,14.6,29.7,0.2,,,WNW,56.0,W,...,23.0,1009.2,1005.4,,,20.6,28.9,No,0.0,No
6,2008-12-07,Albury,14.3,25.0,0.0,,,W,50.0,SW,...,19.0,1009.6,1008.2,1.0,,18.1,24.6,No,0.0,No
7,2008-12-08,Albury,7.7,26.7,0.0,,,W,35.0,SSE,...,19.0,1013.4,1010.1,,,16.3,25.5,No,0.0,No
8,2008-12-09,Albury,9.7,31.9,0.0,,,NNW,80.0,SE,...,9.0,1008.9,1003.6,,,18.3,30.2,No,1.4,Yes
9,2008-12-10,Albury,13.1,30.1,1.4,,,W,28.0,S,...,27.0,1007.0,1005.7,,,20.1,28.2,Yes,0.0,No


In [93]:
# Caculate the average min temp and average max temp
avg_min = round((weather["MinTemp"].mean()), 2)
avg_max = round((weather["MaxTemp"].mean()), 2)

print(f"The average minimum temperature is {avg_min}C")
print(f"The average maximum temperature is {avg_max}C")

The average minimum temperature is 12.09C
The average maximum temperature is 23.27C


In [94]:
# Indentify the hottest and coldest month
weather["Month"] = weather["Date"].dt.month_name()

weather["Month"] = pd.Categorical(
    weather["Month"],
    categories=["January","February","March","April","May","June",
                "July","August","September","October","November","December"],
    ordered=True
)


monthly_temp = weather.groupby("Month", observed=True)[["MinTemp", "MaxTemp"]].mean()
hottest_month = monthly_temp["MaxTemp"].idxmax()
coldest_month= monthly_temp["MinTemp"].idxmin()

print(f"The hottest month is {hottest_month}")
print(f"The coldest month is {coldest_month}")

The hottest month is January
The coldest month is July


In [95]:
# Calculate the average monthly rainfall(mm)
average_monthly_rainfall = weather.groupby("Month", observed=True)["Rainfall"].mean()
average_monthly_rainfall

Month
January      2.728374
February     3.345375
March        3.030144
April        2.300315
May          2.030571
June         2.363431
July         2.182509
August       2.038607
September    1.638843
October      1.889054
November     2.314987
December     2.306888
Name: Rainfall, dtype: float64

In [96]:
# Find out the wettest month
monthly_rainfall = weather.groupby("Month", observed=True)["Rainfall"].sum()
highest_rainfall = monthly_rainfall.idxmax()

print(f"The month with the highest rainfall is {highest_rainfall}")

The month with the highest rainfall is March


In [97]:
# Indentify the hottest city/region
regions = weather.groupby("Location")["MaxTemp"].mean()
hottest_region = regions.idxmax()
temp = round((regions.loc[hottest_region]), 2)
print(f"{hottest_region} is the hottest city/region with the average max temp of {temp}C")

Katherine is the hottest city/region with the average max temp of 34.89C


In [98]:
# Find out the wettest region 
location = weather.groupby("Location")["Rainfall"].sum()
wettest_region = location.idxmax()
print(f"{wettest_region} receives the most rainfall in Australia")

Cairns receives the most rainfall in Australia
