In [1]:
# required libraries
# install packages using ----pip install---- command, if required
from datetime import datetime
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm_pandas as tqdmp
from tqdm.notebook import tqdm, tqdm_notebook, trange
from time import sleep
from meteostat import Stations, Daily

In [2]:
# load the Australian postcode table (one row per postcode/suburb entry
# with its coordinates) from the CSV in the working directory
australia_long_lat_data = pd.read_csv(
    filepath_or_buffer="Australian_Post_Codes_Lat_Lon.csv"
)

In [3]:
# discard rows that have no suburb name
australia_long_lat_data = australia_long_lat_data.dropna(subset=['suburb'])

# keep only the Victorian rows; the old row labels become an 'index' column
is_victoria = australia_long_lat_data['state'] == 'VIC'
victoria_long_lat_data = australia_long_lat_data.loc[is_victoria].reset_index()

# collapse to one row per postcode, taking the first non-null value of
# every remaining column within each postcode group
victoria_long_lat_data = (
    victoria_long_lat_data
    .groupby('postcode')
    .first()
    .reset_index()
)

# remove the columns that are not needed downstream
victoria_long_lat_data = victoria_long_lat_data.drop(
    columns=['index', 'state', 'dc', 'type']
)

In [4]:
# column layout of the weather table: location fields first, then the
# daily measurement fields
weather_columns = [
    'postcode', 'suburb', 'lat', 'long',
    'tavg', 'tmin', 'tmax', 'prcp', 'snow',
    'wdir', 'wspd', 'wpgt', 'pres', 'tsun',
]

# start from a frame with those columns and no rows; the fetch loop fills it
weather_df = pd.DataFrame(columns=weather_columns)

In [5]:
# Fetch daily weather observations for every postcode row from its nearest
# station and add them to `weather_df`.
#
# The per-postcode frames are collected in a list and concatenated once after
# the loop: `DataFrame.append` was removed in pandas 2.0, and growing a frame
# inside a loop copies every previously accumulated row on each iteration.
station_frames = []

# `iterrows()` is a generator without a length, so the row count is passed as
# `total` to give the progress bar a known end point. `tqdm` here is the
# notebook progress bar imported from `tqdm.notebook`.
for index, row in tqdm(victoria_long_lat_data.iterrows(), total=len(victoria_long_lat_data)):

    # getting nearest station information
    # NOTE(review): the coordinates-in-constructor call is kept as written;
    # newer meteostat releases may expect `Stations().nearby(lat, lon)` instead
    # -- confirm against the installed version.
    stations = Stations(lat = row['lat'], lon = row['lon'])
    station = stations.fetch(1)

    # retrieve daily data from 2015-01-01 through 2021-07-31
    data = Daily(station, start = datetime(2015, 1, 1), end = datetime(2021, 7, 31))
    data = data.fetch()

    # tag every observation with the postcode row it was fetched for
    data['postcode'] = row['postcode']
    data['suburb'] = row['suburb']
    data['long'] = row['lon']
    data['lat'] = row['lat']

    station_frames.append(data)

# Single concatenation. The existing `weather_df` stays first so its column
# order leads the result, as it did with the repeated appends.
weather_df = pd.concat([weather_df, *station_frames])

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [6]:
# move the first index level into a regular column, then label that
# column 'Date'
weather_df = (
    weather_df
    .reset_index(level=0)
    .rename(columns={'index': 'Date'})
)

In [7]:
# parse the Date column once and derive the calendar parts from it
date_index = pd.DatetimeIndex(weather_df['Date'])
for part in ('year', 'month', 'day'):
    weather_df[part] = getattr(date_index, part)

In [8]:
# discard the measurement columns that the temperature summary does not use
unused_measurements = ['snow', 'wdir', 'wspd', 'wpgt', 'pres', 'tsun', 'prcp']
weather_df = weather_df.drop(columns=unused_measurements)

# keep only the rows whose longitude and latitude are both non-zero
nonzero_long = weather_df['long'] != 0
nonzero_lat = weather_df['lat'] != 0
weather_df = weather_df.loc[nonzero_long & nonzero_lat]

In [9]:
# mean of the daily minimum and maximum temperature for every
# postcode/suburb location and calendar month
group_keys = ['postcode', 'year', 'month', 'suburb', 'long', 'lat']
monthly_groups = weather_df.groupby(group_keys)

avg_min_temp_df = monthly_groups['tmin'].mean().reset_index()
avg_max_temp_df = monthly_groups['tmax'].mean().reset_index()

In [10]:
# place the tmax averages next to the tmin frame, column-wise
tmax_only = avg_max_temp_df[['tmax']]
combined_df = pd.concat(
    [avg_min_temp_df, tmax_only],
    axis=1,
)

In [11]:
# save the combined monthly averages as CSV, leaving out the row index
combined_df.to_csv(
    path_or_buf="suburb_generated_weather_data.csv",
    index=False,
)