In [None]:
import os

import geopandas as gpd
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [None]:
# Location dataframe: Local Government Area (LGA) boundaries for New South Wales.
# Source https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/1270.0.55.003June%202020?OpenDocument
# Data taken for "Local Government Areas ASGS Ed 2020 Digital Boundaries in ESRI Shapefile Format"

# Load every LGA boundary in the shapefile using GeoPandas.
lga_all_gdf = gpd.read_file('data/LGA_2020_AUST.shp')

# Keep only the rows for the state of New South Wales. The explicit .copy()
# makes the subset an independent frame, so the column assignment below does
# not write to a slice of the full table (the pattern that triggers pandas'
# SettingWithCopyWarning).
lga_gdf = lga_all_gdf.loc[lga_all_gdf['STE_NAME16'] == 'New South Wales'].copy()

# We join on this column later, so both dataframes need it to be the same type.
lga_gdf['LGA_CODE20'] = lga_gdf['LGA_CODE20'].astype('str')

# show the data
lga_gdf.head()

In [None]:
# Population data, one row per LGA.
# Source https://www.abs.gov.au/statistics/people/population/regional-population-age-and-sex/latest-release#interactive-maps

# Read the table and cast the join key to string in one step; the later merge
# needs LGA_CODE20 to have the same type in both dataframes.
pop_df = pd.read_csv('data/population.csv').astype({'LGA_CODE20': 'str'})

# show the data
pop_df.head()

In [None]:
# Attach the population figure to each LGA boundary.
# This is a left join on LGA_CODE20, so every boundary row is kept at this point.
lga_columns = lga_gdf[['LGA_CODE20', 'geometry', 'LGA_NAME20']]
pop_columns = pop_df[['LGA_CODE20', 'population']]
df_merged1 = pd.merge(lga_columns, pop_columns, on='LGA_CODE20', how='left')

# Drop rows that have no population or no geometry after the merge (i.e. LGAs
# that were not present in both data sources), then replace the arbitrary
# integer index with the LGA code.
df_merged1 = (
    df_merged1
    .dropna(subset=['population', 'geometry'])
    .set_index('LGA_CODE20')
)

# show the data
df_merged1.head()

In [None]:
# COVID-19 case data: one row per notified case.
# Source https://data.nsw.gov.au/data/dataset/nsw-covid-19-cases-by-location-and-likely-source-of-infection/resource/2776dbb8-f807-4fb2-b1ed-184a6fc2c8aa

# Load the case list directly from the NSW Government website, so re-running
# the notebook picks up the latest published file.
# Offline alternative: read a saved copy from
# 'data/confirmed_cases_table4_location_likely_source.csv' instead.
url = 'https://data.nsw.gov.au/data/dataset/97ea2424-abaf-4f3e-a9f2-b5c883f42b6a/resource/2776dbb8-f807-4fb2-b1ed-184a6fc2c8aa/download/confirmed_cases_table4_location_likely_source.csv'
cov_raw_df = pd.read_csv(url)

# Keep only the columns we need and remove any rows with incomplete data.
# .copy() gives an independent frame so the column assignments below are safe.
cov_df = (
    cov_raw_df[['notification_date', 'lga_code19']]
    .dropna(subset=['lga_code19', 'notification_date'])
    .copy()
)

# We join on the LGA code later, so it must be a string like the other
# dataframes. If pandas parsed the column as float (which happens when the raw
# column contains missing values), a plain str cast yields e.g. '17200.0',
# which would not match the '17200' form needed for the join. Strip a trailing
# '.0' so the key is the bare code; values that are already integer-like or
# textual are left unchanged.
cov_df['lga_code19'] = (
    cov_df['lga_code19']
    .astype('str')
    .str.replace(r'\.0$', '', regex=True)
)

# Change the date from string to pandas datetime format.
cov_df['notification_date'] = pd.to_datetime(cov_df['notification_date'], format="%Y/%m/%d")

# show the data
cov_df.head()

In [None]:
# Count cases notified within the last two weeks only.
# The window is (start_date, end_date]: the 14 days ending on end_date.
end_date = pd.to_datetime('05/09/2021', format='%d/%m/%Y')
time_dif = pd.to_timedelta('14 days')
start_date = end_date - time_dif

# Boolean mask selecting the rows whose notification date falls in the window.
notified = cov_df['notification_date']
in_window = notified.gt(start_date) & notified.le(end_date)

# Each row is one case, so the number of rows per LGA is that LGA's
# two-week case total.
dated_df = (
    cov_df.loc[in_window]
    .groupby(['lga_code19'])
    .size()
    .reset_index(name="cases")
)

# show the data
dated_df.head()

In [None]:
# Combine the two-week case counts with the LGA population and geometry data.
# validate='one_to_one' makes pandas raise if an LGA code is duplicated on
# either side of the merge.
df_merged = pd.merge(
    dated_df,
    df_merged1,
    left_on='lga_code19',
    right_on='LGA_CODE20',
    validate='one_to_one',
)

# Cases as a fraction of population. Both values are expected to be non-zero
# here (this is not checked), so the division should not produce errors.
case_counts = df_merged['cases'].astype('int')
residents = df_merged['population'].astype('float')
df_merged['percentage_cases'] = case_counts.div(residents).astype('float')

# Prepare the geo data for the mapbox graph: index by LGA code, wrap as a
# GeoDataFrame, and convert the coordinate reference system to lat/long.
df_merged = (
    gpd.GeoDataFrame(df_merged.set_index('lga_code19'))
    .to_crs(epsg=4326)
)
lga_json = df_merged.__geo_interface__  # convert to GeoJSON

df_merged.head()

In [None]:
# In order to use this code you will need your own private mapbox access token.
# You can sign up and get a token for mapbox here https://account.mapbox.com/access-tokens/
# For personal use, accessing the API will be free.
#
# The token is read from the MAPBOX_ACCESSTOKEN environment variable so that a
# real token never has to be saved inside the notebook. The placeholder below
# is only used when that variable is not set.
MAPBOX_ACCESSTOKEN = os.environ.get('MAPBOX_ACCESSTOKEN', 'your-token-here')

# Limits of the color scale. The lower limit is the smallest observed value.
# The upper limit is fixed at 0.02 instead of the observed maximum so that the
# rural cases don't skew the scale so much that it is hard to compare Sydney.
zmin = df_merged['percentage_cases'].min()
zmax = 0.020

# convert start and end date into a readable string for the figure title
t1 = start_date.strftime('%d/%m/%Y')
t2 = end_date.strftime('%d/%m/%Y')

# One shape per LGA, colored by cases as a fraction of population.
data = go.Choroplethmapbox(
        geojson = lga_json,             # this is your GeoJSON
        locations = df_merged.index,    # the index of this dataframe should align with the 'id' element in your geojson
        z = df_merged.percentage_cases, # sets the color value
        meta = list(zip(df_merged.LGA_NAME20, df_merged.cases, df_merged.population.astype('str'))), # (name, cases, population) for each shape, used by the hover text
        colorbar=dict(thickness=20, ticklen=3, tickformat='.2%',outlinewidth=0), # adjusts the format of the colorbar
        marker_line_width=1, marker_opacity=0.7, colorscale="Viridis", # adjust format of the plot
        zmin=zmin, zmax=zmax,           # sets min and max of the colorbar
        hovertemplate = "<b>%{meta[0]}</b><br>" +
                    "Cases: %{meta[1]}<br>" +
                    "Population: %{meta[2]}<br>" +
                    "Percentage: %{z:.3%}<br>" +
                    "<extra></extra>")  # sets the format of the text shown when you hover over each shape


# Set the layout for the map
layout = go.Layout(
    title = {'text': f"Covid cases normalized by LGA Population - {t1} to {t2}",
            'font': {'size':24}},       # format the plot title
    mapbox1 = dict(
        domain = {'x': [0, 1],'y': [0, 1]},
        center = dict(lat=-33.5 , lon=151),
        accesstoken = MAPBOX_ACCESSTOKEN,
        zoom = 6),
    autosize=True,
    height=650,
    margin=dict(l=0, r=0, t=40, b=0))

# Generate the map
fig=go.Figure(data=data,layout=layout)
fig.show()

# fig.write_html('index.html') # code to save the figure as a html file