# Exploratory Data Analysis

In this notebook we visualize the annual average taxable income of males and females living in Sydney, broken down by suburb/postcode.

In [1]:
# importing all necessary libraries
import folium
import geopandas as gpd
import pandas as pd
from iteration_utilities import duplicates

In [41]:
# Load the two inputs: the suburb boundary layer (GeoJSON, read with geopandas)
# and the income-by-postcode table (CSV, read with pandas).
syd_geo = gpd.read_file("suburb-10-nsw.geojson")
syd_income = pd.read_csv("Sydney_income_by_postcode.csv")

In [178]:
# Peek at the first rows of the suburb boundary layer
syd_geo.head(n=5)


Unnamed: 0,id,lc_ply_pid,dt_create,loc_pid,nsw_locali,nsw_loca_2,nsw_loca_5,nsw_loca_7,geometry
0,ckan_91e70237_d9d1_4719_a82f_e71b811154c6.1,25113,2015-09-03,NSW1147,2015-11-10,CROYDON PARK,G,1,"POLYGON ((151.11700 -33.89152, 151.11635 -33.8..."
1,ckan_91e70237_d9d1_4719_a82f_e71b811154c6.2,25065,2015-09-03,NSW2557,2015-11-10,MAYFIELD WEST,G,1,"POLYGON ((151.73345 -32.87974, 151.73255 -32.8..."
2,ckan_91e70237_d9d1_4719_a82f_e71b811154c6.3,25115,2015-09-03,NSW797,2015-11-10,CAMPSIE,G,1,"POLYGON ((151.11002 -33.90297, 151.11062 -33.9..."
3,ckan_91e70237_d9d1_4719_a82f_e71b811154c6.4,16193,2011-11-22,NSW4136,2012-02-04,WALLACETOWN,G,1,"POLYGON ((147.48238 -34.96891, 147.50777 -34.9..."
4,ckan_91e70237_d9d1_4719_a82f_e71b811154c6.5,24884,2015-09-03,NSW812,2015-11-10,CANTERBURY,G,1,"POLYGON ((151.12351 -33.90672, 151.12596 -33.9..."
...,...,...,...,...,...,...,...,...,...
4567,ckan_91e70237_d9d1_4719_a82f_e71b811154c6.4564,21327,2013-08-26,NSW4031,2013-11-01,TYALGUM CREEK,G,1,"POLYGON ((153.20887 -28.33521, 153.20690 -28.3..."
4568,ckan_91e70237_d9d1_4719_a82f_e71b811154c6.4565,21330,2013-08-26,NSW1580,2013-11-01,FREDERICKTON,G,1,"POLYGON ((152.88288 -31.01297, 152.88033 -31.0..."
4569,ckan_91e70237_d9d1_4719_a82f_e71b811154c6.4566,21334,2013-08-26,NSW2703,2013-11-01,MONKERAI,G,1,"POLYGON ((151.89607 -32.23903, 151.89392 -32.2..."
4570,ckan_91e70237_d9d1_4719_a82f_e71b811154c6.4567,21339,2013-08-26,NSW3612,2013-11-01,SOUTH DURRAS,G,1,"POLYGON ((150.26989 -35.62462, 150.27006 -35.6..."


In [46]:
# Keep only the suburb-name column and the geometry. The explicit .copy()
# makes this an independent frame rather than a slice of the loaded data, so
# later in-place operations on it do not raise SettingWithCopyWarning.
syd_geo = syd_geo[['nsw_loca_2', 'geometry']].copy()

syd_geo.shape

(4572, 2)

In [47]:
# Sort by suburb name, then drop ambiguous names from the geo data frame.
# The sort rebinds `syd_geo` instead of using inplace=True: `syd_geo` is a
# column slice of the loaded frame, and mutating a slice in place triggers
# SettingWithCopyWarning and makes the cell harder to re-run safely.
syd_geo = syd_geo.sort_values("nsw_loca_2")

# NOTE: keep=False discards *every* row whose suburb name occurs more than
# once (not just the extra copies), so only uniquely named suburbs remain.
syd_geo1 = syd_geo.drop_duplicates(subset="nsw_loca_2", keep=False)

In [49]:
# Attach the income table to the de-duplicated suburb geometries.
# Inner join on the shared suburb-name key: rows without a match on either
# side are dropped.
sydgeo_gdf = syd_geo1.merge(
    syd_income,
    how="inner",
    on="nsw_loca_2",
)


In [180]:
# Sanity check: show the first two merged rows to confirm that both the
# geometry and the income columns came through.
sydgeo_gdf.head(n=2)

Unnamed: 0,nsw_loca_2,geometry,State,Postcode,SuburbName,SA4_NAME,Number of Female individuals,Yearly average taxable income of Female,Number of Male individuals,Yearly average taxable income of Male,Over all average taxable income of suburb
0,ABBOTSBURY,"POLYGON ((150.86993 -33.87242, 150.86943 -33.8...",NSW,2176,Abbotsbury,Sydney - South West,10945,"$41,893",12928,"$53,731",47812
1,ACACIA GARDENS,"POLYGON ((150.92140 -33.73122, 150.91815 -33.7...",NSW,2763,Acacia Gardens,Sydney - Blacktown,8224,"$45,084",9032,"$61,168",53126


# Creating Choropleth Map

In [163]:
# Centre of the map = mean of the suburb centroids.
# The centroid GeoSeries is computed once and reused for both coordinates
# (previously it was recomputed for x and again for y).
# NOTE(review): centroids are taken in the layer's own CRS; if that is a
# geographic (lon/lat) CRS, geopandas may warn that they are approximate.
# That is acceptable here since the value only sets the initial map view.
suburb_centroids = sydgeo_gdf.centroid
x_map = suburb_centroids.x.mean()
y_map = suburb_centroids.y.mean()
print(x_map, y_map)

151.05959339241434 -33.81584647807083


In [164]:
# Create the base map without default tiles, centred on (y_map, x_map),
# then attach the background tile layer explicitly so it can be named and
# kept out of the layer control.
# NOTE(review): the built-in 'stamenwatercolor' tile name may not resolve on
# newer folium releases - verify against the installed version.
syd_inc_map = folium.Map(tiles=None, zoom_start=6, location=[y_map, x_map])
base_tiles = folium.TileLayer('stamenwatercolor', name="Light Map", control=False)
base_tiles.add_to(syd_inc_map)
syd_inc_map

In [165]:
# Creating choropleth map
#
# Uses the folium.Choropleth class added to the map with .add_to(); the
# older Map.choropleth() method is deprecated and absent from newer folium
# releases. The keyword arguments are unchanged.
#
# - geo_data / data: the merged GeoDataFrame supplies both the polygons and
#   the values to colour them by.
# - columns: [key column, value column]; key_on names the matching GeoJSON
#   feature property.
# The trailing ';' suppresses the repr of the returned Choropleth object.
folium.Choropleth(
    geo_data=sydgeo_gdf,
    name='Choropleth',
    data=sydgeo_gdf,
    columns=['nsw_loca_2', 'Over all average taxable income of suburb'],
    key_on='feature.properties.nsw_loca_2',
    fill_color='OrRd',
    fill_opacity=1,
    line_opacity=0.8,
    legend_name='Over all average taxable income of suburb',
    smooth_factor=0,
    highlight=True,
).add_to(syd_inc_map);



In [166]:
# Map preview: render the map inline to check the choropleth layer
syd_inc_map

In [168]:
# Adding informative labels to the map using GeoJson


def style_function(feature):
    """Resting style: almost transparent white fill with a hairline black outline."""
    return {
        'fillColor': '#ffffff',
        'color': '#000000',
        'fillOpacity': 0.1,
        'weight': 0.1,
    }


def highlight_function(feature):
    """Highlight style: half-opaque black fill with the same hairline outline."""
    return {
        'fillColor': '#000000',
        'color': '#000000',
        'fillOpacity': 0.50,
        'weight': 0.1,
    }


# Tooltip listing postcode, suburb, SA4 region and the female/male incomes.
# The field names (including their padding) must match the GeoDataFrame
# column names exactly, so they are left untouched.
suburb_tooltip = folium.features.GeoJsonTooltip(
    fields=[
        'Postcode',
        'SuburbName',
        'SA4_NAME',
        '  Yearly average taxable income of Female   ',
        '  Yearly average taxable income of Male   ',
    ],
    aliases=[
        'Postcode: ',
        'SuburbName: ',
        'SA4_NAME: ',
        '  Yearly average taxable income of Female   : ',
        '  Yearly average taxable income of Male   : ',
    ],
    style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;"),
)

# Overlay carrying the tooltips; control=False keeps it out of the layer control.
SuburbName = folium.features.GeoJson(
    sydgeo_gdf,
    style_function=style_function,
    control=False,
    highlight_function=highlight_function,
    tooltip=suburb_tooltip,
)
syd_inc_map.add_child(SuburbName)
# Keep the tooltip overlay above the other layers.
syd_inc_map.keep_in_front(SuburbName)

# Adding folium layer control to the map
folium.LayerControl().add_to(syd_inc_map)


<folium.map.LayerControl at 0x21699c3d278>

In [169]:
# Map preview: render the finished map (choropleth + tooltip overlay) inline
syd_inc_map

In [152]:
# Export the finished map as a standalone HTML page in the working directory
syd_inc_map.save(outfile="syd_inc_map.html")