In [124]:
import folium
import pandas as pd
import geopandas as gpd

In [125]:
%%time

# Read files: the 2016 census profile table (CSV) and the boundary layer
census2016 = pd.read_csv('data/subdiv_2016/98-401-X2016042_English_CSV_data.csv')
boundaries = gpd.read_file('data/boundary/')

# Reproject the boundaries to EPSG:4326 (WGS84 longitude/latitude).
# The EPSG code is passed directly instead of the proj4-style
# {'init': 'epsg:4326'} dict, which is deprecated in current pyproj/geopandas.
boundaries = boundaries.to_crs(epsg=4326)



CPU times: user 15.3 s, sys: 820 ms, total: 16.1 s
Wall time: 17.4 s


In [126]:
# Preview the first five rows to check the raw column layout
census2016.head(n=5)

Unnamed: 0,CENSUS_YEAR,GEO_CODE (POR),GEO_LEVEL,GEO_NAME,GNR,GNR_LF,DATA_QUALITY_FLAG,CSD_TYPE_NAME,ALT_GEO_CODE,DIM: Profile of Census Subdivisions (2247),Member ID: Profile of Census Subdivisions (2247),Notes: Profile of Census Subdivisions (2247),Dim: Sex (3): Member ID: [1]: Total - Sex,Dim: Sex (3): Member ID: [2]: Male,Dim: Sex (3): Member ID: [3]: Female
0,2016,1,2,St. John's,3.5,5.3,0,,1001,"Population, 2016",1,1.0,205955.0,...,...
1,2016,1,2,St. John's,3.5,5.3,0,,1001,"Population, 2011",2,2.0,196954.0,...,...
2,2016,1,2,St. John's,3.5,5.3,0,,1001,"Population percentage change, 2011 to 2016",3,,4.6,...,...
3,2016,1,2,St. John's,3.5,5.3,0,,1001,Total private dwellings,4,3.0,92353.0,...,...
4,2016,1,2,St. John's,3.5,5.3,0,,1001,Private dwellings occupied by usual residents,5,4.0,85015.0,...,...


In [127]:
# Fix column names: map the verbose census headers onto short identifiers
census_column_names = {
    'Member ID: Profile of Census Subdivisions (2247)': 'propertyid',
    'Dim: Sex (3): Member ID: [1]: Total - Sex': 'total_value',
    'Dim: Sex (3): Member ID: [2]: Male': 'male_value',
    'Dim: Sex (3): Member ID: [3]: Female': 'female_value',
    'GEO_CODE (POR)': 'geoid',
}
census2016 = census2016.rename(columns=census_column_names)

# The boundary layer's id column gets the same name so both frames share a 'geoid' key
boundaries = boundaries.rename(columns={'CSDUID': 'geoid'})

# Fix column types. errors='coerce' sets any non-numeric value to NaN
boundaries['geoid'] = pd.to_numeric(boundaries['geoid'], errors='coerce')
for value_column in ('total_value', 'male_value', 'female_value'):
    census2016[value_column] = pd.to_numeric(census2016[value_column], errors='coerce')

In [128]:
# Check dataframe info
# info(verbose=False) prints the compact summary: row/column counts, dtype tally, memory usage
census2016.info(verbose=False)
# Last expression of the cell: displays the dtype of every column
census2016.dtypes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2512146 entries, 0 to 2512145
Columns: 15 entries, CENSUS_YEAR to female_value
dtypes: float64(6), int64(6), object(3)
memory usage: 287.5+ MB


CENSUS_YEAR                                       int64
geoid                                             int64
GEO_LEVEL                                         int64
GEO_NAME                                         object
GNR                                             float64
GNR_LF                                          float64
DATA_QUALITY_FLAG                                 int64
CSD_TYPE_NAME                                    object
ALT_GEO_CODE                                      int64
DIM: Profile of Census Subdivisions (2247)       object
propertyid                                        int64
Notes: Profile of Census Subdivisions (2247)    float64
total_value                                     float64
male_value                                      float64
female_value                                    float64
dtype: object

In [129]:
# Same checks for the boundary GeoDataFrame: compact summary first,
# then the per-column dtypes as the cell's displayed value
boundaries.info(verbose=False)
boundaries.dtypes

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 5162 entries, 0 to 5161
Columns: 19 entries, geoid to geometry
dtypes: int64(1), object(18)
memory usage: 766.3+ KB


geoid        int64
CSDNAME     object
CSDTYPE     object
PRUID       object
PRNAME      object
CDUID       object
CDNAME      object
CDTYPE      object
CCSUID      object
CCSNAME     object
ERUID       object
ERNAME      object
SACCODE     object
SACTYPE     object
CMAUID      object
CMAPUID     object
CMANAME     object
CMATYPE     object
geometry    object
dtype: object

In [130]:
%%time

# Select two census properties by id:
#   1747 - "Mathematics, computer and information sciences"
#   1713 - used as the denominator (the total over major fields of study, going by the variable name)
census_math_cs = census2016.loc[census2016['propertyid'].eq(1747)]
census_total_major_field_of_study = census2016.loc[census2016['propertyid'].eq(1713)]

# Calculate ratio: pair both selections on geoid, then divide the two totals
df_attributes = census_math_cs.merge(
    census_total_major_field_of_study,
    on='geoid',
    suffixes=['_math_cs', '_total'],
)
df_attributes['ratio'] = df_attributes['total_value_math_cs'].div(df_attributes['total_value_total'])

# Join property with geo information.
# Note: merge is called on the GeoDataFrame so the result is still a GeoDataFrame
mathcs_ratio_with_geo = boundaries.merge(df_attributes, on='geoid')

CPU times: user 40 ms, sys: 0 ns, total: 40 ms
Wall time: 44.5 ms


In [131]:
%%time
# Base map centred on the given [latitude, longitude] pair
m = folium.Map(location=[43.6532, -79.3832])

# Colour each boundary polygon by its 'ratio' value, matching data rows to
# GeoJSON features through the shared 'geoid' property.
# folium.Choropleth(...).add_to(m) is used instead of Map.choropleth, which was
# only a deprecated forwarding wrapper and is missing from newer folium releases.
folium.Choropleth(
    geo_data=mathcs_ratio_with_geo.to_json(),
    data=mathcs_ratio_with_geo,
    columns=['geoid', 'ratio'],
    key_on='feature.properties.geoid',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    tooltip=folium.features.GeoJsonTooltip(fields=['ratio'], aliases=['ratio'], localize=True),
).add_to(m)



CPU times: user 2.78 s, sys: 110 ms, total: 2.89 s
Wall time: 2.95 s


In [132]:
# Write the map built above to an HTML file
m.save('base.html')

## Show details on hover

In [133]:
def add_popup(mapobj, gdf, popup_field_list):
    """Add ``gdf`` to ``mapobj`` as a 'Details' GeoJson layer with a hover tooltip.

    Args:
        mapobj: object the new layer is attached to (via the layer's ``add_to``).
        gdf: object exposing ``to_json()``; its output becomes the layer data.
        popup_field_list: field names shown in the tooltip; the same list is
            used for the tooltip aliases.

    Returns:
        ``mapobj``, the same object that was passed in.
    """
    def outline_only(feature):
        # Every feature gets the same style: 0.5-weight border, '#00000000' fill colour
        return {'weight': .5, 'fillColor': '#00000000'}

    geojson_text = gdf.to_json()
    hover_tooltip = folium.features.GeoJsonTooltip(
        fields=popup_field_list,
        aliases=popup_field_list,
        localize=True,
    )
    details_layer = folium.GeoJson(
        name='Details',
        data=geojson_text,
        style_function=outline_only,
        tooltip=hover_tooltip,
    )
    details_layer.add_to(mapobj)
    return mapobj

# add_popup hands back the map it was given, so the tooltip layer is added
# and the result written to disk in a single expression
add_popup(m, mathcs_ratio_with_geo, ['ratio']).save('with_popup.html')