Import Data using URL

In [10]:
# Population-by-state table published by DOSM as a parquet file.
# Needs pandas plus a parquet engine, e.g.: pip install pandas fastparquet

import pandas as pd

URL_DATA = 'https://storage.dosm.gov.my/population/population_state.parquet'
popul = pd.read_parquet(URL_DATA)
# Parse the date column into datetimes when the file provides one.
if 'date' in popul.columns:
    popul['date'] = pd.to_datetime(popul['date'])
popul

Unnamed: 0,date,state,sex,ethnicity,age,population
0,2020-01-01,Johor,overall_sex,overall_ethnicity,overall_age,4009.7
1,2020-01-01,Johor,overall_sex,overall_ethnicity,0-4,306.3
2,2020-01-01,Johor,overall_sex,overall_ethnicity,5-9,322.3
3,2020-01-01,Johor,overall_sex,overall_ethnicity,10-14,330.7
4,2020-01-01,Johor,overall_sex,overall_ethnicity,15-19,345.2
...,...,...,...,...,...,...
25531,2023-01-01,W.P. Putrajaya,female,other_noncitizen,65-69,0.0
25532,2023-01-01,W.P. Putrajaya,female,other_noncitizen,70-74,0.0
25533,2023-01-01,W.P. Putrajaya,female,other_noncitizen,75-79,0.0
25534,2023-01-01,W.P. Putrajaya,female,other_noncitizen,80-84,0.0


In [11]:
# Keep only the aggregate rows: every sex, every ethnicity, every age group.
is_total = (
    popul['sex'].eq("overall_sex")
    & popul['ethnicity'].eq("overall_ethnicity")
    & popul['age'].eq("overall_age")
)
overall = popul[is_total]
# After the filter the breakdown columns (sex through age) hold a single
# value each, so drop them and keep date, state and population.
breakdown_cols = overall.loc[:, 'sex':'age'].columns
overall = overall.drop(columns=breakdown_cols)
# Restrict to the 2022 rows.
popul_2022 = overall.loc[overall['date'] == '2022-01-01']
popul_2022


Unnamed: 0,date,state,population
12768,2022-01-01,Johor,4028.3
12825,2022-01-01,Kedah,2163.1
12882,2022-01-01,Kelantan,1830.6
12939,2022-01-01,Melaka,1008.6
12996,2022-01-01,Pahang,1207.9
13053,2022-01-01,Negeri Sembilan,1614.3
13110,2022-01-01,Perak,2514.4
13167,2022-01-01,Perlis,289.8
13224,2022-01-01,Pulau Pinang,1740.9
13281,2022-01-01,Sabah,3414.9


In [12]:
# Dimensions of the 2022 slice as (rows, columns).
popul_2022.shape

(16, 3)

In [13]:
# Household income and expenditure table by state, published by DOSM as a parquet file.
# Needs pandas plus a parquet engine, e.g.: pip install pandas fastparquet

import pandas as pd

# NOTE: this rebinds URL_DATA, which the population cell also assigns.
URL_DATA = 'https://storage.dosm.gov.my/hies/hies_state.parquet'

hies = pd.read_parquet(URL_DATA)
# Parse the date column into datetimes when the file provides one.
if 'date' in hies.columns:
    hies['date'] = pd.to_datetime(hies['date'])
hies

Unnamed: 0,date,state,income_mean,income_median,expenditure_mean,gini,poverty
0,2022-01-01,Johor,8517,6879,5342,0.36646,4.6
1,2022-01-01,Kedah,5550,4402,3765,0.35938,9.0
2,2022-01-01,Kelantan,4885,3614,3505,0.3854,13.2
3,2022-01-01,Melaka,8057,6210,5707,0.36963,4.2
4,2022-01-01,Negeri Sembilan,6788,5226,4678,0.36853,4.4
5,2022-01-01,Pahang,5777,4753,4107,0.3077,6.3
6,2022-01-01,Pulau Pinang,8267,6502,5322,0.37058,2.0
7,2022-01-01,Perak,5779,4494,3903,0.36769,7.5
8,2022-01-01,Perlis,5664,4713,3834,0.33589,4.0
9,2022-01-01,Selangor,12233,9983,6770,0.36123,1.5


Join the Population and Household Income datasets

In [15]:
import pandas as pd
import numpy as np

# Long-form English state names substituted for the DOSM spellings.
# NOTE(review): presumably chosen to match the names in states.geojson - confirm.
state_labels = {
    'W.P. Kuala Lumpur': 'Federal Territory of Kuala Lumpur',
    'W.P. Labuan': 'Federal Territory of Labuan',
    'W.P. Putrajaya': 'Federal Territory of Putrajaya',
    'Pulau Pinang': 'Penang',
}

# Inner join keeps only the (date, state) pairs present in both tables.
joined = pd.merge(popul_2022, hies, on=['date', 'state'], how='inner')
pov_rate = joined.replace(state_labels)
# Displayed sorted by state; pov_rate itself keeps the merge order.
pov_rate.sort_values(by=['state'])

Unnamed: 0,date,state,population,income_mean,income_median,expenditure_mean,gini,poverty
13,2022-01-01,Federal Territory of Kuala Lumpur,1961.2,13325,10234,7823,0.3796,1.4
14,2022-01-01,Federal Territory of Labuan,96.9,8250,6904,4176,0.30028,2.5
15,2022-01-01,Federal Territory of Putrajaya,117.0,13473,10056,8897,0.3678,0.1
0,2022-01-01,Johor,4028.3,8517,6879,5342,0.36646,4.6
1,2022-01-01,Kedah,2163.1,5550,4402,3765,0.35938,9.0
2,2022-01-01,Kelantan,1830.6,4885,3614,3505,0.3854,13.2
3,2022-01-01,Melaka,1008.6,8057,6210,5707,0.36963,4.2
5,2022-01-01,Negeri Sembilan,1614.3,6788,5226,4678,0.36853,4.4
4,2022-01-01,Pahang,1207.9,5777,4753,4107,0.3077,6.3
8,2022-01-01,Penang,1740.9,8267,6502,5322,0.37058,2.0


Choropleth Maps using GeoJSON

In [20]:
import pandas as pd
from plotly import graph_objects as go

# Build the hover label from string copies of the individual columns.
# pov_rate itself is NOT cast to str, so its numeric/date columns stay usable
# by later cells and this cell gives the same result when re-run.
pov_rate['text'] = (
    '% Gini: ' + pov_rate['gini'].astype(str) + '<br>'
    + 'Mean Income: ' + pov_rate['income_mean'].astype(str) + '<br>'
    + 'Mean Expenditure: ' + pov_rate['expenditure_mean'].astype(str) + '<br>'
    + 'Population: ' + pov_rate['population'].astype(str) + 'k'
)

#Create figure object
fig = go.Figure(
    go.Choroplethmapbox(
        # NOTE(review): plotly documents geojson as a GeoJSON object or a URL
        # string; confirm this relative path resolves where the notebook runs.
        geojson = 'states.geojson', #Assign geojson file
        featureidkey = 'properties.Name', #Feature property matched against `locations`
        locations = pov_rate['state'], #Assign location data
        # Pass real numbers for the colour scale; to_numeric also copes with a
        # column that an earlier run already turned into strings.
        z = pd.to_numeric(pov_rate['poverty']),
        zauto = True,
        colorscale = 'reds',
        colorbar_title='Poverty rates %',
        colorbar_ticksuffix='%', #Render ticks as "5%" rather than "%5"
        showscale = True,
        marker_line_color='white',
        text=pov_rate['text']
    )
)

#Update layout
fig.update_layout(
    title_text='Malaysia Poverty Rates by State in 2022',
    mapbox_style = "carto-positron", #Decide a style for the map
    mapbox_zoom = 4.8, #Zoom in scale
    mapbox_center = {"lat": 4, "lon": 109.688}, #Center location of the map
)
fig.show()

In [23]:
import pandas as pd
from plotly import graph_objects as go

# Build the hover label from string copies of the individual columns.
# pov_rate itself is NOT cast to str, so its numeric/date columns stay usable
# by later cells and this cell gives the same result when re-run.
pov_rate['text'] = (
    '% Gini: ' + pov_rate['gini'].astype(str) + '<br>'
    + '% Poverty Rate: ' + pov_rate['poverty'].astype(str) + '<br>'
    + 'Mean Income: ' + pov_rate['income_mean'].astype(str) + '<br>'
    + 'Mean Expenditure: ' + pov_rate['expenditure_mean'].astype(str)
)

#Create figure object
fig = go.Figure(
    go.Choroplethmapbox(
        # NOTE(review): plotly documents geojson as a GeoJSON object or a URL
        # string; confirm this relative path resolves where the notebook runs.
        geojson = 'states.geojson', #Assign geojson file
        featureidkey = 'properties.Name', #Feature property matched against `locations`
        locations = pov_rate['state'], #Assign location data
        # Pass real numbers for the colour scale; to_numeric also copes with a
        # column that an earlier cell already turned into strings.
        z = pd.to_numeric(pov_rate['population']),
        zauto = True,
        colorscale = 'purd',
        colorbar_title='Population (k)',
        showscale = True,
        marker_line_color='white',
        text=pov_rate['text']
    )
)

#Update layout
fig.update_layout(
    title_text='Malaysia Population by State in 2022',
    mapbox_style = "carto-positron", #Decide a style for the map
    mapbox_zoom = 4.8, #Zoom in scale
    mapbox_center = {"lat": 4, "lon": 109.688}, #Center location of the map
)
fig.show()