# Canadian Personal Income Statistics

The graphs below show personal income statistics for Canadians using data from [Statistics Canada](https://doi.org/10.25318/1110000801-eng).  

Total Income includes income from:
- employment income (salaries, commission), 
- self-employment income, 
- pension income (OAS, CPP/QPP, registered pension plans, RRIFs), 
- investment income, 
- social benefit payments (EI, workers' compensation, social assistance), and 
- other income.

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import ipywidgets as widgets
import logging

from wrangling import subset_plot_data_for_income_bins

# Logger named after the current module, used by the helper functions below.
logger = logging.getLogger(__name__)
# Record layout: source file name, line number, function name (right-aligned
# in a 20-character field), then the message.
FORMAT = "[%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s"
# Apply the layout via the root logger's default handler.
logging.basicConfig(format=FORMAT)
# Let this logger emit everything from DEBUG upwards.
logger.setLevel(logging.DEBUG)

In [2]:
# Load the raw CSV into a DataFrame.
# NOTE(review): presumably this is the Statistics Canada table linked in the
# introduction -- confirm the file's provenance.
path = r"../data/raw/11100008.csv"
# low_memory=False makes pandas read the file in one pass rather than in
# chunks, which avoids mixed-dtype guesses on large columns.
df = pd.read_csv(path, low_memory=False)

In [3]:
# Column names handed to subset_plot_data_for_income_bins together with the
# filter values (presumably the columns it retains -- verify in wrangling.py).
cols_to_keep = [
    "REF_DATE",
    "GEO",
    "Sex",
    "Age group",
    "Persons with income",
    "SCALAR_FACTOR",
    "VALUE",
]


In [4]:
# Income-group labels, lowest threshold first: one "under $5,000" group
# followed by twelve cumulative "$X and over" groups (13 labels in total).
income_to_plot = ["Persons with income under $5,000"] + [
    f"Persons with income of ${threshold} and over"
    for threshold in (
        "5,000", "10,000", "15,000", "20,000", "25,000", "35,000",
        "50,000", "75,000", "100,000", "150,000", "200,000", "250,000",
    )
]

In [5]:


def get_persons_per_income_group(df):
    """Turn cumulative "X and over" counts in ``VALUE`` into per-bin counts.

    Every row except the first and the last has the value of the row that
    follows it subtracted, so "25k and over" minus "35k and over" becomes the
    count for the 25k-35k bin. The first row and the last row are kept as
    they are.
    NOTE(review): assumes rows are ordered from the lowest threshold to the
    highest, as in ``income_to_plot`` -- confirm against the subsetting helper.

    The caller's frame is not modified. The original implementation wrote
    through ``df["VALUE"].values``, which silently altered the input and
    fails on the read-only arrays pandas returns under Copy-on-Write.

    Args:
        df: DataFrame with a numeric ``VALUE`` column.

    Returns:
        A copy of ``df`` whose ``VALUE`` column holds the per-bin counts.
    """
    # Work on a private, writable copy of the column.
    values = df["VALUE"].to_numpy(copy=True)
    # The right-hand side is evaluated in full before the assignment, so each
    # difference uses the original (still cumulative) neighbour.
    values[1:-1] = values[1:-1] - values[2:]
    converted = df.copy()
    converted["VALUE"] = values
    return converted

def create_income_bins(y):
    """Collapse the 13 source income bins into the 8 bins used for plotting.

    Args:
        y: 1-D array of per-bin values.

    Returns:
        * For 13 values: an array of 8 values -- the sum of elements 0-4, the
          sum of elements 5-6, then elements 7 through 12 individually.
        * For an empty input: 8 NaNs, so the result has the same length as
          the populated case. The original returned 13 NaNs, which no longer
          lined up with the 8-bin output once the gap slots were inserted.
        * For any other length: ``y`` unchanged.
    """
    original_income_bins = 13
    # Start index of each output bin; reduceat sums from one start index up
    # to (but excluding) the next one.
    bin_starts = [0, 5, 7, 8, 9, 10, 11, 12]
    logger.info("create_income_bins()")
    # Lazy %-formatting: the array is only rendered if DEBUG is enabled.
    logger.debug("y: \n %s", y)
    if len(y) == original_income_bins:
        return np.add.reduceat(y, bin_starts)
    if len(y) == 0:
        return np.array([np.nan] * len(bin_starts))
    return y

def add_gaps(y):
    """Return ``y`` with a NaN inserted before original positions 4 and 7.

    The NaN entries act as empty slots that mark the discontinuities between
    groups of bins on the chart.
    """
    gap_positions = [4, 7]
    return np.insert(y, gap_positions, np.nan)




def normalize_plot_data(y):
    """Divide every element of ``y`` by the sum of ``y``."""
    total = np.sum(y)
    return np.divide(y, total)


def format_hist_data(df):
    """Return the per-bin shares of ``df``'s ``VALUE`` column.

    The cumulative counts are first converted to per-bin counts, then scaled
    so that they sum to 1.
    """
    per_bin = get_persons_per_income_group(df)
    return normalize_plot_data(per_bin["VALUE"].values)


def preprocess_income_bin_data(df):
    """Build the y-values for the histogram and the cumulative chart.

    Returns:
        A ``(y_hist, y_cumulative)`` pair: the binned shares and their running
        total, each with the NaN gap slots added by ``add_gaps``.
    """
    binned_shares = create_income_bins(format_hist_data(df))
    # Accumulate before the gaps go in, so the NaNs do not poison the sum.
    running_total = np.cumsum(binned_shares)
    return add_gaps(binned_shares), add_gaps(running_total)

In [6]:
def set_chart_title(plot_type, year, geo, sex, age):
    """Build the HTML title shown above a chart.

    Args:
        plot_type: ``"hist"`` or ``"cumulative"``.
        year: Reference year of the data.
        geo: Geography label.
        sex: Sex label.
        age: Age-group label.

    Returns:
        A bold heading for the plot type, a ``<br>`` line break, then a
        subtitle describing the current selection.

    Raises:
        ValueError: If ``plot_type`` is not one of the supported values
            (previously this surfaced as an ``UnboundLocalError``).
    """
    headings = {
        "hist": "<b>Income distribution:</b> <br>",
        "cumulative": "<b>Cumulative income distribution:</b> <br>",
    }
    if plot_type not in headings:
        raise ValueError(
            f"Unknown plot_type {plot_type!r}; expected one of {sorted(headings)}"
        )
    subtitle = f"{year} total income in {geo} for {sex}, {age}"
    return headings[plot_type] + subtitle

def format_bar_chart(plot_type, fig, year, age, sex, geo):
    """Apply the shared title and y-axis formatting to ``fig`` and return it.

    The y-axis is fixed to the range 0-1 and its ticks are rendered as whole
    percentages; the title comes from ``set_chart_title``.
    """
    chart_title = set_chart_title(plot_type, year, geo, sex, age)
    fig.update_yaxes(range=[0, 1])
    fig.update_layout(title=chart_title, yaxis=dict(tickformat=",.0%"))
    return fig

def create_bar_chart(df, year, age, sex, geo, income_to_plot, cols_to_keep):
    """Create the histogram and cumulative bar charts for one selection.

    Returns:
        A ``(histogram, cumulative)`` pair of ``go.FigureWidget`` objects.
    """
    # Eight income bins plus two "..." placeholders that line up with the NaN
    # gap slots in the y-values.
    # NOTE(review): both placeholders share the same label -- confirm how
    # plotly positions repeated category labels.
    bin_labels = [
        "<25k", "25-50k", "50k-75k", "75k-100k",
        "...",
        "100-150k", "150-200k", "200-250k",
        "...",
        ">250k",
    ]
    subset = subset_plot_data_for_income_bins(
        df, year, age, sex, geo, income_to_plot, cols_to_keep
    )
    y_hist, y_cumulative = preprocess_income_bin_data(subset)

    charts = []
    for plot_type, heights in (("hist", y_hist), ("cumulative", y_cumulative)):
        figure = go.Figure([go.Bar(x=bin_labels, y=heights)])
        figure = format_bar_chart(plot_type, figure, year, age, sex, geo)
        charts.append(go.FigureWidget(figure))

    hist_chart, cumulative_chart = charts
    return hist_chart, cumulative_chart

In [7]:
# parameters
# Age-group labels offered in the age dropdown.
age_values = [
    "All age groups",
    "0 to 24 years",
    "25 to 34 years",
    "35 to 44 years",
    "45 to 54 years",
    "55 to 64 years",
    "65 to 74 years",
    "75 years and over",
    "65 years and over",
]
# Geography labels offered in the location dropdown.
# NOTE(review): presumably these must match the GEO column of the CSV
# character for character (including oddities such as 'Collingwood , Ontario'
# and 'NewBrunswick') -- verify against the data before normalizing any label.
geo_values = ['Canada', 'Newfoundland and Labrador',
       "St. John's, Newfoundland and Labrador", 'Prince Edward Island',
       'Nova Scotia', 'Halifax, Nova Scotia', 'New Brunswick',
       'Saint John, New Brunswick', 'Quebec', 'Saguenay, Quebec',
       'Québec, Quebec', 'Sherbrooke, Quebec', 'Trois-Rivières, Quebec',
       'Montréal, Quebec', 'Ottawa-Gatineau, Quebec part', 'Ontario',
       'Ottawa-Gatineau, Ontario part', 'Oshawa, Ontario',
       'Toronto, Ontario', 'Hamilton, Ontario',
       'St. Catharines-Niagara, Ontario',
       'Kitchener-Cambridge-Waterloo, Ontario', 'London, Ontario',
       'Windsor, Ontario', 'Greater Sudbury, Ontario',
       'Thunder Bay, Ontario', 'Manitoba', 'Winnipeg, Manitoba',
       'Saskatchewan', 'Regina, Saskatchewan', 'Saskatoon, Saskatchewan',
       'Alberta', 'Calgary, Alberta', 'Edmonton, Alberta',
       'British Columbia', 'Vancouver, British Columbia',
       'Victoria, British Columbia', 'Yukon', 'Northwest Territories',
       'Nunavut', 'Kingston, Ontario',
       'Abbotsford-Mission, British Columbia', 'Moncton, New Brunswick',
       'Peterborough, Ontario', 'Brantford, Ontario', 'Guelph, Ontario',
       'Barrie, Ontario', 'Kelowna, British Columbia',
       'Bay Roberts, Newfoundland and Labrador',
       'Grand Falls-Windsor, Newfoundland and Labrador',
       'Corner Brook, Newfoundland and Labrador',
       'Non CMA-CA, Newfoundland and Labrador',
       'Charlottetown, Prince Edward Island',
       'Summerside, Prince Edward Island',
       'Non CMA-CA, Prince Edward Island', 'Kentville, Nova Scotia',
       'Truro, Nova Scotia', 'New Glasgow, Nova Scotia',
       'Cape Breton, Nova Scotia', 'Non CMA-CA, Nova Scotia',
       'Fredericton, New Brunswick', 'Bathurst, New Brunswick',
       'Miramichi, New Brunswick', 'Campbellton, New Brunswick part',
       'Edmundston, New Brunswick', 'Non CMA-CA, New Brunswick',
       'Campbellton, Quebec part', 'Matane, Quebec', 'Rimouski, Quebec',
       'Rivière-du-loup, Quebec', 'Baie-Comeau, Quebec', 'Alma, Quebec',
       'Dolbeau-Mistassini, Quebec', 'Sept-Îles, Quebec',
       'Saint-Georges, Quebec', 'Thetford Mines, Quebec',
       'Cowansville, Quebec', 'Victoriaville, Quebec',
       'Shawinigan, Quebec', 'La Tuque, Quebec', 'Drummondville, Quebec',
       'Granby, Quebec', 'Saint-Hyacinthe, Quebec', 'Sorel-Tracy, Quebec',
       'Joliette, Quebec', 'Saint-Jean-sur-Richelieu, Quebec',
       'Salaberry-de-Valleyfield, Quebec', 'Lachute, Quebec',
       "Val-d'Or, Quebec", 'Amos, Quebec', 'Rouyn-Noranda, Quebec',
       'Hawkesbury, Quebec part', 'Non CMA-CA, Quebec',
       'Cornwall, Ontario', 'Hawkesbury, Ontario part',
       'Brockville, Ontario', 'Pembroke, Ontario', 'Petawawa, Ontario',
       'Belleville, Ontario', 'Cobourg, Ontario', 'Port Hope, Ontario',
       'Kawartha Lakes, Ontario', 'Centre Wellington, Ontario',
       'Ingersoll, Ontario', 'Woodstock, Ontario', 'Tillsonburg, Ontario',
       'Norfolk, Ontario', 'Stratford, Ontario', 'Chatham-Kent, Ontario',
       'Leamington, Ontario', 'Sarnia, Ontario', 'Owen Sound, Ontario',
       'Collingwood , Ontario', 'Orillia, Ontario', 'Midland, Ontario',
       'North Bay, Ontario', 'Elliot Lake, Ontario',
       'Temiskaming Shores, Ontario', 'Timmins, Ontario',
       'Sault Ste. Marie, Ontario', 'Kenora, Ontario',
       'Non CMA-CA, Ontario', 'Portage la Prairie, Manitoba',
       'Brandon, Manitoba', 'Thompson, Manitoba', 'Non CMA-CA, Manitoba',
       'Yorkton, Saskatchewan', 'Moose Jaw, Saskatchewan',
       'Swift Current, Saskatchewan', 'North Battleford, Saskatchewan',
       'Prince Albert, Saskatchewan', 'Estevan, Saskatchewan',
       'Lloydminster, Saskatchewan part', 'Non CMA-CA, Saskatchewan',
       'Medicine Hat, Alberta', 'Brooks, Alberta', 'Lethbridge, Alberta',
       'Okotoks, Alberta', 'Canmore, Alberta', 'Red Deer, Alberta',
       'Camrose, Alberta', 'Lloydminster, Alberta part',
       'Cold Lake, Alberta', 'Grande Prairie, Alberta',
       'Wood Buffalo, Alberta', 'Wetaskiwin, Alberta',
       'Non CMA-CA, Alberta', 'Cranbrook, British Columbia',
       'Penticton, British Columbia', 'Vernon, British Columbia',
       'Salmon Arm, British Columbia', 'Kamloops, British Columbia',
       'Chilliwack, British Columbia', 'Squamish, British Columbia',
       'Duncan, British Columbia', 'Nanaimo, British Columbia',
       'Parksville, British Columbia', 'Port Alberni, British Columbia',
       'Courtenay, British Columbia', 'Campbell River, British Columbia',
       'Powell River, British Columbia',
       'Williams Lake, British Columbia', 'Quesnel, British Columbia',
       'Prince Rupert, British Columbia', 'Kitimat, British Columbia',
       'Terrace, British Columbia', 'Prince George, British Columbia',
       'Dawson Creek, British Columbia',
       'Fort St. John, British Columbia', 'Non CMA-CA, British Columbia',
       'Whitehorse, Yukon', 'Non CMA-CA, Yukon',
       'Yellowknife, Northwest Territories',
       'Non CMA-CA, Northwest Territories',
       'Campbellton, combined NewBrunswick/Quebec',
       'Hawkesbury, combined Ontario/Quebec',
       'Ottawa - Gatineau, combined Ontario/Quebec',
       'Lloydminster, combined Alberta/Saskatchewan',
       'Steinbach, Manitoba', 'High River, Alberta',
       'Strathmore, Alberta', 'Sylvan Lake, Alberta', 'Lacombe, Alberta',
       'Gander, Newfoundland and Labrador', 'Sainte-Marie, Quebec',
       'Arnprior, Ontario', 'Carleton Place, Ontario',
       'Wasaga Beach, Ontario', 'Winkler, Manitoba',
       'Weyburn, Saskatchewan', 'Nelson, British Columbia']
# Sex labels offered in the gender dropdown.
gender_values = ["Both sexes", "Males", "Females"]
# Reference years offered in the year dropdown: 2000 through 2017 inclusive.
year_values = list(range(2000, 2018))

In [8]:
# Selection shown when the charts are first rendered.
initial_age = "35 to 44 years"
initial_year = 2017
initial_geo = "Canada"
initial_gender = "Females"



# One dropdown per filter dimension, each pre-set to its initial value.
select_age = widgets.Dropdown(options=age_values, value=initial_age)
select_year = widgets.Dropdown(options=year_values, value=initial_year)
select_geo = widgets.Dropdown(options=geo_values, value=initial_geo)
select_gender = widgets.Dropdown(options=gender_values, value=initial_gender)

# Build both figure widgets once for the initial selection; the callback
# defined below updates these same objects in place.
hist, cumulative_plot = create_bar_chart(df, initial_year, initial_age, initial_gender, initial_geo, income_to_plot, cols_to_keep)

def update_bar_charts_callback(ignore):
    """Redraw both charts from the current dropdown selections.

    Args:
        ignore: Argument supplied by the widget observer; not used, because
            the current values are read straight from the dropdowns.
    """
    year, geo = select_year.value, select_geo.value
    age, gender = select_age.value, select_gender.value

    subset = subset_plot_data_for_income_bins(
        df, year, age, gender, geo, income_to_plot, cols_to_keep
    )
    y_hist, y_cumulative = preprocess_income_bin_data(subset)

    # Update the existing figure widgets in place: new bar heights first,
    # then the title that describes the selection.
    targets = (
        ("hist", hist, y_hist),
        ("cumulative", cumulative_plot, y_cumulative),
    )
    for plot_type, figure, heights in targets:
        figure.data[0]["y"] = heights
        figure.update_layout(
            title=set_chart_title(plot_type, year, geo, gender, age)
        )
    
# Re-run the callback whenever the selected value of any dropdown changes.
select_age.observe(update_bar_charts_callback, "value")
select_geo.observe(update_bar_charts_callback, "value")
select_year.observe(update_bar_charts_callback, "value")
select_gender.observe(update_bar_charts_callback, "value")


# Arrange the dropdowns in one row above a second row holding the two charts.
bar_chart_dropdowns = widgets.HBox([select_year, select_geo, select_age, select_gender])
bar_charts = widgets.HBox([hist, cumulative_plot])
layout = widgets.VBox([bar_chart_dropdowns, bar_charts])
# Bare expression as the last line of the cell so the notebook displays it.
layout

[<ipython-input-5-b536273e0053>:9 -   create_income_bins() ] create_income_bins()
[<ipython-input-5-b536273e0053>:10 -   create_income_bins() ] y: /n [0.04984406 0.0497454  0.05660062 0.05581986 0.05897721 0.12569764
 0.19221216 0.20935021 0.11856787 0.06225896 0.01209745 0.00437138
 0.00445718]


VBox(children=(HBox(children=(Dropdown(index=17, options=(2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008…