# Canadian median income in constant (2018) dollars

Below you can explore median income in 2018 dollars for different regions in Canada from 1976 to 2018. The data is based on [stats provided by Statistics Canada](https://doi.org/10.25318/1110023901-eng).

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import ipywidgets as widgets
from ipywidgets import HBox, VBox, Dropdown, Checkbox, IntSlider
import logging
import sys # for relative local imports
sys.path.append('..')

# local imports
from data_analysis import wrangling

# Module-level logger. Each record is prefixed with the file name, line
# number and (right-aligned, 20 characters wide) function name that emitted it.
logger = logging.getLogger(__name__)
FORMAT = "[%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s"
logging.basicConfig(format=FORMAT)
# Let this logger pass everything from DEBUG upwards.
logger.setLevel(logging.DEBUG)

In [2]:
# Load the raw CSV into memory (the read takes roughly 5.5 s).
# low_memory=False asks pandas not to parse the file in chunks, which avoids
# mixed-type inference within a column.
path = r"../data/raw/11100239.csv"
df = pd.read_csv(path, low_memory=False)

In [3]:
# Columns of the raw table that are retained when the plotting subset is built
# (passed to subset_for_scatter_plot).
cols_to_keep = ['REF_DATE', 
                'GEO', 
                'Sex', 
                'Age group', 
                'Income source',
                'Statistics',
                'SCALAR_FACTOR', 
                'VALUE', 
                 ]

# Lookup of the values available for each column, keyed by column name.
# The "Sex", "GEO" and "Age group" entries supply the options of the selector
# widgets; the remaining entries are not referenced by the code in this view.
# NOTE(review): presumably these mirror the unique values found in the raw
# CSV — confirm against the data.
selections = {'REF_DATE': np.array([1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986,
                1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
                1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
                2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018],
               dtype=int),
         'GEO': np.array(['Canada', 'Atlantic provinces', 'Newfoundland and Labrador',
                'Prince Edward Island', 'Nova Scotia', 'New Brunswick', 'Quebec',
                'Ontario', 'Prairie provinces', 'Manitoba', 'Saskatchewan',
                'Alberta', 'British Columbia', 'Québec, Quebec',
                'Montréal, Quebec', 'Ottawa-Gatineau, Ontario/Quebec',
                'Toronto, Ontario', 'Winnipeg, Manitoba', 'Calgary, Alberta',
                'Edmonton, Alberta', 'Vancouver, British Columbia'], dtype=object),
         'Sex': np.array(['Both sexes', 'Males', 'Females'], dtype=object),
         'Age group': np.array(['16 years and over', '16 to 24 years', '25 to 54 years',
                '25 to 34 years', '35 to 44 years', '45 to 54 years',
                '55 to 64 years', '65 years and over'], dtype=object),
         'Income source': np.array(['Total income', 'Market income', 'Employment income',
                'Wages, salaries and commissions', 'Self-employment income',
                'Investment income', 'Retirement income', 'Other income',
                'Government transfers',
                "Old Age Security (OAS) and Guaranteed Income Supplement (GIS), Spouse's Allowance (SPA)",
                'Canada Pension Plan (CPP) and Quebec Pension Plan (QPP) benefits',
                'Child benefits', 'Employment Insurance (EI) benefits',
                'Social assistance', 'Other government transfers'], dtype=object),
         'Statistics': np.array(['Average income (excluding zeros)',
                'Median income (excluding zeros)'], dtype=object)}

# GEO values that are provinces or multi-province regions (the list includes
# the "Atlantic provinces" and "Prairie provinces" aggregates).
provinces = ['Atlantic provinces', 'Newfoundland and Labrador',
                'Prince Edward Island', 'Nova Scotia', 'New Brunswick', 'Quebec',
                'Ontario', 'Prairie provinces', 'Manitoba', 'Saskatchewan',
                'Alberta', 'British Columbia']
# GEO values that name a city, written as "City, Province".
cities = ['Québec, Quebec',
                'Montréal, Quebec', 'Ottawa-Gatineau, Ontario/Quebec',
                'Toronto, Ontario', 'Winnipeg, Manitoba', 'Calgary, Alberta',
                'Edmonton, Alberta', 'Vancouver, British Columbia']

# Selections shown when the widgets are first created.
initial_sex = "Females"
initial_geo = cities[-1]  # last city in the list: 'Vancouver, British Columbia'
initial_age = selections["Age group"][2]  # third age group: '25 to 54 years'

In [4]:
def subset_for_scatter_plot(df, year, income_source, income_to_plot, cols_to_keep):
    """Return the rows of ``df`` for one income source and one statistic.

    Parameters
    ----------
    df : pd.DataFrame
        Table providing at least the columns named in ``cols_to_keep``.
    year : int
        Accepted but not used; rows are never filtered by year.
    income_source : str
        Value the "Income source" column must equal.
    income_to_plot : str
        Value the "Statistics" column must equal.
    cols_to_keep : list of str
        Columns kept in the result; must include "Income source" and
        "Statistics" because the filtering runs on the trimmed table.

    Returns
    -------
    pd.DataFrame
        Matching rows, restricted to ``cols_to_keep``, original index kept.
    """
    trimmed = df.loc[:, cols_to_keep]
    source_matches = trimmed["Income source"].eq(income_source)
    statistic_matches = trimmed["Statistics"].eq(income_to_plot)
    return trimmed[source_matches & statistic_matches]

In [5]:
# Median total income rows used by every plot below. The year argument (2017)
# is not used by subset_for_scatter_plot, so rows for all years are kept.
plot_data = subset_for_scatter_plot(df, 2017, "Total income", 'Median income (excluding zeros)', cols_to_keep)

In [6]:
# Multi-select list of sexes, starting with only `initial_sex` selected.
sex_selector = widgets.SelectMultiple(
    options=selections["Sex"],
    value=(initial_sex,),
    description='Sex',
    disabled=False,
)

# Multi-select list of locations, starting with only `initial_geo` selected.
geo_selector = widgets.SelectMultiple(
    options=selections["GEO"],
    value=(initial_geo,),
    description="Location",
)

# Single-choice age group, starting at `initial_age`.
age_dropdown = Dropdown(
    options=selections["Age group"],
    value=initial_age,
    description="Age group",
)

In [7]:
def format_title(sex, age):
    """Build the plot title for the given sexes and age group.

    Parameters
    ----------
    sex : iterable of str
        Selected sexes; joined with ", " in the title.
    age : str
        Selected age group label.

    Returns
    -------
    str
    """
    sexes = ', '.join(sex)
    return f"Median income (2018 dollars) for {sexes} aged {age}"

def create_scatter_plot(df, sex, age, geo)->go.FigureWidget:
    """Build a line chart of VALUE over REF_DATE for the selected groups.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide the columns "REF_DATE", "VALUE", "GEO", "Sex" and
        "Age group".
    sex : tuple of str
        Values of the "Sex" column to include; mapped to the line dash.
    age : str
        Single value of the "Age group" column to include.
    geo : tuple of str
        Values of the "GEO" column to include; mapped to the line colour.

    Returns
    -------
    go.FigureWidget
        A new widget holding one trace per GEO/Sex combination present in
        the filtered data, titled via ``format_title(sex, age)``.
    """
    # One combined mask instead of three successive filtered copies.
    selected = (
        (df["Age group"] == age)
        & df["GEO"].isin(geo)
        & df["Sex"].isin(sex)
    )
    fig = px.line(
        df[selected], x="REF_DATE", y="VALUE", color="GEO", line_dash="Sex"
    )
    # Pin both axes to fixed ranges rather than letting plotly autoscale.
    fig.update_xaxes(range=[1975, 2020])
    fig.update_yaxes(range=[0, 90000])
    fig.update_layout(title=format_title(sex, age))

    return go.FigureWidget(fig)

# The figure widget that gets displayed, built from the widgets' initial
# values. Later updates modify this same object.
scatter_plot = create_scatter_plot(
    plot_data,
    sex_selector.value,
    age_dropdown.value,
    geo_selector.value)

In [8]:
def clear_plot_data(plot):
    """Remove all data from ``plot`` in place and return the same object.

    The existing data has to be dropped before new data is added.
    """
    plot.data = []
    return plot

def update_plot_data(old_plot, new_plot):
    """Add every trace of ``new_plot`` to ``old_plot`` and return ``old_plot``.

    Traces are added one at a time, in the order they appear in
    ``new_plot.data``; ``new_plot`` itself is not modified here.
    """
    incoming = new_plot.data
    for item in incoming:
        old_plot.add_trace(item)
    return old_plot
    
def update_scatter_plot(change):
    """Redraw the displayed figure from the current widget selections.

    Used as the observer callback of the selector widgets. ``change`` is the
    notification payload; it is not inspected because the current values are
    read straight from the widgets.
    """
    selected_sex = sex_selector.value
    selected_geo = geo_selector.value
    selected_age = age_dropdown.value

    # Rebinding ``scatter_plot`` to a freshly created figure would point the
    # name at a new object and leave the displayed one untouched, so the
    # existing figure is emptied and refilled in place instead.
    clear_plot_data(scatter_plot)
    replacement = create_scatter_plot(
        plot_data, selected_sex, selected_age, selected_geo
    )
    update_plot_data(scatter_plot, replacement)
    scatter_plot.update_layout(title=format_title(selected_sex, selected_age))


# Redraw the figure whenever the value of any selector changes.
age_dropdown.observe(update_scatter_plot, names='value')
geo_selector.observe(update_scatter_plot, names='value')
sex_selector.observe(update_scatter_plot, names='value')

# Layout: age and sex stacked on the left, location list to their right,
# and the figure underneath. The final expression is the cell's output.
left_selections = VBox(children=[age_dropdown, sex_selector])
all_selections = HBox(children=[left_selections, geo_selector])
VBox(children=[all_selections, scatter_plot])

VBox(children=(HBox(children=(VBox(children=(Dropdown(description='Age group', index=2, options=('16 years and…