In [69]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import ipywidgets as widgets
import logging
import wrangling

logger = logging.getLogger(__name__)
FORMAT = "[%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s"
logging.basicConfig(format=FORMAT)
logger.setLevel(logging.DEBUG)

In [70]:
# 5.5s read time
#load the data
path = r"../data/raw/11100239.csv"
df = pd.read_csv(path, low_memory=False)
df.head(2)

Unnamed: 0,REF_DATE,GEO,DGUID,Age group,Sex,Income source,Statistics,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
0,1976,Canada,2016A000011124,16 years and over,Both sexes,Total income,Number of persons,Number,223,thousands,3,v107660854,1.1.1.1.1,16797.0,,,,0
1,1976,Canada,2016A000011124,16 years and over,Both sexes,Total income,Number with income,Number,223,thousands,3,v107660855,1.1.1.1.2,14167.0,,,,0


In [3]:
cols_to_keep = ['REF_DATE', 
                'GEO', 
                'Sex', 
                'Age group', 
                'Income source',
                'Statistics',
                'SCALAR_FACTOR', 
                'VALUE', 
                 ]

In [39]:
selections = {'REF_DATE': np.array([1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986,
                1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
                1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
                2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018],
               dtype=int),
         'GEO': np.array(['Canada', 'Atlantic provinces', 'Newfoundland and Labrador',
                'Prince Edward Island', 'Nova Scotia', 'New Brunswick', 'Quebec',
                'Ontario', 'Prairie provinces', 'Manitoba', 'Saskatchewan',
                'Alberta', 'British Columbia', 'Québec, Quebec',
                'Montréal, Quebec', 'Ottawa-Gatineau, Ontario/Quebec',
                'Toronto, Ontario', 'Winnipeg, Manitoba', 'Calgary, Alberta',
                'Edmonton, Alberta', 'Vancouver, British Columbia'], dtype=object),
         'Sex': np.array(['Both sexes', 'Males', 'Females'], dtype=object),
         'Age group': np.array(['16 years and over', '16 to 24 years', '25 to 54 years',
                '25 to 34 years', '35 to 44 years', '45 to 54 years',
                '55 to 64 years', '65 years and over'], dtype=object),
         'Income source': np.array(['Total income', 'Market income', 'Employment income',
                'Wages, salaries and commissions', 'Self-employment income',
                'Investment income', 'Retirement income', 'Other income',
                'Government transfers',
                "Old Age Security (OAS) and Guaranteed Income Supplement (GIS), Spouse's Allowance (SPA)",
                'Canada Pension Plan (CPP) and Quebec Pension Plan (QPP) benefits',
                'Child benefits', 'Employment Insurance (EI) benefits',
                'Social assistance', 'Other government transfers'], dtype=object),
         'Statistics': np.array(['Average income (excluding zeros)',
                'Median income (excluding zeros)'], dtype=object)}

In [5]:
provinces = ['Atlantic provinces', 'Newfoundland and Labrador',
                'Prince Edward Island', 'Nova Scotia', 'New Brunswick', 'Quebec',
                'Ontario', 'Prairie provinces', 'Manitoba', 'Saskatchewan',
                'Alberta', 'British Columbia']
cities = ['Québec, Quebec',
                'Montréal, Quebec', 'Ottawa-Gatineau, Ontario/Quebec',
                'Toronto, Ontario', 'Winnipeg, Manitoba', 'Calgary, Alberta',
                'Edmonton, Alberta', 'Vancouver, British Columbia']

In [6]:
temp = wrangling.subset_plot_data_for_scatter_plot(df, 2017, '25 to 34 years', "Females", "Canada", ["Total income"], ['Median income (excluding zeros)'], cols_to_keep)

In [7]:
def subset_for_scatter_plot(df, year, income_source, income_to_plot, cols_to_keep):
    df = df.loc[:,cols_to_keep]
    df = df[df["Income source"]==income_source]
    df = df[df["Statistics"]==income_to_plot]
    return df

In [9]:
plot_data = subset_for_scatter_plot(df, 2017, "Total income", 'Median income (excluding zeros)', cols_to_keep)

In [10]:
import ipywidgets as widgets
from ipywidgets import HBox, VBox, Dropdown, Checkbox, IntSlider
import plotly.graph_objects as go
import plotly.express as px

In [58]:
sex_selector = widgets.SelectMultiple(
    options=selections.get("Sex"),
    value=['Females'],
    #rows=10,
    description='Sex',
    disabled=False
)

In [59]:
geo_selector = widgets.SelectMultiple(
    options=selections.get("GEO"),
    value = cities,
    description = "Location"
)

In [74]:
def create_scatter_plot(df, sex, age, locations)->go.FigureWidget:
    """
    Parameters
    ----------
    df : pd.DataFrame
    sex : tuple
    age : str
    locations : tuple

    """
    
    df = df[df["Age group"]==age]
    df = df[df.GEO.isin(cities)]
    df = df[df.Sex.isin(sex)]
    fig = px.line(df, x="REF_DATE", y="VALUE", color="GEO")
    fig.update_xaxes(range=[1975, 2020])
    fig.update_yaxes(range=[0, 90000])
    fig.update_layout(title=f"Median income for {sex} aged {age} in 2018 constant dollars")
    return go.FigureWidget(fig)

scatter_plot = create_scatter_plot(plot_data, "Both sexes", ('35 to 44 years'), cities)

TypeError: create_scatter_plot() missing 3 required positional arguments: 'sex', 'age', and 'locations'

styling scatter plot line style
sex_line_styles = {"Both sexes": "line", "Females": "dash", "Males": "dot"}
line=dict()

In [65]:
scatter_plot.data

(Scatter({
     'hoverlabel': {'namelength': 0},
     'hovertemplate': 'GEO=Québec, Quebec<br>REF_DATE=%{x}<br>VALUE=%{y}',
     'legendgroup': 'Québec, Quebec',
     'line': {'color': '#636efa', 'dash': 'solid'},
     'mode': 'lines',
     'name': 'Québec, Quebec',
     'showlegend': True,
     'uid': '05cd9877-30e1-4edb-9dc3-5ce3bb4ef5a3',
     'x': array([1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987,
                 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
                 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
                 2012, 2013, 2014, 2015, 2016, 2017, 2018], dtype=int64),
     'xaxis': 'x',
     'y': array([53800., 46500., 56400., 52100., 54100., 50300., 45900., 45900., 51200.,
                 52100., 45400., 49100., 46800., 45100., 43900., 47100., 43400., 44100.,
                 44400., 48100., 44500., 44400., 44700., 43200., 41500., 44600., 41300.,
                 40700., 4800

In [66]:
geo_selector.value

('Québec, Quebec',
 'Montréal, Quebec',
 'Ottawa-Gatineau, Ontario/Quebec',
 'Toronto, Ontario',
 'Winnipeg, Manitoba',
 'Calgary, Alberta',
 'Edmonton, Alberta',
 'Vancouver, British Columbia')

In [63]:
def update_scatter_plot(change):
    global scatter_plot
    age = age_dropdown.value
    geo = geo_selector.value
    sex = sex_selector.value
    
    scatter_plot = create_scatter_plot(df, sex, age, locations)

age_dropdown.observe(update_scatter_plot, 'value')
geo_selector.observe(update_scatter_plot, 'value')
sex_selector.observe(update_scatter_plot, 'value')

In [64]:
selections_box = VBox([age_dropdown, sex_selector, geo_selector])
HBox([selections_box, scatter_plot])

HBox(children=(VBox(children=(Dropdown(description='Age group', options=('16 years and over', '16 to 24 years'…

In [67]:
geo_selector.value

('Saskatchewan',)

In [None]:
plot_data["Age group"].unique()

In [None]:
plot_df = plot_data[plot_data.Sex=="Both sexes"]
plot_df.head(2)

In [None]:
fig.data

In [None]:
provinces = 

In [None]:
fig.data

In [None]:
go.Figure(data=go.Scatter(x=df.REF_DATE, y=df.VALUE, mode='markers', marker_color=df.GEO, ))