# Life Expectancy

In [1]:
import pandas as pd
import requests, zipfile, io
import re
import os

## Technical Setup

In [2]:
%load_ext autoreload
%autoreload 2

pd.set_option('display.max_colwidth', 200)
pd.set_option('display.max_columns', 200)

data_dir = '../data'
cache_dir = os.path.join(data_dir, 'cache')
import sys
sys.path.append('..')

from canadadata.statscan import to_wide_format, read_statscan_csv
from canadadata.io import unzip_data

## Download Data

In [3]:
# Fetch the zipped CSV bundle for StatCan table 39100007 into the cache
# directory. The cell output is whatever `unzip_data` returns (the saved
# output shows a tuple with the data CSV path and the metadata CSV path).
unzip_data(
    'https://www150.statcan.gc.ca/n1/tbl/csv/39100007-eng.zip',
    cache_dir,
)

('../data\\cache\\39100007.csv', '../data\\cache\\39100007_MetaData.csv')

## Life Expectancy Data

In [90]:
# Read the cached CSV for table 39100007, then reshape it with
# `to_wide_format` (called here with its default arguments).
statscan_data = read_statscan_csv(
    os.path.join(cache_dir, '39100007.csv')
)
lifeexp_data = to_wide_format(statscan_data)

In [92]:
# Inspect the memory footprint of the reshaped life-expectancy frame.
# NOTE(review): the saved output is a single integer, whereas pandas'
# DataFrame.memory_usage() returns one value per column - the stored output
# may be stale; confirm by re-running on a fresh kernel.
lifeexp_data.memory_usage()

25625376

In [81]:
# List the dtype of every column in the reshaped frame.
lifeexp_data.dtypes

REF_DATE                                                              object
GEO                                                                 category
DGUID                                                                 object
Age group                                                           category
Sex                                                                 category
UOM                                                                 category
UOM_ID                                                              category
SCALAR_FACTOR                                                         object
SCALAR_ID                                                              int64
VECTOR                                                                object
COORDINATE                                                            object
STATUS                                                                object
SYMBOL                                                               float64

In [89]:
# Tally the values present in the SYMBOL column. value_counts() skips missing
# values by default, so an empty result means the column holds no non-null entries.
lifeexp_data['SYMBOL'].value_counts()

Series([], Name: SYMBOL, dtype: int64)

## Save to Parquet (via PyArrow)

In [4]:
import pyarrow as pa
import pyarrow.parquet as pq

# The original cell used a frame called `data` that no cell in this notebook
# assigns, so it failed on a fresh kernel. `statscan_data` (the frame returned
# by `read_statscan_csv` above) is presumably the frame that was meant -
# TODO confirm.
data_table = pa.Table.from_pandas(statscan_data)

# Build the output path from `data_dir` so it follows the configured data folder.
pq.write_table(data_table, os.path.join(data_dir, 'LifeExpectancy.parquet'))

In [12]:
# Memory used by the `Element` column once converted to a categorical dtype.
# The original cell used an undefined frame `data`; `statscan_data` is
# presumably the frame that was meant - TODO confirm. The original also
# evaluated `len(data)` on a line whose result was discarded (only the last
# expression of a cell is displayed), so that dead statement is dropped.
statscan_data['Element'].astype('category').memory_usage()

1671232

In [6]:
def get_elements(data):
    """Return the distinct values of ``data['Element']`` as a list.

    Values are listed in order of first appearance in the column.
    """
    distinct_elements = data['Element'].drop_duplicates()
    return distinct_elements.tolist()


def get_element_renames(data):
    """Build a rename template for the ``Element`` values of ``data``.

    Returns a dict with one key per distinct element, each mapped to an
    empty string (to be filled in by hand).
    """
    return dict.fromkeys(get_elements(data), '')

# Show the rename template for the loaded table. The original call passed an
# undefined frame `data`; `statscan_data` is presumably the frame that was
# meant - TODO confirm.
get_element_renames(statscan_data)

{'Number of survivors at age x (lx)': '',
 'Number of deaths between age x and x+n (dx)': '',
 'Death probability between age x and x+n (qx)': '',
 'Margin of error associated with the death probability (m.e.(qx))': '',
 'Probability of survival between age x and x+n (px)': '',
 'Number of life years lived between age x and x+n (Lx)': '',
 'Cumulative number of life years lived beyond age x (Tx)': '',
 'Life expectancy (in years) at age x (ex)': '',
 'Margin of error associated with the life expectancy (m.e.(ex))': ''}

In [8]:
# Keep only the rows whose reference period is '2013/2015'. The original
# filtered an undefined frame `data`; `statscan_data` is presumably the frame
# that was meant - TODO confirm.
data15 = statscan_data[statscan_data['REF_DATE'] == '2013/2015']

In [9]:
# Column groupings for the table (not referenced by any other visible cell):
# the column to partition on, the dimension columns, and the value column.
partition = 'REF_DATE'
dimensions = [
    'GEO',
    'Age group',
    'Sex',
    'Element',
]
value = ['VALUE']

In [5]:
# Column names of the long-format life table, one per line for easy diffing.
REF_COLS = [
    'REF_DATE',
    'GEO',
    'DGUID',
    'Age group',
    'Sex',
    'Element',
    'UOM',
    'UOM_ID',
    'SCALAR_FACTOR',
    'SCALAR_ID',
    'VECTOR',
    'COORDINATE',
    'VALUE',
    'STATUS',
    'SYMBOL',
    'TERMINATED',
    'DECIMALS',
]

In [6]:
# Open Canada catalogue page for a dataset; `lf_url` is not referenced by any
# other visible cell.
lf_url = 'https://open.canada.ca/data/en/dataset/74ded0d6-c629-4a5f-bdd2-9c088c9b0d54'
# Bare string expression (not assigned to a name): the zip download URL for
# table 13100114, the same URL later passed to StatscanDataset.
'https://www150.statcan.gc.ca/n1/tbl/csv/13100114-eng.zip'

In [11]:
class StatscanDataset(object):
    """A Statistics Canada table identified by the URL of its zipped CSV.

    Parameters
    ----------
    url : str
        Address of the zip archive, handed to ``unzip_data``.
    pivot_column : str
        Column name forwarded to ``to_wide_format`` when a wide frame is
        requested.
    """

    def __init__(self, url: str, pivot_column: str):
        self.url, self.pivot_column = url, pivot_column

    def get_data(self, cache_dir=cache_dir, wide=True):
        """Download, read and optionally reshape the table.

        Parameters
        ----------
        cache_dir
            Directory passed to ``unzip_data``. The default is the value the
            module-level ``cache_dir`` had when this class was defined.
        wide
            When truthy (the default) the frame is passed through
            ``to_wide_format`` using ``self.pivot_column``; otherwise the
            frame from ``read_statscan_csv`` is returned as is.
        """
        extracted_files = unzip_data(self.url, cache_dir)
        # The first path returned by unzip_data is read as the data CSV.
        frame = read_statscan_csv(extracted_files[0])
        if not wide:
            return frame
        return to_wide_format(frame, pivot_column=self.pivot_column)

## Life Expectancy Data (table 13100114, via `StatscanDataset`)

In [12]:
# Load table 13100114 and pivot it on the `Element` column
# (get_data() returns the wide format by default).
lifeexp_dataset = StatscanDataset(
    url='https://www150.statcan.gc.ca/n1/tbl/csv/13100114-eng.zip',
    pivot_column='Element',
)
lifeexp_data = lifeexp_dataset.get_data()

In [13]:
# Preview only the first rows instead of rendering the whole frame, which
# keeps the saved notebook small and the output readable.
lifeexp_data.head(10)

Unnamed: 0,REF_DATE,GEO,DGUID,Age group,Sex,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,STATUS,SYMBOL,TERMINATED,DECIMALS,Cumulative number of life years lived beyond age x (Tx),Death probability between age x and x+1 (qx),Life expectancy (in years) at age x (ex),Margin of error of the death probability (m.e.(qx)),Margin of error of the life expectancy (m.e.(ex)),Number of deaths between age x and x+1 (dx),Number of life years lived between age x and x+1 (Lx),Number of survivors at age x (lx),Probability of survival between age x and x+1 (px)
0,1980/1982,Canada,2016A000011124,0 years,Both sexes,Number,223,units,0,v123302098,1.1.1.1,,,,0,7543058.0,0.00976,75.4,0.00018,0.0,976.0,99152.0,100000.0,0.99024
1,1980/1982,Canada,2016A000011124,0 years,Males,Number,223,units,0,v123302099,1.1.2.1,,,,0,7197948.0,0.01098,72.0,0.00027,0.1,1098.0,99041.0,100000.0,0.98902
2,1980/1982,Canada,2016A000011124,0 years,Females,Number,223,units,0,v123302100,1.1.3.1,,,,0,7907529.0,0.00848,79.1,0.00024,0.1,848.0,99268.0,100000.0,0.99152
3,1980/1982,Canada,2016A000011124,1 year,Both sexes,Number,223,units,0,v123302101,1.2.1.1,,,,0,7443906.0,0.00068,75.2,0.00005,0.0,68.0,98988.0,99024.0,0.99932
4,1980/1982,Canada,2016A000011124,1 year,Males,Number,223,units,0,v123302102,1.2.2.1,,,,0,7098907.0,0.00075,71.8,0.00007,0.1,74.0,98864.0,98902.0,0.99925
5,1980/1982,Canada,2016A000011124,1 year,Females,Number,223,units,0,v123302103,1.2.3.1,,,,0,7808261.0,0.00062,78.8,0.00007,0.1,62.0,99118.0,99152.0,0.99938
6,1980/1982,Canada,2016A000011124,2 years,Both sexes,Number,223,units,0,v123302104,1.3.1.1,,,,0,7344919.0,0.00056,74.2,0.00004,0.0,55.0,98928.0,98956.0,0.99944
7,1980/1982,Canada,2016A000011124,2 years,Males,Number,223,units,0,v123302105,1.3.2.1,,,,0,7000043.0,0.00063,70.8,0.00007,0.1,62.0,98800.0,98828.0,0.99937
8,1980/1982,Canada,2016A000011124,2 years,Females,Number,223,units,0,v123302106,1.3.3.1,,,,0,7709143.0,0.00049,77.8,0.00006,0.1,48.0,99062.0,99090.0,0.99951
9,1980/1982,Canada,2016A000011124,3 years,Both sexes,Number,223,units,0,v123302107,1.4.1.1,,,,0,7245991.0,0.00047,73.3,0.00004,0.0,46.0,98875.0,98901.0,0.99953


## Retail prices

In [14]:
# Load table 18100251 and pivot it on the NAICS classification column
# (get_data() returns the wide format by default).
retail_prices_dataset = StatscanDataset(
    'https://www150.statcan.gc.ca/n1/tbl/csv/18100251-eng.zip',
    pivot_column='North American Industry Classification System (NAICS)',
)
retail_prices = retail_prices_dataset.get_data()

# Preview only the first rows rather than dumping the full, very wide frame.
retail_prices.head(10)

Unnamed: 0,REF_DATE,GEO,DGUID,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,STATUS,SYMBOL,TERMINATED,DECIMALS,All other general merchandise stores,All other miscellaneous store retailers,Art dealers,Automotive parts and accessories stores,"Automotive parts, accessories and tire stores","Beer, wine and liquor stores",Book stores and news dealers,Building material and garden equipment and supplies dealers,Building material and supplies dealers,Children's and infants' clothing stores,Clothing accessories stores,Clothing and clothing accessories stores,Clothing stores,Convenience stores,"Cosmetics, beauty supplies and perfume stores",Department stores,Electronics and appliance stores,Family clothing stores,Fish and seafood markets,Floor covering stores,Florists,Food and beverage stores,Fruit and vegetable markets,Furniture and home furnishings stores,Furniture stores,Gasoline stations,Gasoline stations with convenience stores,General merchandise stores,"Gift, novelty and souvenir stores",Grocery stores,Hardware stores,Health and personal care stores,"Hobby, toy and game stores",Home centres,Home furnishings stores,Jewellery stores,"Jewellery, luggage and leather goods stores",Lawn and garden equipment and supplies stores,Luggage and leather goods stores,Meat markets,Men's clothing stores,Miscellaneous store retailers,Mobile home dealers,Motor vehicle and parts dealers,Musical instrument and supplies stores,Nursery stores and garden centres,Office supplies and stationery stores,"Office supplies, stationery and gift stores",Optical goods stores,Other building material dealers,Other clothing stores,Other gasoline stations,Other general merchandise stores,Other health and personal care stores,Other home furnishings stores,Other miscellaneous store retailers,Other specialty food stores,Outdoor power equipment stores,Paint and wallpaper stores,Pet and pet supplies stores,Pharmacies and drug stores,Retail trade,"Sewing, needlework and piece goods stores",Shoe 
stores,Specialty food stores,Sporting goods stores,"Sporting goods, hobby and musical instrument stores","Sporting goods, hobby, book and music stores",Supermarkets and other grocery (except convenience) stores,Tire dealers,Used merchandise stores,Warehouse clubs,Women's clothing stores
0,2008-01,Canada,2016A000011124,"Index, 2013=100",186,units,0,v107645719,1.1,,,,1,97.9,90.5,,93.4,87.0,93.5,90.1,84.2,83.4,,,101.2,99.7,84.2,92.2,,125.8,102.5,,84.8,,81.7,,95.3,96.4,89.2,83.2,85.1,,79.5,,83.3,112.6,79.5,93.5,94.4,95.8,100.8,107.9,,98.6,90.1,,,96.0,101.0,89.5,89.8,96.9,102.9,104.0,91.6,97.5,97.0,100.7,87.9,88.5,,95.2,76.3,81.6,87.7,88.8,120.0,81.7,100.4,102.6,100.3,79.0,82.5,,,96.2
1,2008-02,Canada,2016A000011124,"Index, 2013=100",186,units,0,v107645719,1.1,,,,1,97.1,90.5,,93.8,87.6,93.9,91.7,84.9,84.2,,,100.8,99.2,85.5,91.2,,125.3,101.8,,84.2,,81.8,,94.7,95.6,90.4,82.8,84.5,,79.7,,86.5,124.1,80.4,93.3,94.4,95.8,101.9,108.1,,98.8,91.3,,,96.1,101.7,93.1,92.2,99.8,103.2,103.6,93.6,96.7,98.0,100.7,87.6,88.4,,95.1,75.1,85.1,88.3,88.8,120.3,80.7,99.1,104.7,102.3,79.1,83.3,,,96.2
2,2008-03,Canada,2016A000011124,"Index, 2013=100",186,units,0,v107645719,1.1,,,,1,95.6,90.3,,93.8,89.3,93.9,92.3,84.7,83.9,,,101.0,99.6,85.5,92.2,,124.9,100.8,,84.1,,83.1,,95.2,96.5,91.7,84.6,84.2,,81.3,,87.2,118.9,80.1,93.2,94.4,95.8,103.0,108.1,,100.7,90.8,,,96.2,102.9,92.1,91.5,98.3,103.2,103.2,94.7,95.2,98.2,100.6,87.7,88.6,,95.2,75.8,85.8,88.9,88.8,118.3,81.7,101.0,104.6,102.3,80.9,86.2,,,99.1
3,2008-04,Canada,2016A000011124,"Index, 2013=100",186,units,0,v107645719,1.1,,,,1,95.3,91.2,,92.6,89.4,94.1,93.7,85.7,85.0,,,101.9,101.0,88.0,92.7,,122.6,102.5,,88.0,,84.6,,96.5,98.1,91.8,87.2,83.9,,83.1,,87.8,111.8,81.1,94.0,94.4,96.0,100.5,109.6,,101.4,89.0,,,96.2,99.8,84.8,87.0,94.6,105.5,104.6,93.7,95.0,96.5,98.9,88.5,89.6,,94.6,76.7,86.8,89.4,88.5,117.4,81.6,101.6,103.1,101.4,82.6,87.2,,,99.5
4,2008-05,Canada,2016A000011124,"Index, 2013=100",186,units,0,v107645719,1.1,,,,1,97.4,91.3,,92.6,91.2,94.6,92.9,84.9,84.1,,,101.6,100.5,87.3,93.0,,121.9,101.7,,88.0,,85.2,,96.3,97.8,92.7,89.5,85.0,,83.7,,87.1,111.9,80.0,93.9,94.8,96.3,102.7,109.8,,101.4,89.9,,,96.0,102.4,87.8,88.9,96.9,105.3,103.3,94.1,97.1,98.0,98.8,88.5,89.6,,94.6,76.7,85.8,89.8,88.5,117.2,81.8,99.8,102.0,100.3,83.4,90.1,,,99.4
5,2008-06,Canada,2016A000011124,"Index, 2013=100",186,units,0,v107645719,1.1,,,,1,103.0,91.9,,92.7,91.5,93.1,91.0,84.8,84.0,,,100.3,99.8,87.1,93.0,,119.9,101.2,,87.0,,85.3,,95.7,97.0,97.3,88.5,87.1,,84.0,,89.7,109.6,80.1,93.7,94.4,96.1,103.3,111.2,,100.2,90.4,,,95.9,102.7,89.6,90.0,101.4,103.6,102.9,100.9,102.6,96.2,99.3,88.6,90.4,,94.7,75.6,88.6,90.8,88.5,109.8,83.2,98.6,100.7,98.9,83.7,90.6,,,98.8
6,2008-07,Canada,2016A000011124,"Index, 2013=100",186,units,0,v107645719,1.1,,,,1,98.0,91.0,,92.3,90.4,94.5,92.0,84.9,84.1,,,99.7,99.0,90.1,93.2,,122.1,100.1,,91.6,,86.7,,95.9,95.5,96.7,86.1,85.8,,85.6,,89.3,107.2,80.1,96.5,94.6,96.1,102.7,109.4,,100.7,90.5,,,96.6,102.5,89.8,90.0,96.1,103.6,98.4,101.1,97.7,96.3,100.5,89.0,90.3,,95.8,79.1,88.5,91.0,91.0,110.1,82.1,98.1,99.9,98.4,85.1,89.1,,,98.6
7,2008-08,Canada,2016A000011124,"Index, 2013=100",186,units,0,v107645719,1.1,,,,1,98.1,91.6,,92.3,91.3,94.3,93.0,84.7,84.0,,,99.7,99.0,89.7,93.2,,120.0,99.6,,92.5,,87.0,,96.5,96.3,97.0,83.6,86.1,,86.1,,89.2,107.2,80.0,96.9,94.5,96.0,99.7,109.5,,100.9,89.6,,,96.7,98.1,85.9,87.5,90.7,103.5,101.2,102.6,97.8,96.4,100.6,89.5,90.9,,95.8,79.4,88.7,91.1,91.0,109.9,81.0,98.1,100.0,98.7,85.7,90.6,,,98.5
8,2008-09,Canada,2016A000011124,"Index, 2013=100",186,units,0,v107645719,1.1,,,,1,97.6,91.8,,92.4,91.1,94.6,92.9,85.4,84.9,,,100.8,100.1,90.0,93.2,,119.4,101.5,,92.2,,86.4,,96.5,96.8,100.2,84.8,85.9,,85.3,,90.5,110.7,81.6,96.0,94.3,95.8,96.4,109.4,,102.4,90.5,,,96.7,93.5,87.6,88.5,90.4,100.0,101.4,106.6,97.3,96.0,99.1,89.6,91.3,,97.3,79.3,90.1,91.6,90.9,113.2,80.7,96.4,99.8,98.5,84.8,90.2,,,98.7
9,2008-10,Canada,2016A000011124,"Index, 2013=100",186,units,0,v107645719,1.1,,,,1,93.0,93.7,,95.7,94.6,94.9,94.9,85.3,84.8,,,100.2,99.8,91.1,92.2,,125.0,100.5,,94.3,,87.3,,96.9,96.6,100.6,85.9,84.0,,86.2,,88.8,108.6,81.4,97.4,92.0,93.8,96.5,109.3,,102.6,90.9,,,96.9,93.8,85.4,87.8,98.2,100.7,101.2,106.7,92.7,96.4,99.9,91.3,91.8,,97.5,81.2,87.8,91.7,90.5,112.4,82.8,95.9,98.9,98.2,85.7,93.8,,,99.4
