# 2011 National Household Survey

In [1]:
%%bash
# copy the planning district code list into the NHS planning district source directory
cp ../data/waterloo-region/planning-district-codes.csv ../sources/canada/nhs-planningdistrict/

In [2]:
from context import tools
import agate
import re

In [3]:
# switch to the NHS planning district source directory; file names in later cells are relative to it
%cd ../sources/canada/nhs-planningdistrict/

/home/oliver/git/strong-neighbourhoods/sources/canada/nhs-planningdistrict


In [4]:
# strip accented characters from column names
# (csv_out presumably names the cleaned copy; the next cell reads that file — confirm in tools.recode_headings)
tools.recode_headings("NHS all.csv", csv_out="nhs-planning-districts.csv")

In [5]:
%%bash

# index rows with line_number column (csvcut -l prepends it, so it is column 1 in every later extract)
csvcut -l nhs-planning-districts.csv > nhs-planning-districts-indexed.csv

# write the numbered column names to a file
csvcut -n nhs-planning-districts-indexed.csv > nhs-planning-districts-columns.txt

# preview the first two columns of the first rows
head -n 5 nhs-planning-districts-indexed.csv | csvcut -c 1,2 | csvlook

|--------------+----------------------------|
|  line_number | Geography                  |
|--------------+----------------------------|
|  1           | WAT - PD - C10 ( 15.6%)    |
|  2           | WAT - PD - C1014 ( 28.9%)  |
|  3           | WAT - PD - C103 ( 32.8%)   |
|  4           | WAT - PD - C109 ( 26.7%)   |
|--------------+----------------------------|


## Planning district codes

In [6]:
%%bash

# keep only line_number and Geography; the planning district code and the
# non-response rate are parsed out of Geography by the Python cell that follows
csvcut -c 1,2 nhs-planning-districts-indexed.csv > nhs-codes-tmp.csv
# preview the extract
head -n 5 nhs-codes-tmp.csv | csvlook

|--------------+----------------------------|
|  line_number | Geography                  |
|--------------+----------------------------|
|  1           | WAT - PD - C10 ( 15.6%)    |
|  2           | WAT - PD - C1014 ( 28.9%)  |
|  3           | WAT - PD - C103 ( 32.8%)   |
|  4           | WAT - PD - C109 ( 26.7%)   |
|--------------+----------------------------|


In [7]:
# load the line_number/Geography extract; the rest of this cell turns it into
# nhs-planning-districts-codes.csv
geo = agate.Table.from_csv("nhs-codes-tmp.csv")

def match_code(row):
    """Split one row of the Geography extract into index, code and non-response rate.

    The ``Geography`` value is expected to look like ``WAT - PD - C10 ( 15.6%)``.

    Args:
        row: mapping with ``line_number`` and ``Geography`` entries.

    Returns:
        ``[index, code, non_response]`` where ``index`` is an int and the other
        two items are the strings captured from ``Geography``.

    Raises:
        ValueError: if ``Geography`` is empty or does not follow the expected pattern.
    """
    geography = row["Geography"]
    # raw string so the escaped parentheses reach the regex engine unchanged
    _match = re.match(r"WAT - PD - (.*) \( (.*)%\)", geography or "")
    if _match is None:
        # fail with the offending row instead of an AttributeError on None
        raise ValueError(
            "unexpected Geography value on line %s: %r" % (row["line_number"], geography)
        )
    return [int(row["line_number"])] + list(_match.groups())

# One parsed row per input row; index and non-response are numeric, the
# district code stays text.
values = list(map(match_code, geo.rows))
columns = ["index", "code", "non-response"]
types = [agate.Number(), agate.Text(), agate.Number()]
codes = agate.Table(values, column_names=columns, column_types=types)
codes.to_csv("nhs-planning-districts-codes.csv")

In [8]:
%%bash
# preview the first rows of the parsed codes file
head -n5 nhs-planning-districts-codes.csv | csvlook

|--------+-------+---------------|
|  index | code  | non-response  |
|--------+-------+---------------|
|  1     | C10   | 15.6          |
|  2     | C1014 | 28.9          |
|  3     | C103  | 32.8          |
|  4     | C109  | 26.7          |
|--------+-------+---------------|


## Immigration

In [9]:
# (column position in nhs-planning-districts-indexed.csv, output column name)
immigration_columns = (
  (1, "index"),
  (18, "total"),
  (19, "non-immigrants"),
  (20, "immigrants"),
  (21, "immigrants-before-1971"),
  (22, "immigrants-1971-1980"),
  (23, "immigrants-1981-1990"),
  (24, "immigrants-1991-2000"),
  (25, "immigrants-2001-2011")
)
# comma-separated positions in the form `csvcut -c` accepts;
# print() with one argument behaves the same on Python 2 and Python 3
print(",".join(str(col) for col, name in immigration_columns))

1,18,19,20,21,22,23,24,25


In [10]:
# output column names, in the same order as the column positions;
# print() with one argument behaves the same on Python 2 and Python 3
print([name for col, name in immigration_columns])

['index', 'total', 'non-immigrants', 'immigrants', 'immigrants-before-1971', 'immigrants-1971-1980', 'immigrants-1981-1990', 'immigrants-1991-2000', 'immigrants-2001-2011']


In [11]:
%%bash

# keep line_number (column 1) and the immigration columns 18 through 25
csvcut -c 1,18-25 nhs-planning-districts-indexed.csv > immigration-tmp.csv
# list the original headings of the extract
csvcut -n immigration-tmp.csv

  1: line_number
  2: Total population in private households by immigrant status and period of immigration
  3:   Non-immigrants
  4:   Immigrants
  5:     Before 1971
  6:     1971 to 1980
  7:     1981 to 1990
  8:     1991 to 2000
  9:     2001 to 2011


In [12]:
# Re-read the extract under the short column names, then save it.
immigration = agate.Table.from_csv(
    "immigration-tmp.csv",
    column_names=[label for _position, label in immigration_columns]
)
immigration.to_csv("immigration.csv")

In [13]:
%%bash
# list the column names now in immigration.csv
csvcut -n immigration.csv

  1: index
  2: total
  3: non-immigrants
  4: immigrants
  5: immigrants-before-1971
  6: immigrants-1971-1980
  7: immigrants-1981-1990
  8: immigrants-1991-2000
  9: immigrants-2001-2011


## Mobility

In [14]:
# (column position in nhs-planning-districts-indexed.csv, output column name)
mobility_columns = (
    (1, "index"),
    (1746, "total"),
    (1747, "non-movers"),
    (1748, "movers"),
    (1749, "movers-non-migrants"),
    (1750, "movers-migrants"),
    (1751, "migrants-internal"),
    (1752, "internal-intra"),
    (1753, "internal-inter"),
    (1754, "migrants-external")
)
# comma-separated positions in the form `csvcut -c` accepts;
# print() with one argument behaves the same on Python 2 and Python 3
print(",".join(str(col) for col, name in mobility_columns))

1,1746,1747,1748,1749,1750,1751,1752,1753,1754


In [15]:
%%bash
# keep line_number (column 1) and the mobility columns 1746 through 1754
csvcut -c 1,1746-1754 nhs-planning-districts-indexed.csv > mobility-tmp.csv
# list the original headings of the extract
csvcut -n mobility-tmp.csv

  1: line_number
  2: Total - Mobility status 5 years ago
  3:   Non-movers
  4:   Movers
  5:     Non-migrants
  6:     Migrants
  7:       Internal migrants
  8:         Intraprovincial migrants
  9:         Interprovincial migrants
 10:       External migrants


In [16]:
# Re-read the extract under the short column names, then save it.
mobility = agate.Table.from_csv(
    "mobility-tmp.csv",
    column_names=[label for _position, label in mobility_columns]
)
mobility.to_csv("mobility.csv")

In [17]:
%%bash
# list the column names now in mobility.csv
csvcut -n mobility.csv

  1: index
  2: total
  3: non-movers
  4: movers
  5: movers-non-migrants
  6: movers-migrants
  7: migrants-internal
  8: internal-intra
  9: internal-inter
 10: migrants-external


## Educational achievement

In [18]:
%%bash

# keep line_number (column 1) and columns 1803-1810 (highest certificate, diploma or degree)
csvcut -c 1,1803-1810 nhs-planning-districts-indexed.csv > education-tmp.csv
# list the original headings of the extract
csvcut -n education-tmp.csv

  1: line_number
  2: Total population aged 25 to 64 years by highest certificate, diploma or degree
  3:   No certificate, diploma or degree
  4:   High school diploma or equivalent
  5:   Postsecondary certificate, diploma or degree
  6:     Apprenticeship or trades certificate or diploma
  7:     College, CEGEP or other non-university certificate or diploma
  8:     University certificate or diploma below bachelor level
  9:     University certificate, diploma or degree at bachelor level or above


In [19]:
# csvcut positions (1, 1803-1810) paired in order with the short names that
# replace the original headings
education_columns = tuple(zip(
    [1] + list(range(1803, 1811)),
    ["index", "total", "none", "high-school", "post-secondary", "trades",
     "college", "university-below-bachelor", "university-bachelor"]
))

# Re-read the extract under the short names, then save it.
education = agate.Table.from_csv(
    "education-tmp.csv",
    column_names=[label for _position, label in education_columns]
)
education.to_csv("education.csv")

In [19]:
%%bash
# list the column names now in education.csv
csvcut -n education.csv

  1: index
  2: total
  3: none
  4: high-school
  5: post-secondary
  6: trades
  7: college
  8: university-below-bachelor
  9: university-bachelor


## Labour force status

In [20]:
%%bash

# keep line_number (column 1) and columns 1989-1996 (labour force status counts and rates)
csvcut -c 1,1989-1996 nhs-planning-districts-indexed.csv > labour-tmp.csv
# list the original headings of the extract
csvcut -n labour-tmp.csv

  1: line_number
  2: Total population aged 15 years and over by labour force status 
  3:   In the labour force 
  4:     Employed 
  5:     Unemployed
  6:   Not in the labour force
  7: Participation rate
  8: Employment rate 
  9: Unemployment rate 


In [21]:
# csvcut positions (1, 1989-1996) paired in order with the short names that
# replace the original headings
labour_columns = tuple(zip(
    [1] + list(range(1989, 1997)),
    ["index", "total", "in-labour-force", "in-employed", "in-unemployed",
     "not-in-labour-force", "participation-rate", "employment-rate",
     "unemployment-rate"]
))

# Re-read the extract under the short names, then save it.
labour = agate.Table.from_csv(
    "labour-tmp.csv",
    column_names=[label for _position, label in labour_columns]
)
labour.to_csv("labour.csv")

In [22]:
%%bash

# list the column names now in labour.csv
csvcut -n labour.csv

  1: index
  2: total
  3: in-labour-force
  4: in-employed
  5: in-unemployed
  6: not-in-labour-force
  7: participation-rate
  8: employment-rate
  9: unemployment-rate


## After-tax income

In [22]:
%%bash

# keep line_number (column 1) and columns 2286-2300 (individual after-tax income brackets and median)
csvcut -c 1,2286-2300 nhs-planning-districts-indexed.csv > income-tmp.csv
# list the original headings of the extract
csvcut -n income-tmp.csv

  1: line_number
  2: After-tax income in 2010 of population 15 years and over
  3:   Without after-tax income
  4:   With after-tax income
  5:     Under $5,000
  6:     $5,000 to $9,999
  7:     $10,000 to $14,999
  8:     $15,000 to $19,999
  9:     $20,000 to $29,999
 10:     $30,000 to $39,999
 11:     $40,000 to $49,999
 12:     $50,000 to $59,999
 13:     $60,000 to $79,999
 14:     $80,000 to $99,999
 15:     $100,000 and over
 16:   Median after-tax income $


In [23]:
# csvcut positions (1, 2286-2300) paired in order with the short names that
# replace the original headings
income_columns = tuple(zip(
    [1] + list(range(2286, 2301)),
    ["index", "total", "without-income", "with-income",
     "under-5", "5-10", "10-15", "15-20", "20-30", "30-40", "40-50",
     "50-60", "60-80", "80-100", "100-plus", "median-income"]
))

# Re-read the extract under the short names, then save it.
income = agate.Table.from_csv(
    "income-tmp.csv",
    column_names=[label for _position, label in income_columns]
)
income.to_csv("income.csv")

In [25]:
%%bash

# list the column names now in income.csv
csvcut -n income.csv

  1: index
  2: total
  3: without-income
  4: with-income
  5: under-5
  6: 5-10
  7: 10-15
  8: 15-20
  9: 20-30
 10: 30-40
 11: 40-50
 12: 50-60
 13: 60-80
 14: 80-100
 15: 100-plus
 16: median-income


## Dwelling conditions

In [24]:
%%bash

# keep line_number (column 1) and columns 2493-2495 (occupied private dwellings by condition)
csvcut -c 1,2493-2495 nhs-planning-districts-indexed.csv > dwellings-tmp.csv
# list the original headings of the extract
csvcut -n dwellings-tmp.csv

  1: line_number
  2: Total number of occupied private dwellings by condition of dwelling
  3:   Only regular maintenance or minor repairs needed
  4:   Major repairs needed


In [25]:
# csvcut positions (1, 2493-2495) paired in order with the short names that
# replace the original headings
dwellings_columns = tuple(zip(
    [1] + list(range(2493, 2496)),
    ["index", "total", "minor-repairs", "major-repairs"]
))

# Re-read the extract under the short names, then save it.
dwellings = agate.Table.from_csv(
    "dwellings-tmp.csv",
    column_names=[label for _position, label in dwellings_columns]
)
dwellings.to_csv("dwellings.csv")

## Housing suitability

In [26]:
%%bash

# keep line_number (column 1) and columns 2537-2539 (private households by housing suitability)
csvcut -c 1,2537-2539 nhs-planning-districts-indexed.csv > housing-tmp.csv
# list the original headings of the extract
csvcut -n housing-tmp.csv

  1: line_number
  2: Total number of private households by housing suitability
  3:   Suitable
  4:   Not suitable


In [27]:
# csvcut positions (1, 2537-2539) paired in order with the short names that
# replace the original headings
housing_columns = tuple(zip(
    [1] + list(range(2537, 2540)),
    ["index", "total", "suitable", "not-suitable"]
))

# Re-read the extract under the short names, then save it.
housing = agate.Table.from_csv(
    "housing-tmp.csv",
    column_names=[label for _position, label in housing_columns]
)
housing.to_csv("housing.csv")

In [28]:
%%bash

# list the column names now in housing.csv
csvcut -n housing.csv

  1: index
  2: total
  3: suitable
  4: not-suitable


## Shelter cost

In [29]:
%%bash

# keep line_number (column 1) and columns 2540-2542 (households by shelter-cost-to-income ratio)
csvcut -c 1,2540-2542 nhs-planning-districts-indexed.csv > shelter-tmp.csv
# list the original headings of the extract
csvcut -n shelter-tmp.csv

  1: line_number
  2: Total number of owner and tenant households with household total income greater than zero, in non-farm, non-reserve private dwellings by shelter-cost-to-income ratio
  3:   Spending less than 30% of household total income on shelter costs
  4:   Spending 30% or more of household total income on shelter costs


In [30]:
# csvcut positions (1, 2540-2542) paired in order with the short names that
# replace the original headings
shelter_columns = tuple(zip(
    [1] + list(range(2540, 2543)),
    ["index", "total", "less-than-30-percent", "30-percent-or-more"]
))

# Re-read the extract under the short names, then save it.
shelter = agate.Table.from_csv(
    "shelter-tmp.csv",
    column_names=[label for _position, label in shelter_columns]
)
shelter.to_csv("shelter.csv")

In [31]:
%%bash

# list the column names now in shelter.csv
csvcut -n shelter.csv

  1: index
  2: total
  3: less-than-30-percent
  4: 30-percent-or-more


## Visible minorities

In [32]:
%%bash

# keep line_number (column 1) and columns 459-473 (population by visible minority)
csvcut -c 1,459-473 nhs-planning-districts-indexed.csv > minority-tmp.csv
# list the original headings of the extract
csvcut -n minority-tmp.csv

  1: line_number
  2: Total population in private households by visible minority
  3:   Total visible minority population
  4:     South Asian
  5:     Chinese
  6:     Black
  7:     Filipino
  8:     Latin American
  9:     Arab
 10:     Southeast Asian
 11:     West Asian
 12:     Korean
 13:     Japanese
 14:     Visible minority, n.i.e.
 15:     Multiple visible minorities
 16:   Not a visible minority


In [33]:
# csvcut positions (1, 459-473) paired in order with the short names that
# replace the original headings
minority_columns = tuple(zip(
    [1] + list(range(459, 474)),
    ["index", "total", "minority", "south-asian", "chinese", "black",
     "filipino", "latin-american", "arab", "southeast-asian", "west-asian",
     "korean", "japanese", "other", "multiple", "not-minority"]
))

# Re-read the extract under the short names, then save it.
minority = agate.Table.from_csv(
    "minority-tmp.csv",
    column_names=[label for _position, label in minority_columns]
)
minority.to_csv("minority.csv")

In [34]:
%%bash
# list the column names now in minority.csv
csvcut -n minority.csv

  1: index
  2: total
  3: minority
  4: south-asian
  5: chinese
  6: black
  7: filipino
  8: latin-american
  9: arab
 10: southeast-asian
 11: west-asian
 12: korean
 13: japanese
 14: other
 15: multiple
 16: not-minority


## Mode of transportation to work

In [35]:
%%bash

# keep line_number (column 1) and columns 2193-2199 (employed population by mode of transportation)
csvcut -c 1,2193-2199 nhs-planning-districts-indexed.csv > transportation-tmp.csv
# list the original headings of the extract
csvcut -n transportation-tmp.csv

  1: line_number
  2: Total employed population aged 15 years and over with a usual place of work or no fixed workplace address by mode of transportation 
  3:   Car, truck or van - as a driver 
  4:   Car, truck or van - as a passenger 
  5:   Public transit 
  6:   Walked
  7:   Bicycle
  8:   Other methods 


In [36]:
# csvcut positions (1, 2193-2199) paired in order with the short names that
# replace the original headings
transportation_columns = tuple(zip(
    [1] + list(range(2193, 2200)),
    ["index", "total", "car-driver", "car-passenger", "transit", "walked",
     "bicycle", "other"]
))

# Re-read the extract under the short names, then save it.
transportation = agate.Table.from_csv(
    "transportation-tmp.csv",
    column_names=[label for _position, label in transportation_columns]
)
transportation.to_csv("transportation.csv")

In [37]:
%%bash
# list the column names now in transportation.csv
csvcut -n transportation.csv

  1: index
  2: total
  3: car-driver
  4: car-passenger
  5: transit
  6: walked
  7: bicycle
  8: other


## After-tax household income

In [38]:
%%bash

# keep line_number (column 1), the household after-tax income brackets (2570-2581) and the
# median / household-size columns 2587, 2589, 2592, 2594 and 2597
csvcut -c 1,2570-2581,2587,2589,2592,2594,2597 nhs-planning-districts-indexed.csv > household-income-tmp.csv
# list the original headings of the extract
csvcut -n household-income-tmp.csv

  1: line_number
  2: After-tax income of households in 2010 of private households
  3:   Under $5,000
  4:   $5,000 to $9,999
  5:   $10,000 to $14,999
  6:   $15,000 to $19,999
  7:   $20,000 to $29,999
  8:   $30,000 to $39,999
  9:   $40,000 to $49,999
 10:   $50,000 to $59,999
 11:   $60,000 to $79,999
 12:   $80,000 to $99,999
 13:   $100,000 and over
 14:   Median after-tax household income $
 15:   One-person private households
 16:     Median after-tax household income $
 17:   Two-or-more-persons private households
 18:     Median after-tax household income $


In [41]:
# csvcut positions (1, 2570-2581, 2587, 2589, 2592, 2594, 2597) paired in order
# with the short names that replace the original headings
household_income_columns = tuple(zip(
    [1] + list(range(2570, 2582)) + [2587, 2589, 2592, 2594, 2597],
    ["index", "total",
     "under-5", "5-10", "10-15", "15-20", "20-30", "30-40", "40-50",
     "50-60", "60-80", "80-100", "100-plus",
     "median-hhi", "one-person-hh", "one-person-hhi", "two-plus-hh",
     "two-plus-hhi"]
))

# Re-read the extract under the short names, then save it.
household_income = agate.Table.from_csv(
    "household-income-tmp.csv",
    column_names=[label for _position, label in household_income_columns]
)
household_income.to_csv("household-income.csv")

In [42]:
%%bash
# list the column names now in household-income.csv
csvcut -n household-income.csv

  1: index
  2: total
  3: under-5
  4: 5-10
  5: 10-15
  6: 15-20
  7: 20-30
  8: 30-40
  9: 40-50
 10: 50-60
 11: 60-80
 12: 80-100
 13: 100-plus
 14: median-hhi
 15: one-person-hh
 16: one-person-hhi
 17: two-plus-hh
 18: two-plus-hhi


## Income source

In [43]:
%%bash

# keep line_number (column 1) and columns 2334-2347 (composition of total income, in percent)
csvcut -c 1,2334-2347 nhs-planning-districts-indexed.csv > income-source-tmp.csv
# list the original headings of the extract
csvcut -n income-source-tmp.csv

  1: line_number
  2: Composition of total income in 2010 of population 15 years and over %
  3:   Market income %
  4:     Employment income %
  5:       Wages and salaries %
  6:       Self-employment income %
  7:     Investment income %
  8:     Retirement pensions, superannuation and annuities %
  9:     Other money income %
 10:   Government transfer payments %
 11:     Canada/Quebec Pension Plan benefits %
 12:     Old Age Security pensions and Guaranteed Income Supplement %
 13:     Employment Insurance benefits %
 14:     Child benefits %
 15:     Other income from government sources %


In [44]:
# csvcut positions (1, 2334-2347) paired in order with the short names that
# replace the original headings
income_source_columns = tuple(zip(
    [1] + list(range(2334, 2348)),
    ["index", "total", "market-income", "employment", "wages-salaries",
     "self-employment", "investment", "retirement-pension", "other-market",
     "government-transfers", "CPP", "OAS-GIS", "EI", "child-benefits",
     "other-government"]
))

# Re-read the extract under the short names, then save it.
income_source = agate.Table.from_csv(
    "income-source-tmp.csv",
    column_names=[label for _position, label in income_source_columns]
)
income_source.to_csv("income-source.csv")

In [45]:
%%bash
# list the column names now in income-source.csv
csvcut -n income-source.csv

  1: index
  2: total
  3: market-income
  4: employment
  5: wages-salaries
  6: self-employment
  7: investment
  8: retirement-pension
  9: other-market
 10: government-transfers
 11: CPP
 12: OAS-GIS
 13: EI
 14: child-benefits
 15: other-government


## Tenant versus owner households

In [46]:
%%bash

# keep line_number (column 1), the owner household columns 2544, 2546-2547 and 2549,
# and the tenant household columns 2551-2554
csvcut -c 1,2544,2546-2547,2549,2551-2554 nhs-planning-districts-indexed.csv > tenant-owner-tmp.csv
# list the original headings of the extract
csvcut -n tenant-owner-tmp.csv

  1: line_number
  2: Number of owner households in non-farm, non-reserve private dwellings
  3:   % of owner households spending 30% or more of household total income on shelter costs
  4:   Median monthly shelter costs for owned dwellings ($)
  5:   Median value of dwellings ($)
  6: Number of tenant households in non-farm, non-reserve private dwellings
  7:   % of tenant households in subsidized housing
  8:   % of tenant households spending 30% or more of household total income on shelter costs
  9:   Median monthly shelter costs for rented dwellings ($)


In [47]:
# csvcut positions (1, 2544, 2546-2547, 2549, 2551-2554) paired in order with
# the short names that replace the original headings
tenant_owner_columns = tuple(zip(
    [1, 2544, 2546, 2547, 2549] + list(range(2551, 2555)),
    ["index",
     "owner-households",
     "percent-owner-30-percent-or-more",
     "owner-median-monthly-shelter-cost",
     "owner-median-value-dwelling",
     "tenant-households",
     "percent-subsidized-housing",
     "percent-tenant-30-percent-or-more",
     "tenant-median-monthly-shelter-cost"]
))

# Re-read the extract under the short names, then save it.
tenant_owner = agate.Table.from_csv(
    "tenant-owner-tmp.csv",
    column_names=[label for _position, label in tenant_owner_columns]
)
tenant_owner.to_csv("tenant-owner.csv")

In [48]:
%%bash
# list the column names now in tenant-owner.csv
csvcut -n tenant-owner.csv

  1: index
  2: owner-households
  3: percent-owner-30-percent-or-more
  4: owner-median-monthly-shelter-cost
  5: owner-median-value-dwelling
  6: tenant-households
  7: percent-subsidized-housing
  8: percent-tenant-30-percent-or-more
  9: tenant-median-monthly-shelter-cost
