# 2011 National Household Survey

In [1]:
%%bash
# copy planning-district-codes.csv into the NHS planning-district source directory
cp ../data/waterloo-region/planning-district-codes.csv \
   ../sources/canada/nhs-planningdistrict/

In [2]:
from context import tools
import agate
import re

In [None]:
# Switch to the NHS planning-district source directory; the bare file names
# used by the cells below are resolved against it.
# NOTE(review): the path is relative, so running this cell a second time in the
# same kernel resolves it from the new working directory.
%cd ../sources/canada/nhs-planningdistrict/

In [4]:
# Write a copy of the raw NHS export ("NHS all.csv") in which accented
# characters have been stripped from the column names.
tools.recode_headings(
    "NHS all.csv",
    csv_out="nhs-planning-districts.csv",
)

In [5]:
%%bash

# prepend a line_number column so that every row carries an index
csvcut --linenumbers nhs-planning-districts.csv > nhs-planning-districts-indexed.csv

# save the numbered list of column names to a text file
csvcut --names nhs-planning-districts-indexed.csv > nhs-planning-districts-columns.txt

# preview the first two columns of the first rows
head -n 5 nhs-planning-districts-indexed.csv | csvcut --columns 1,2 | csvlook

|--------------+----------------------------|
|  line_number | Geography                  |
|--------------+----------------------------|
|  1           | WAT - PD - C10 ( 15.6%)    |
|  2           | WAT - PD - C1014 ( 28.9%)  |
|  3           | WAT - PD - C103 ( 32.8%)   |
|  4           | WAT - PD - C109 ( 26.7%)   |
|--------------+----------------------------|


## Planning district codes

In [6]:
%%bash

# keep only the row index and the Geography column; the planning district code
# and non-response rate are parsed out of Geography in the following Python cell
csvcut --columns 1,2 nhs-planning-districts-indexed.csv > nhs-codes-tmp.csv
head -n 5 nhs-codes-tmp.csv | csvlook

|--------------+----------------------------|
|  line_number | Geography                  |
|--------------+----------------------------|
|  1           | WAT - PD - C10 ( 15.6%)    |
|  2           | WAT - PD - C1014 ( 28.9%)  |
|  3           | WAT - PD - C103 ( 32.8%)   |
|  4           | WAT - PD - C109 ( 26.7%)   |
|--------------+----------------------------|


In [7]:
# create a new csv file from nhs-codes-tmp.csv:
# load the line_number/Geography extract so each row can be parsed below
geo = agate.Table.from_csv("nhs-codes-tmp.csv")

def match_code(row):
    """Split one Geography label into its district code and non-response rate.

    Labels look like ``WAT - PD - C10 ( 15.6%)``.

    Args:
        row: mapping providing a ``line_number`` and a ``Geography`` value.

    Returns:
        ``[index, code, non_response]`` where ``index`` is ``line_number``
        converted to ``int`` and the other two items are the strings captured
        from the label, e.g. ``[1, "C10", "15.6"]``.

    Raises:
        ValueError: if ``Geography`` does not follow the expected layout.
    """
    geography = row["Geography"]
    # raw string: "\(" is not a valid escape in an ordinary string literal
    _match = re.match(r"WAT - PD - (.*) \( (.*)%\)", geography)
    if _match is None:
        # name the offending row instead of failing later on None.groups()
        raise ValueError(
            "row {}: unexpected Geography value {!r}".format(
                row["line_number"], geography))
    _list = [int(row["line_number"])]
    _list.extend(_match.groups())
    return _list

# Parse every Geography row, assemble the results into a typed table
# (numeric index, text code, numeric non-response rate) and save it as CSV.
columns = ["index", "code", "non-response"]
types = [agate.Number(), agate.Text(), agate.Number()]
values = []
for row in geo.rows:
    values.append(match_code(row))
codes = agate.Table(values, columns, types)
codes.to_csv("nhs-planning-districts-codes.csv")

In [8]:
%%bash
# preview the parsed planning district codes
head -n 5 nhs-planning-districts-codes.csv | csvlook

|--------+-------+---------------|
|  index | code  | non-response  |
|--------+-------+---------------|
|  1     | C10   | 15.6          |
|  2     | C1014 | 28.9          |
|  3     | C103  | 32.8          |
|  4     | C109  | 26.7          |
|--------+-------+---------------|


## Immigration

In [9]:
# (column number in nhs-planning-districts-indexed.csv, short output name)
immigration_columns = (
    (1, "index"),
    (18, "total"),
    (19, "non-immigrants"),
    (20, "immigrants"),
    (21, "immigrants-before-1971"),
    (22, "immigrants-1971-1980"),
    (23, "immigrants-1981-1990"),
    (24, "immigrants-1991-2000"),
    (25, "immigrants-2001-2011")
)
# Show the column numbers as a comma-separated list, the form csvcut -c takes.
# print(...) with one argument prints the same text on Python 2 and Python 3.
print(",".join(str(col) for col, name in immigration_columns))

1,18,19,20,21,22,23,24,25


In [10]:
%%bash

# pull the row index and the immigration columns, then list their headings
csvcut --columns 1,18,19,20,21,22,23,24,25 nhs-planning-districts-indexed.csv > immigration-tmp.csv
csvcut --names immigration-tmp.csv

  1: line_number
  2: Total population in private households by immigrant status and period of immigration
  3:   Non-immigrants
  4:   Immigrants
  5:     Before 1971
  6:     1971 to 1980
  7:     1981 to 1990
  8:     1991 to 2000
  9:     2001 to 2011


In [11]:
# Reload the extract under the short names from immigration_columns
# (they replace the verbose NHS headings) and save the renamed table.
immigration_names = [name for col, name in immigration_columns]
immigration = agate.Table.from_csv("immigration-tmp.csv", immigration_names)
immigration.to_csv("immigration.csv")

In [12]:
%%bash
# confirm the renamed headings
csvcut --names immigration.csv

  1: index
  2: total
  3: non-immigrants
  4: immigrants
  5: immigrants-before-1971
  6: immigrants-1971-1980
  7: immigrants-1981-1990
  8: immigrants-1991-2000
  9: immigrants-2001-2011


## Mobility

In [13]:
# (column number in nhs-planning-districts-indexed.csv, short output name)
mobility_columns = (
    (1, "index"),
    (1746, "total"),
    (1747, "non-movers"),
    (1748, "movers"),
    (1749, "movers-non-migrants"),
    (1750, "movers-migrants"),
    (1751, "migrants-internal"),
    (1752, "internal-intra"),
    (1753, "internal-inter"),
    (1754, "migrants-external")
)
# Show the column numbers as a comma-separated list, the form csvcut -c takes.
# print(...) with one argument prints the same text on Python 2 and Python 3.
print(",".join(str(col) for col, name in mobility_columns))

1,1746,1747,1748,1749,1750,1751,1752,1753,1754


In [14]:
%%bash
# pull the row index and the mobility columns, then list their headings
csvcut --columns 1,1746,1747,1748,1749,1750,1751,1752,1753,1754 nhs-planning-districts-indexed.csv > mobility-tmp.csv
csvcut --names mobility-tmp.csv

  1: line_number
  2: Total - Mobility status 5 years ago
  3:   Non-movers
  4:   Movers
  5:     Non-migrants
  6:     Migrants
  7:       Internal migrants
  8:         Intraprovincial migrants
  9:         Interprovincial migrants
 10:       External migrants


In [15]:
# Reload the extract under the short names from mobility_columns
# (they replace the verbose NHS headings) and save the renamed table.
mobility_names = [name for col, name in mobility_columns]
mobility = agate.Table.from_csv("mobility-tmp.csv", mobility_names)
mobility.to_csv("mobility.csv")

In [16]:
%%bash
# confirm the renamed headings
csvcut --names mobility.csv

  1: index
  2: total
  3: non-movers
  4: movers
  5: movers-non-migrants
  6: movers-migrants
  7: migrants-internal
  8: internal-intra
  9: internal-inter
 10: migrants-external


## Educational achievement

In [17]:
%%bash

# pull the row index and columns 1803-1810 (highest certificate, diploma or
# degree), then list their headings
csvcut --columns 1,1803-1810 nhs-planning-districts-indexed.csv > education-tmp.csv
csvcut --names education-tmp.csv

  1: line_number
  2: Total population aged 25 to 64 years by highest certificate, diploma or degree
  3:   No certificate, diploma or degree
  4:   High school diploma or equivalent
  5:   Postsecondary certificate, diploma or degree
  6:     Apprenticeship or trades certificate or diploma
  7:     College, CEGEP or other non-university certificate or diploma
  8:     University certificate or diploma below bachelor level
  9:     University certificate, diploma or degree at bachelor level or above


In [18]:
# (column number in nhs-planning-districts-indexed.csv, short output name)
education_columns = (
    (1, "index"),
    (1803, "total"),
    (1804, "none"),
    (1805, "high-school"),
    (1806, "post-secondary"),
    (1807, "trades"),
    (1808, "college"),
    (1809, "university-below-bachelor"),
    (1810, "university-bachelor")
)

# Reload the extract under the short names and save the renamed table.
education_names = [name for col, name in education_columns]
education = agate.Table.from_csv("education-tmp.csv", education_names)
education.to_csv("education.csv")

In [19]:
%%bash
# confirm the renamed headings
csvcut --names education.csv

  1: index
  2: total
  3: none
  4: high-school
  5: post-secondary
  6: trades
  7: college
  8: university-below-bachelor
  9: university-bachelor


## Labour force status

In [20]:
%%bash

# pull the row index and columns 1989-1996 (labour force status and rates),
# then list their headings
csvcut --columns 1,1989-1996 nhs-planning-districts-indexed.csv > labour-tmp.csv
csvcut --names labour-tmp.csv

  1: line_number
  2: Total population aged 15 years and over by labour force status 
  3:   In the labour force 
  4:     Employed 
  5:     Unemployed
  6:   Not in the labour force
  7: Participation rate
  8: Employment rate 
  9: Unemployment rate 


In [21]:
# (column number in nhs-planning-districts-indexed.csv, short output name)
labour_columns = (
    (1, "index"),
    (1989, "total"),
    (1990, "in-labour-force"),
    (1991, "in-employed"),
    (1992, "in-unemployed"),
    (1993, "not-in-labour-force"),
    (1994, "participation-rate"),
    (1995, "employment-rate"),
    (1996, "unemployment-rate")
)

# Reload the extract under the short names and save the renamed table.
labour_names = [name for col, name in labour_columns]
labour = agate.Table.from_csv("labour-tmp.csv", labour_names)
labour.to_csv("labour.csv")

In [22]:
%%bash

# confirm the renamed headings
csvcut --names labour.csv

  1: index
  2: total
  3: in-labour-force
  4: in-employed
  5: in-unemployed
  6: not-in-labour-force
  7: participation-rate
  8: employment-rate
  9: unemployment-rate


## After-tax income

In [23]:
%%bash

# pull the row index and columns 2286-2300 (after-tax income brackets and
# median), then list their headings
csvcut --columns 1,2286-2300 nhs-planning-districts-indexed.csv > income-tmp.csv
csvcut --names income-tmp.csv

  1: line_number
  2: After-tax income in 2010 of population 15 years and over
  3:   Without after-tax income
  4:   With after-tax income
  5:     Under $5,000
  6:     $5,000 to $9,999
  7:     $10,000 to $14,999
  8:     $15,000 to $19,999
  9:     $20,000 to $29,999
 10:     $30,000 to $39,999
 11:     $40,000 to $49,999
 12:     $50,000 to $59,999
 13:     $60,000 to $79,999
 14:     $80,000 to $99,999
 15:     $100,000 and over
 16:   Median after-tax income $


In [24]:
# (column number in nhs-planning-districts-indexed.csv, short output name);
# bracket names are in thousands of dollars
income_columns = (
    (1, "index"),
    (2286, "total"),
    (2287, "without-income"),
    (2288, "with-income"),
    (2289, "under-5"),
    (2290, "5-10"),
    (2291, "10-15"),
    (2292, "15-20"),
    (2293, "20-30"),
    (2294, "30-40"),
    (2295, "40-50"),
    (2296, "50-60"),
    (2297, "60-80"),
    (2298, "80-100"),
    (2299, "100-plus"),
    (2300, "median-income")
)

# Reload the extract under the short names and save the renamed table.
income_names = [name for col, name in income_columns]
income = agate.Table.from_csv("income-tmp.csv", income_names)
income.to_csv("income.csv")

In [25]:
%%bash

# confirm the renamed headings
csvcut --names income.csv

  1: index
  2: total
  3: without-income
  4: with-income
  5: under-5
  6: 5-10
  7: 10-15
  8: 15-20
  9: 20-30
 10: 30-40
 11: 40-50
 12: 50-60
 13: 60-80
 14: 80-100
 15: 100-plus
 16: median-income


## Dwelling conditions

In [26]:
%%bash

# pull the row index and columns 2493-2495 (condition of dwelling), then list
# their headings
csvcut --columns 1,2493-2495 nhs-planning-districts-indexed.csv > dwellings-tmp.csv
csvcut --names dwellings-tmp.csv

  1: line_number
  2: Total number of occupied private dwellings by condition of dwelling
  3:   Only regular maintenance or minor repairs needed
  4:   Major repairs needed


In [27]:
# (column number in nhs-planning-districts-indexed.csv, short output name)
dwellings_columns = (
    (1, "index"),
    (2493, "total"),
    (2494, "minor-repairs"),
    (2495, "major-repairs")
)

# Reload the extract under the short names and save the renamed table.
dwellings_names = [name for col, name in dwellings_columns]
dwellings = agate.Table.from_csv("dwellings-tmp.csv", dwellings_names)
dwellings.to_csv("dwellings.csv")

## Housing suitability

In [28]:
%%bash

# pull the row index and columns 2537-2539 (housing suitability), then list
# their headings
csvcut --columns 1,2537-2539 nhs-planning-districts-indexed.csv > housing-tmp.csv
csvcut --names housing-tmp.csv

  1: line_number
  2: Total number of private households by housing suitability
  3:   Suitable
  4:   Not suitable


In [29]:
# (column number in nhs-planning-districts-indexed.csv, short output name)
housing_columns = (
    (1, "index"),
    (2537, "total"),
    (2538, "suitable"),
    (2539, "not-suitable")
)

# Reload the extract under the short names and save the renamed table.
housing_names = [name for col, name in housing_columns]
housing = agate.Table.from_csv("housing-tmp.csv", housing_names)
housing.to_csv("housing.csv")

In [30]:
%%bash

# confirm the renamed headings
csvcut --names housing.csv

  1: index
  2: total
  3: suitable
  4: not-suitable


## Shelter cost

In [31]:
%%bash

# pull the row index and columns 2540-2542 (shelter-cost-to-income ratio),
# then list their headings
csvcut --columns 1,2540-2542 nhs-planning-districts-indexed.csv > shelter-tmp.csv
csvcut --names shelter-tmp.csv

  1: line_number
  2: Total number of owner and tenant households with household total income greater than zero, in non-farm, non-reserve private dwellings by shelter-cost-to-income ratio
  3:   Spending less than 30% of household total income on shelter costs
  4:   Spending 30% or more of household total income on shelter costs


In [32]:
# (column number in nhs-planning-districts-indexed.csv, short output name)
shelter_columns = (
    (1, "index"),
    (2540, "total"),
    (2541, "less-than-30-percent"),
    (2542, "30-percent-or-more")
)

# Reload the extract under the short names and save the renamed table.
shelter_names = [name for col, name in shelter_columns]
shelter = agate.Table.from_csv("shelter-tmp.csv", shelter_names)
shelter.to_csv("shelter.csv")

In [33]:
%%bash

# confirm the renamed headings
csvcut --names shelter.csv

  1: index
  2: total
  3: less-than-30-percent
  4: 30-percent-or-more
