In [23]:
%%bash
# Prefix every census row with a line_number column; it acts as the
# "primary key" when joining against the other tables.
csvcut --linenumbers "2011 Census all.csv" > census-planning-districts-indexed.csv

# Keep only the first two columns (line_number and Geography, which holds the
# code of each district) in a temporary table, then preview its first rows.
csvcut --columns 1,2 census-planning-districts-indexed.csv > census-planning-districts-tmp.csv
head -n 10 census-planning-districts-tmp.csv | csvlook

|--------------+-------------------|
|  line_number | Geography         |
|--------------+-------------------|
|  1           | WAT - PD - C10    |
|  2           | WAT - PD - C1014  |
|  3           | WAT - PD - C103   |
|  4           | WAT - PD - C109   |
|  5           | WAT - PD - C10N   |
|  6           | WAT - PD - C10S   |
|  7           | WAT - PD - C11    |
|  8           | WAT - PD - C116   |
|  9           | WAT - PD - C12    |
|--------------+-------------------|


In [2]:
import agate
import re

In [10]:
def district_code(text):
    """Extract the district code from a Geography column value.

    Parameters
    ----------
    text : str
        A Geography value such as ``"WAT - PD - C10"``.

    Returns
    -------
    str
        Everything that follows the ``"WAT - PD - "`` prefix on the first
        line of ``text`` (``"C10"`` for the example above).

    Raises
    ------
    ValueError
        If ``text`` is not a string or does not start with the expected
        prefix. Without this check the failure would surface as an opaque
        ``'NoneType' object has no attribute 'group'`` error.
    """
    # Guard against non-string cells (e.g. None for an empty value) before
    # handing the value to the regex engine.
    _match = re.match(r"WAT - PD - (.*)", text) if isinstance(text, str) else None
    if _match is None:
        raise ValueError(
            "Geography value {!r} does not start with 'WAT - PD - '".format(text)
        )
    return _match.group(1)

In [3]:
# Load the temporary two-column table (line_number, Geography) that the
# csvcut step wrote to disk, so the district codes can be extracted in Python.
districts = agate.Table.from_csv("census-planning-districts-tmp.csv")

In [16]:
# Schema of the output table: the numeric row index and the bare district code.
column_types = [agate.Number(), agate.Text()]
column_names = ["index", "code"]

# One [index, code] pair per census row; district_code() removes the
# "WAT - PD - " prefix from the Geography value.
column_values = list(
    [row["line_number"], district_code(row["Geography"])]
    for row in districts.rows
)

# Assemble the table and write it out for the csvkit join step.
districts_table = agate.Table(column_values, column_names, column_types)
districts_table.to_csv("census-planning-districts-codes-indexed.csv")

In [18]:
%%bash
# Preview the first rows of the index/code table written by districts_table.to_csv().
head -n 10 census-planning-districts-codes-indexed.csv | csvlook

|--------+--------|
|  index | code   |
|--------+--------|
|  1     | C10    |
|  2     | C1014  |
|  3     | C103   |
|  4     | C109   |
|  5     | C10N   |
|  6     | C10S   |
|  7     | C11    |
|  8     | C116   |
|  9     | C12    |
|--------+--------|


In [19]:
%%bash
# Preview the planning-district reference table (ID, Name, Municipality);
# its ID column is what the extracted district codes are joined against.
head -n 10 planning-district-codes.csv | csvlook

|-------+--------------------+---------------|
|  ID   | Name               | Municipality  |
|-------+--------------------+---------------|
|  C6   | Downtown Hespeler  | Cambridge     |
|  C8   | Cambrian Hills     | Cambridge     |
|  C10  | Blair              | Cambridge     |
|  C10N | Shades Mills North | Cambridge     |
|  C10S | Shades Mills South | Cambridge     |
|  C11  | Preston Heights    | Cambridge     |
|  C12  | Central Park       | Cambridge     |
|  C13  | Lang's Farm        | Cambridge     |
|  C17  | Elgin Park         | Cambridge     |
|-------+--------------------+---------------|


In [24]:
%%bash
# Create a lookup table (index, code, Name, Municipality) that can be joined
# with census-planning-districts-indexed.csv; "index" carries the line_number
# of each census row.
#
# csvjoin matches the census "code" column against the "ID" column of the
# planning-district list.
# NOTE(review): csvjoin performs an inner join unless told otherwise, so
# census districts without a matching ID would be dropped here - confirm
# that this is intended.
#
# Columns are selected by name rather than by position so the result does not
# depend on whether the installed csvkit keeps the duplicate join column (ID)
# in the joined output.
csvjoin -c "code,ID" census-planning-districts-codes-indexed.csv planning-district-codes.csv | \
csvcut -c index,code,Name,Municipality > "census-planning-districts.csv"

head -n 10 census-planning-districts.csv | csvlook

|--------+-------+----------------------------+---------------|
|  index | code  | Name                       | Municipality  |
|--------+-------+----------------------------+---------------|
|  1     | C10   | Blair                      | Cambridge     |
|  2     | C1014 | Silver Heights/Blackbridge | Cambridge     |
|  3     | C103  | Riverside                  | Cambridge     |
|  4     | C109  | Centennial/River Flats     | Cambridge     |
|  5     | C10N  | Shades Mills North         | Cambridge     |
|  6     | C10S  | Shades Mills South         | Cambridge     |
|  7     | C11   | Preston Heights            | Cambridge     |
|  8     | C116  | Riverview                  | Cambridge     |
|  9     | C12   | Central Park               | Cambridge     |
|--------+-------+----------------------------+---------------|


In [22]:
%%bash
# Save the numbered list of every column name in the indexed census table.
csvcut --names census-planning-districts-indexed.csv > columns.txt

# Sub-columns have names that begin with a space, so in the listing they appear
# as a colon followed by two spaces; dropping those lines leaves only the
# "top level" columns.
grep --invert-match ":  " columns.txt

  1: line_number
  2: Geography
  3: Total population by age groups
 27: Median age of the population
 28: % of the population aged 15 and over
 81: Total population 15 years and over by marital status
108: Total number of persons in private households 
126: Total number of persons aged 65 years and over in private households
144: Total number of census families in private households by family size
149: Total number of census families in private households by family structure and number of children
172: Total children in census families in private households
178: Average number of children at home per census family
179: Average number of persons per census family
180: Total number of private households by household type
197: Total number of private households by household size
204: Total number of persons in private households
205: Average number of persons in private households
206: Total number of occupied private dwellings by structural type of dwelling
216: Detailed Mother Tongue -

In [15]:
%%bash
# Select a few columns by position (the line_number key plus three census
# columns) and list the names of the result to verify the selection.
csvcut -c 1,3,149,150 census-planning-districts-indexed.csv | csvcut -n

  1: line_number
  2: Total population by age groups
  3: Total number of census families in private households by family structure and number of children
  4:   Total couple families by family structure and number of children
