In [1]:
import agate

In [2]:
person = agate.Table.from_csv('2013Person3.csv')
print person

|--------------------------+---------------|
|  column_names            | column_types  |
|--------------------------+---------------|
|  personid                | Text          |
|  vehicleid               | Text          |
|  crashid                 | Number        |
|  personinvolvementcd     | Boolean       |
|  sex                     | Boolean       |
|  dob                     | Date          |
|  personalinjurylevel     | Text          |
|  seatlocationcd          | Text          |
|  transportedind          | Text          |
|  ejectionind             | Text          |
|  airbagcd                | Text          |
|  alcoholuseind           | Text          |
|  pedestrianlocationcd    | Boolean       |
|  pedestriantype          | Boolean       |
|  pedestrianschoolinfoind | Boolean       |
|  ageatcrash              | Number        |
|  contribcirccd1          | Text          |
|  contribcirccd2          | Number        |
|  contribcirccd3          | Number        |
|  contrib

In [3]:
by_personalinjurylevel = person.group_by('personalinjurylevel')

personalinjurylevel_totals = by_personalinjurylevel.aggregate([
    ('count', agate.Count())
])

sorted_totals = personalinjurylevel_totals.order_by('count', reverse=True)

sorted_totals.print_table(max_rows=8)

|----------------------+---------|
|  personalinjurylevel |  count  |
|----------------------+---------|
|  5                   | 83,208  |
|  4                   |  9,467  |
|  3                   |  5,721  |
|  2                   |  1,339  |
|  U                   |    881  |
|  1                   |    154  |
|----------------------+---------|


In [11]:
# Filters rows without age data
only_with_age = by_personalinjurylevel.where(
    lambda r: r['ageatcrash'] is not None
)

# Group by personal injury level
injury_groups = only_with_age.group_by('personalinjurylevel')

# Sub-group by age cohorts (20s, 30s, etc.)
injury_and_age_groups = injury_groups.group_by(
    lambda r: '%i0s' % (r['ageatcrash'] // 10),
    key_name='age_group'
)

injury_groups_totals = injury_and_age_groups.aggregate([
    ('count', agate.Count())
])

#Count
sorted_totals = injury_groups_totals.order_by('age_group', reverse=True)

sorted_totals.print_table(max_rows=75)

|----------------------+-----------------------+-----------+---------|
|  personalinjurylevel | personalinjurylevel_2 | age_group |  count  |
|----------------------+-----------------------+-----------+---------|
|  5                   | 5                     | 990s      |  1,337  |
|  4                   | 4                     | 990s      |     14  |
|  3                   | 3                     | 990s      |      7  |
|  U                   | U                     | 990s      |    303  |
|  5                   | 5                     | 90s       |    155  |
|  4                   | 4                     | 90s       |     13  |
|  3                   | 3                     | 90s       |      5  |
|  2                   | 2                     | 90s       |      1  |
|  1                   | 1                     | 90s       |      2  |
|  5                   | 5                     | 80s       |  1,505  |
|  4                   | 4                     | 80s       |    127  |
|  3  

In [10]:
by_distractioncd = person.group_by('distractioncd1')
by_contribcirccd = person.group_by('contribcirccd1')

# Filters rows without age data
only_with_age = by_distractioncd.where(
    lambda r: r['ageatcrash'] is not None
)

# Group by contrib circ
contrib_groups = only_with_age.group_by('contribcirccd1')

# Sub-group by age cohorts (20s, 30s, etc.)
contrib_and_age_groups = contrib_groups.group_by(
    lambda r: '%i0s' % (r['ageatcrash'] // 10),
    key_name='age_group'
)

contrib_groups_totals = contrib_and_age_groups.aggregate([
    ('count', agate.Count())
])

#Count
sorted_totals = contrib_groups_totals.order_by('count', reverse=True)

sorted_totals.print_table(max_rows=150)

|-----------------+----------------+-----------+--------|
|  distractioncd1 | contribcirccd1 | age_group | count  |
|-----------------+----------------+-----------+--------|
|                 | 22             | 20s       | 9,978  |
|                 | 22             | 30s       | 8,330  |
|                 | 22             | 40s       | 7,506  |
|                 | 22             | 50s       | 6,749  |
|                 | 22             | 60s       | 3,841  |
|                 | 22             | 10s       | 3,182  |
|                 | 9              | 20s       | 2,359  |
|                 | 5              | 20s       | 2,144  |
|                 | 17             | 20s       | 2,137  |
|                 | 22             | 70s       | 1,458  |
|                 | 17             | 10s       | 1,434  |
|                 | 9              | 10s       | 1,424  |
|                 | 5              | 10s       | 1,328  |
|                 | 9              | 30s       | 1,319  |
|             

In [6]:
by_personalinjurylevel = person.group_by('personalinjurylevel')
by_contribcirccd = person.group_by('contribcirccd1')

circs_group = by_personalinjurylevel.group_by('contribcirccd1')

circs_groups_totals = circs_group.aggregate([
    ('count', agate.Count())
])

sorted_totals = circs_groups_totals.order_by('count', reverse=True)

sorted_totals.print_table(max_rows=75)

|----------------------+----------------+---------|
|  personalinjurylevel | contribcirccd1 |  count  |
|----------------------+----------------+---------|
|  5                   | 22             | 34,294  |
|  5                   | 17             |  7,967  |
|  5                   | 9              |  7,888  |
|  5                   | 21             |  5,845  |
|  4                   | 22             |  4,951  |
|  5                   | 5              |  4,892  |
|  5                   | U              |  4,318  |
|  5                   |                |  3,101  |
|  5                   | 13             |  3,033  |
|  3                   | 22             |  1,935  |
|  5                   | 7              |  1,594  |
|  5                   | 33             |  1,448  |
|  5                   | 31             |  1,425  |
|  5                   | 11             |  1,343  |
|  5                   | 12             |  1,152  |
|  5                   | 23             |    910  |
|  4        

In [7]:
by_personalinjurylevel = person.group_by('personalinjurylevel')
by_contribcirccd = person.group_by('contribcirccd1')

circs_group = by_personalinjurylevel.group_by('contribcirccd1')

circs_groups_totals = circs_group.aggregate([
    ('count', agate.Count())
])

sorted_totals = circs_groups_totals.order_by('personalinjurylevel')

sorted_totals.print_table(max_rows=75)

|----------------------+----------------+--------|
|  personalinjurylevel | contribcirccd1 | count  |
|----------------------+----------------+--------|
|  1                   | 5              |    37  |
|  1                   | 4              |     6  |
|  1                   | 13             |    23  |
|  1                   | 18             |     6  |
|  1                   | 14             |     6  |
|  1                   | 22             |    22  |
|  1                   | 7              |     1  |
|  1                   | 31             |     1  |
|  1                   | 33             |     4  |
|  1                   | 20             |     2  |
|  1                   | 17             |    10  |
|  1                   | 19             |     1  |
|  1                   | 3              |     1  |
|  1                   | 28             |     5  |
|  1                   | 21             |     8  |
|  1                   | 8              |    10  |
|  1                   | 6     

In [9]:
by_primarydistraction = person.group_by('distractioncd1')

distraction_totals = by_primarydistraction.aggregate([
    ('count', agate.Count())
])

sorted_totals = distraction_totals.order_by('count', reverse=True)

sorted_totals.print_table(max_rows=30)

|-----------------+---------|
|  distractioncd1 |  count  |
|-----------------+---------|
|                 | 91,557  |
|              15 |  5,207  |
|               1 |  1,482  |
|               2 |    682  |
|               5 |    653  |
|               3 |    311  |
|               9 |    252  |
|              14 |    155  |
|               7 |    137  |
|               4 |    114  |
|              11 |     97  |
|              12 |     39  |
|              10 |     33  |
|               6 |     19  |
|              13 |     18  |
|               8 |     14  |
|-----------------+---------|


In [12]:
by_personalinjurylevel = person.group_by('personalinjurylevel')
by_alcoholuse = person.group_by('alcoholuseind')

circs_group = by_personalinjurylevel.group_by('alcoholuseind')

circs_groups_totals = circs_group.aggregate([
    ('count', agate.Count())
])

sorted_totals = circs_groups_totals.order_by('count', reverse=True)

sorted_totals.print_table(max_rows=75)

|----------------------+---------------+---------|
|  personalinjurylevel | alcoholuseind |  count  |
|----------------------+---------------+---------|
|  5                   | N             | 71,044  |
|  5                   | U             | 11,165  |
|  4                   | N             |  8,293  |
|  3                   | N             |  4,922  |
|  2                   | N             |  1,124  |
|  4                   | U             |  1,046  |
|  5                   | Y             |    995  |
|  U                   | U             |    542  |
|  3                   | U             |    538  |
|  U                   | N             |    320  |
|  3                   | Y             |    260  |
|  4                   | Y             |    128  |
|  2                   | U             |    111  |
|  2                   | Y             |    104  |
|  1                   | N             |     87  |
|  1                   | U             |     37  |
|  1                   | Y     