In [94]:
import agate

In [95]:
person = agate.Table.from_csv('2013Person1.csv')
print person

|--------------------------+---------------|
|  column_names            | column_types  |
|--------------------------+---------------|
|  personid                | Text          |
|  vehicleid               | Text          |
|  crashid                 | Number        |
|  personinvolvementcd     | Boolean       |
|  sex                     | Text          |
|  dob                     | Date          |
|  personalinjurylevel     | Text          |
|  seatlocationcd          | Text          |
|  transportedind          | Text          |
|  ejectionind             | Text          |
|  airbagcd                | Text          |
|  alcoholuseind           | Text          |
|  pedestrianlocationcd    | Boolean       |
|  pedestriantype          | Boolean       |
|  pedestrianschoolinfoind | Boolean       |
|  ageatcrash              | Number        |
|  contribcirccd1          | Text          |
|  contribcirccd2          | Number        |
|  contribcirccd3          | Number        |
|  contrib

In [96]:
by_personalinjurylevel = person.group_by('personalinjurylevel')

personalinjurylevel_totals = by_personalinjurylevel.aggregate([
    ('count', agate.Count())
])

sorted_totals = personalinjurylevel_totals.order_by('count', reverse=True)

sorted_totals.print_table(max_rows=8)

|----------------------+---------|
|  personalinjurylevel |  count  |
|----------------------+---------|
|  U                   | 10,666  |
|  5                   |  2,417  |
|  4                   |     35  |
|  3                   |     25  |
|  N                   |     15  |
|  2                   |      4  |
|  1                   |      1  |
|----------------------+---------|


In [97]:
binned_ages = person.bins('ageatcrash', 10, 0, 100)
binned_ages.print_bars('ageatcrash', 'Count', width=80)

ageatcrash  Count
[0 - 10)        4 ▓                                                             
[10 - 20)      55 ▓                                                             
[20 - 30)     118 ▓                                                             
[30 - 40)      93 ▓                                                             
[40 - 50)      74 ▓                                                             
[50 - 60)      69 ▓                                                             
[60 - 70)      50 ▓                                                             
[70 - 80)      20 ▓                                                             
[80 - 90)       3 ▓                                                             
[90 - 100] 12,677 ▓░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░                      
                  +--------------+--------------+---------------+--------------+
                  0            5,000         10,000          15,000       20,000


In [98]:
# Filters rows without age data
only_with_age = by_personalinjurylevel.where(
    lambda r: r['ageatcrash'] is not None
)

# Group by personal injury level
injury_groups = only_with_age.group_by('personalinjurylevel')

# Sub-group by age cohorts (20s, 30s, etc.)
injury_and_age_groups = injury_groups.group_by(
    lambda r: '%i0s' % (r['ageatcrash'] // 10),
    key_name='age_group'
)

injury_groups_totals = injury_and_age_groups.aggregate([
    ('count', agate.Count())
])

#Count
sorted_totals = injury_groups_totals.order_by('count', reverse=True)

sorted_totals.print_table(max_rows=50)


|----------------------+-----------------------+-----------+---------|
|  personalinjurylevel | personalinjurylevel_2 | age_group |  count  |
|----------------------+-----------------------+-----------+---------|
|  U                   | U                     | 990s      | 10,638  |
|  5                   | 5                     | 990s      |  2,010  |
|  5                   | 5                     | 20s       |    104  |
|  5                   | 5                     | 30s       |     74  |
|  5                   | 5                     | 40s       |     63  |
|  5                   | 5                     | 50s       |     62  |
|  5                   | 5                     | 10s       |     44  |
|  5                   | 5                     | 60s       |     42  |
|  5                   | 5                     | 70s       |     16  |
|  N                   | N                     | 990s      |     11  |
|  3                   | 3                     | 990s      |      8  |
|  4  

In [99]:
by_distractioncd = person.group_by('distractioncd1')

# Filters rows without age data
only_with_age = by_distractioncd.where(
    lambda r: r['ageatcrash'] is not None
)

# Group by distraction
distraction_groups = only_with_age.group_by('distractioncd1')

# Sub-group by age cohorts (20s, 30s, etc.)
distraction_and_age_groups = distraction_groups.group_by(
    lambda r: '%i0s' % (r['ageatcrash'] // 10),
    key_name='age_group'
)

distraction_groups_totals = distraction_and_age_groups.aggregate([
    ('count', agate.Count())
])

#Count
sorted_totals = distraction_groups_totals.order_by('distractioncd1', reverse=True)

sorted_totals.print_table(max_rows=75)

|-----------------+------------------+-----------+---------|
|  distractioncd1 | distractioncd1_2 | age_group |  count  |
|-----------------+------------------+-----------+---------|
|                 |                  | 990s      | 12,206  |
|                 |                  | 20s       |    106  |
|                 |                  | 30s       |     84  |
|                 |                  | 50s       |     65  |
|                 |                  | 60s       |     47  |
|                 |                  | 40s       |     66  |
|                 |                  | 10s       |     47  |
|                 |                  | 70s       |     20  |
|                 |                  | 80s       |      3  |
|                 |                  | 00s       |      4  |
|              15 |               15 | 990s      |    401  |
|              15 |               15 | 30s       |      6  |
|              15 |               15 | 10s       |      2  |
|              15 |     

In [102]:
by_contribcirccd = person.group_by('contribcirccd1')

# Filters rows without age data
only_with_age = by_distractioncd.where(
    lambda r: r['ageatcrash'] is not None
)

# Group by distraction
contrib_groups = only_with_age.group_by('contribcirccd1')

# Sub-group by age cohorts (20s, 30s, etc.)
contrib_and_age_groups = contrib_groups.group_by(
    lambda r: '%i0s' % (r['ageatcrash'] // 10),
    key_name='age_group'
)

contrib_groups_totals = contrib_and_age_groups.aggregate([
    ('count', agate.Count())
])

#Count
sorted_totals = contrib_groups_totals.order_by('count', reverse=True)

sorted_totals.print_table(max_rows=75)

|-----------------+----------------+-----------+--------|
|  distractioncd1 | contribcirccd1 | age_group | count  |
|-----------------+----------------+-----------+--------|
|                 | U              | 990s      | 6,568  |
|                 | 13             | 990s      | 1,111  |
|                 | 5              | 990s      |   709  |
|                 | 9              | 990s      |   600  |
|                 | 17             | 990s      |   553  |
|                 | 22             | 990s      |   549  |
|                 | 4              | 990s      |   385  |
|                 | 11             | 990s      |   308  |
|                 | 7              | 990s      |   298  |
|              15 | 21             | 990s      |   265  |
|                 | 12             | 990s      |   242  |
|                 | 6              | 990s      |   190  |
|                 | 33             | 990s      |   140  |
|                 | 8              | 990s      |   130  |
|             

In [108]:
by_personalinjurylevel = person.group_by('personalinjurylevel')
by_contribcirccd = person.group_by('contribcirccd1')

circs_group = by_personalinjurylevel.group_by('contribcirccd1')

circs_groups_totals = circs_group.aggregate([
    ('count', agate.Count())
])

sorted_totals = circs_groups_totals.order_by('personalinjurylevel')

sorted_totals.print_table(max_rows=75)


|----------------------+----------------+--------|
|  personalinjurylevel | contribcirccd1 | count  |
|----------------------+----------------+--------|
|  1                   | 18             |     1  |
|  2                   | 7              |     1  |
|  2                   | 22             |     1  |
|  2                   | 5              |     1  |
|  2                   | 4              |     1  |
|  3                   | 4              |     4  |
|  3                   | 17             |     1  |
|  3                   | 22             |    10  |
|  3                   | 20             |     2  |
|  3                   | U              |     2  |
|  3                   | 33             |     1  |
|  3                   | 18             |     2  |
|  3                   | 13             |     1  |
|  3                   | 12             |     1  |
|  3                   | 7              |     1  |
|  4                   | 23             |     1  |
|  4                   | 22    

In [112]:
by_personalinjurylevel = person.group_by('personalinjurylevel')
by_alcoholuse = person.group_by('alcoholuseind')

circs_group = by_personalinjurylevel.group_by('alcoholuseind')

circs_groups_totals = circs_group.aggregate([
    ('count', agate.Count())
])

sorted_totals = circs_groups_totals.order_by('count', reverse=True)

sorted_totals.print_table(max_rows=75)

|----------------------+---------------+---------|
|  personalinjurylevel | alcoholuseind |  count  |
|----------------------+---------------+---------|
|  U                   | U             | 10,162  |
|  5                   | U             |  1,911  |
|  5                   | N             |    491  |
|  U                   | N             |    422  |
|  U                   | Y             |     81  |
|  4                   | N             |     26  |
|  5                   | Y             |     14  |
|  3                   | N             |     13  |
|  4                   | U             |      9  |
|  N                   | U             |      8  |
|  3                   | U             |      7  |
|  N                   | N             |      5  |
|  3                   | Y             |      5  |
|  N                   | Y             |      2  |
|  2                   | N             |      2  |
|  2                   | U             |      2  |
|  U                   |       

In [113]:
by_primarydistraction = person.group_by('distractioncd1')

distraction_totals = by_primarydistraction.aggregate([
    ('count', agate.Count())
])

sorted_totals = distraction_totals.order_by('count', reverse=True)

sorted_totals.print_table(max_rows=8)

|-----------------+---------|
|  distractioncd1 |  count  |
|-----------------+---------|
|                 | 12,648  |
|              15 |    423  |
|               1 |     26  |
|               5 |     26  |
|               2 |     14  |
|               9 |      7  |
|               3 |      6  |
|               4 |      4  |
|             ... |    ...  |
|-----------------+---------|
