# What Is This?

Consolidated crime data, curated from the FBI and city-data.com

# Imports and Definitions

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
# Full state name -> two-letter abbreviation, one entry per state (50 total).
abbreviations = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
}

In [3]:
# State name -> list of cities of interest in that state.
places = {
    "Alabama": ["Birmingham", "Montgomery"],
    "Alaska": ["Anchorage", "Juneau"],
    "Arizona": ["Phoenix", "Tucson"],
    "Arkansas": ["Little Rock", "Fort Smith"],
    "California": ["San Diego", "Los Angeles", "Sacramento"],
    "Colorado": ["Denver", "Colorado Springs", "Aurora"],
    "Connecticut": ["Bridgeport", "Hartford"],
    "Delaware": ["Dover", "Wilmington"],
    "Florida": ["Jacksonville", "Miami", "Tallahassee"],
    "Georgia": ["Atlanta", "Savannah"],
    "Hawaii": ["Honolulu", "Kauai", "Maui"],
    "Idaho": ["Boise", "Meridian"],
    "Illinois": ["Chicago", "Springfield"],
    "Indiana": ["Fort Wayne", "Indianapolis"],
    "Iowa": ["Cedar Rapids", "Des Moines"],
    "Kansas": ["Topeka", "Wichita"],
    "Kentucky": ["Frankfort", "Louisville"],
    "Louisiana": ["Baton Rouge", "New Orleans"],
    "Maine": ["Augusta", "Portland"],
    "Maryland": ["Baltimore", "Annapolis"],
    "Massachusetts": ["Boston", "Worcester"],
    "Michigan": ["Detroit", "Lansing"],
    "Minnesota": ["Minneapolis", "Saint Paul"],
    "Mississippi": ["Jackson", "Gulfport"],
    "Missouri": ["Kansas City", "Jefferson City"],
    "Montana": ["Billings", "Helena"],
    "Nebraska": ["Lincoln", "Omaha"],
    "Nevada": ["Carson City", "Las Vegas", "Reno"],
    "New Hampshire": ["Concord", "Manchester"],
    "New Jersey": ["Newark", "Trenton"],
    "New Mexico": ["Albuquerque", "Santa Fe"],
    "New York": ["Albany", "New York City"],
    "North Carolina": ["Charlotte", "Raleigh"],
    "North Dakota": ["Bismarck", "Fargo"],
    "Ohio": ["Columbus", "Cleveland"],
    "Oklahoma": ["Oklahoma City", "Tulsa"],
    "Oregon": ["Portland", "Salem"],
    "Pennsylvania": ["Harrisburg", "Philadelphia"],
    "Rhode Island": ["Providence", "Warwick"],
    "South Carolina": ["Charleston", "Columbia"],
    "South Dakota": ["Pierre", "Sioux Falls"],
    "Tennessee": ["Nashville", "Memphis"],
    "Texas": ["Austin", "Houston"],
    "Utah": ["Salt Lake City", "West Valley City"],
    "Vermont": ["Burlington", "Montpelier"],
    "Virginia": ["Richmond", "Virginia Beach"],
    "Washington": ["Olympia", "Seattle"],
    "West Virginia": ["Charleston", "Huntington"],
    "Wisconsin": ["Madison", "Milwaukee"],
    "Wyoming": ["Casper", "Cheyenne"],
}

In [4]:
# Years covered, 2010 through 2016 inclusive (range's stop is exclusive).
years = list(range(2010, 2017))

In [5]:
# Places missing from citydata, grouped by state.
missing_places = {
    "Hawaii": ["Honolulu", "Kauai", "Maui"],
    "Nevada": ["Carson City"],
    "Tennessee": ["Nashville"],
    "Utah": ["West Valley City"],
}

In [6]:
# Alternative names in fbi: city name used in this notebook -> name to look up
# in the fbi table.
corrections = {
    "Charlotte": "charlotte-mecklenburg",
    "Las Vegas": "las vegas metropolitan police department",
    "Savannah": "savannah-chatham metropolitan",
    "West Valley City": "west valley",
    "New York City": "new york",
}

In [7]:
# Load the crime table stored in citydata_crime_data.pkl (relative to the
# working directory); presumably the city-data.com figures -- confirm.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
citydata = pd.read_pickle("citydata_crime_data.pkl")

In [8]:
# Load the crime table stored in fbi_crime_data.pkl (relative to the working
# directory); presumably the FBI figures -- confirm.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
fbi = pd.read_pickle("fbi_crime_data.pkl")

# Processing

In [9]:
# Work on a copy so the processing below does not modify citydata itself.
data = citydata.copy()

In [10]:
# add populations
# Start from an all-NaN column; rows without a matching fbi record keep NaN.
data['population'] = np.nan
for state, city, year in zip(data.index.get_level_values('state'),
                             data.index.get_level_values('city'),
                             data.index.get_level_values('year')):
    # fbi is looked up with lower-cased names, and some cities are listed
    # there under an alternative name (see `corrections`).
    fbi_city = corrections.get(city, city).lower()
    try:
        fbi_row = fbi.loc[state.lower(), fbi_city, int(year)]
    except KeyError:
        # no fbi record for this (state, city, year); leave population as NaN
        continue
    # Write through a single .loc call on `data`. The previous chained form
    # (data.loc[row].loc['population'] = ...) assigns into a temporary object
    # and may never reach `data` (it never does under pandas Copy-on-Write).
    data.loc[(state, city, year), 'population'] = fbi_row.loc['population']

In [11]:
# add missing cities
# NOTE(review): as written, this cell only reports which of the cities in
# `missing_places` cannot be found in `fbi`; nothing is added to `data` here.
for state, cities in missing_places.items():
    for city in cities:
        # use the alternative fbi name when one is listed in `corrections`
        city = corrections.get(city, city)
        lookup_key = (state.lower(), city.lower())
        try:
            fbi.loc[lookup_key]
        except KeyError:
            message = "fbi does not contain {city}, {state}"
            print(message.format(city=city, state=state))

fbi does not contain Kauai, Hawaii
fbi does not contain Maui, Hawaii
fbi does not contain Carson City, Nevada


In [12]:
# Persist the processed table (including the population column added above)
# to crime_data.pkl in the working directory.
data.to_pickle("crime_data.pkl")

In [13]:
# Temporarily lift pandas' row-display limit so every row of `data` is rendered.
# NOTE(review): this writes the whole frame into the notebook output; a
# data.head() / data.sample(n) preview would keep the saved file smaller.
with pd.option_context('display.max_rows', None):
    display(data)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,arson,assault,burglary,murder,rape,robbery,theft,vehicle theft,population
state,city,year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Alabama,Birmingham,2002,211.0,1697.0,4389.0,65.0,239.0,1186.0,11640.0,2049.0,
Alabama,Birmingham,2003,175.0,1706.0,4831.0,85.0,204.0,1352.0,11934.0,2809.0,
Alabama,Birmingham,2004,142.0,1593.0,5156.0,59.0,240.0,1369.0,11970.0,2351.0,
Alabama,Birmingham,2005,136.0,1675.0,4933.0,104.0,241.0,1429.0,11962.0,2028.0,
Alabama,Birmingham,2006,228.0,1422.0,4813.0,104.0,220.0,1429.0,12113.0,2081.0,
Alabama,Birmingham,2007,221.0,1396.0,4864.0,86.0,229.0,1609.0,12528.0,2246.0,
Alabama,Birmingham,2008,134.0,1456.0,5153.0,82.0,212.0,1499.0,12761.0,2140.0,
Alabama,Birmingham,2009,135.0,1399.0,5019.0,65.0,198.0,1150.0,11546.0,1594.0,
Alabama,Birmingham,2011,123.0,1916.0,5806.0,54.0,182.0,1011.0,10522.0,1513.0,213258.0
Alabama,Birmingham,2012,117.0,2035.0,4704.0,67.0,152.0,983.0,9042.0,1042.0,213266.0
