In [1]:
# Import dependencies
import os 
import pandas as pd
import matplotlib as plt
import regex as re
import numpy as np

In [2]:
# Load the Greater Houston crime index (2015-2020) from the Resources folder
# and preview the first five rows
crime_csv_path = 'Resources/Crime_Index_Greater_Houston_Area_2015_2020.csv'
df = pd.read_csv(crime_csv_path)
df.head(n=5)

Unnamed: 0,AgencyName,Murder,Rape,Robbery,Assault,Burglary,Larceny,Auto Theft,Total,Population,Year,County
0,BELLVILLE PD,0,3,2,6,17,45,0,73,4235,2015,Austin County
1,SEALY ISD PD,0,0,0,1,1,5,0,7,0,2015,Austin County
2,AUSTIN CO SO,0,3,2,13,48,61,8,135,17499,2015,Austin County
3,WALLIS PD,0,0,0,0,3,10,0,13,1284,2015,Austin County
4,SEALY PD,0,2,0,33,52,162,5,254,6336,2015,Austin County


In [3]:
# Check types: list the dtype pandas inferred for every column, as a quick
# check that the count columns were read as numbers rather than strings
df.dtypes

AgencyName    object
Murder         int64
Rape           int64
Robbery        int64
Assault        int64
Burglary       int64
Larceny        int64
Auto Theft     int64
Total          int64
Population     int64
Year           int64
County        object
dtype: object

In [4]:
# Group crimes by county and sum every numeric column.
# numeric_only=True is explicit on purpose: since pandas 2.0 a plain .sum()
# no longer drops object columns silently, so AgencyName would be
# string-concatenated per county instead of being left out.
# NOTE: Year is summed too, which is not a meaningful total; Population
# summed across agencies and years is presumably not one either (confirm).
county_sum_df = df.groupby('County').sum(numeric_only=True)
county_sum_df.head(10)

Unnamed: 0_level_0,Murder,Rape,Robbery,Assault,Burglary,Larceny,Auto Theft,Total,Population,Year
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Austin County,3,47,29,233,589,1123,178,2202,179077,60525
Brazoria County,59,832,829,2817,6604,31090,2972,45203,2600073,266315
Brazos County,38,895,646,2556,4993,24075,1886,35089,1344608,48420
Chambers County,14,92,75,484,828,3314,493,5300,223908,36315
Fort Bend County,107,1149,1696,5471,8776,41467,3279,61945,4434119,133155
Galveston County,117,1373,1283,3026,8005,35329,4266,53399,2086193,203766
Harris County,2536,13361,76016,123097,165783,656872,124937,1162602,28242816,534647
Liberty County,22,214,82,823,1385,3934,808,7268,341516,32292
Matagorda County,27,211,171,975,2094,6676,475,10629,324210,66570
Montgomery County,86,968,1100,4171,8442,33615,4500,52882,3441740,149289


In [7]:
# Swap every space in the column labels for an underscore, so labels such as
# "Auto Theft" become valid attribute names (county_sum_df.Auto_Theft)
county_sum_df.columns = [label.replace(' ', '_') for label in county_sum_df.columns]
county_sum_df.head(5)

Unnamed: 0_level_0,Murder,Rape,Robbery,Assault,Burglary,Larceny,Auto_Theft,Total,Population,Year
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Austin County,3,47,29,233,589,1123,178,2202,179077,60525
Brazoria County,59,832,829,2817,6604,31090,2972,45203,2600073,266315
Brazos County,38,895,646,2556,4993,24075,1886,35089,1344608,48420
Chambers County,14,92,75,484,828,3314,493,5300,223908,36315
Fort Bend County,107,1149,1696,5471,8776,41467,3279,61945,4434119,133155


In [9]:
# Drop the year and population columns.
# The result is assigned back instead of using inplace=True: drop() returns a
# new frame, which keeps the step explicit and chainable. A missing column
# still raises KeyError, exactly as before.
county_sum_df = county_sum_df.drop(columns=['Population', 'Year'])
county_sum_df.head()

Unnamed: 0_level_0,Murder,Rape,Robbery,Assault,Burglary,Larceny,Auto_Theft,Total
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Austin County,3,47,29,233,589,1123,178,2202
Brazoria County,59,832,829,2817,6604,31090,2972,45203
Brazos County,38,895,646,2556,4993,24075,1886,35089
Chambers County,14,92,75,484,828,3314,493,5300
Fort Bend County,107,1149,1696,5471,8776,41467,3279,61945


In [10]:
# Rename "Total" to "Total Crimes '15-'20".
# rename() returns a new frame and leaves the original untouched, so the
# result must be assigned back for the change to persist in county_sum_df.
# A single rename also keeps the column in its original (last) position and
# avoids carrying a duplicate "Total" column alongside the renamed one.
county_sum_df = county_sum_df.rename(columns={'Total': 'Total Crimes \'15-\'20'})
county_sum_df

Unnamed: 0_level_0,Murder,Rape,Robbery,Assault,Burglary,Larceny,Auto_Theft,Total Crimes '15-'20
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Austin County,3,47,29,233,589,1123,178,2202
Brazoria County,59,832,829,2817,6604,31090,2972,45203
Brazos County,38,895,646,2556,4993,24075,1886,35089
Chambers County,14,92,75,484,828,3314,493,5300
Fort Bend County,107,1149,1696,5471,8776,41467,3279,61945
Galveston County,117,1373,1283,3026,8005,35329,4266,53399
Harris County,2536,13361,76016,123097,165783,656872,124937,1162602
Liberty County,22,214,82,823,1385,3934,808,7268
Matagorda County,27,211,171,975,2094,6676,475,10629
Montgomery County,86,968,1100,4171,8442,33615,4500,52882


In [13]:
# Build the non-violent crime frame by picking the three relevant columns
# out of the county totals; .copy() gives a frame that is independent of
# county_sum_df, and the County index is carried over unchanged
non_violent_columns = ["Burglary", "Larceny", "Auto_Theft"]
non_violent_df = county_sum_df[non_violent_columns].copy()

non_violent_df.head()

Unnamed: 0_level_0,Burglary,Larceny,Auto_Theft
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Austin County,589,1123,178
Brazoria County,6604,31090,2972
Brazos County,4993,24075,1886
Chambers County,828,3314,493
Fort Bend County,8776,41467,3279
