###### Imports and Settings

In [1]:
import pandas as pd
import numpy as np
import requests
from functools import reduce
import matplotlib.pyplot as plt
import pickle
# Display settings: no cap on the number of columns or rows shown, 150-character display width
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 150)
import sys
sys.path.append("..") # Adds higher directory to python modules path
# geodict is not a published package; presumably it is the project module made importable by the path entry above (TODO confirm)
import geodict
# Lookups taken from geodict; geotogeoid is what the cells below pass to Series.map to turn NAME values into GEO_IDs
tofullcensus = geodict.tofullcensus
geotogeoid = geodict.geotogeoid
import sqlite3 as sq

# This notebook outlines the download and formatting process for data from the Tennessee Bureau of Investigation.  

Go to this page: https://crimeinsight.tbi.tn.gov/public/View/dispview.aspx?ReportId=20, "Crime Rates by Jurisdiction", from this page: https://crimeinsight.tbi.tn.gov/public/Browse/browsetables.aspx
We are looking for Crime Rates by county and for the State of Tennessee here. Select "Jurisdiction by Geography" on the left side of the screen, where you are then able to manipulate geographies by level. Select the State, then deselect the regions, select the counties in the GNRC operating area (most in Middle, some in West), and under those counties leave the jurisdictions selected. Click "show report" in the top right of the screen, and then download a multi-dimensional csv file. Delete the data headers, rename Jurisdiction by Geography to "NAME", make sure that the numbers are not formatted with commas, add a "County" column to tag the jurisdictions with a county, leaving the column null for the state and counties, and set the column headers correctly before saving the file as "TBI_CrimeRatesbyJurisdiction_IncidentDate(YYYY).csv".  

Next, from the homepage select "Crime and Clearance by Offense Type" and deselect clearance in the left-side selector. Select "Jurisdiction by Geography" and similarly select the State and all counties and jurisdictions. Download this, format it, transpose the data so NAME is on the left and you can create a "County" column as before, and save it as "TBI_CrimeTypebyJurisdiction_IncidentDate(YYYY).csv".

*Note*: the jurisdictions don't necessarily correspond to a place-level plan in all cases, but they may, and you can experiment here to see whether they match the plan or document you're generating data for!


In [92]:
#read the 2021 crime-rate extract and preview its first three rows
data = pd.read_csv(filepath_or_buffer = '../../Data Downloads/TBI_CrimeRatesbyJurisdiction_IncidentDate2021.csv')
data.head(n = 3)

Unnamed: 0,NAME,County,"Crime Rate (per 1,000)",Estimated Population,Number of Crimes
0,Tennessee,,72.64,6975218.0,506713.0
1,Cheatham,,48.98,41483.0,2032.0
2,Cheatham County Sheriff's Office,Cheatham,,,1262.0


In [93]:
#map each short county name to its full census name ("<name> County, Tennessee")
_short_county_names = ['Cheatham', 'Davidson', 'Maury', 'Robertson', 'Rutherford', 'Sumner', 'Trousdale',
                       'Williamson', 'Wilson', 'Dickson', 'Houston', 'Humphreys', 'Montgomery', 'Stewart']
countydict = {short_name: short_name + ' County, Tennessee' for short_name in _short_county_names}

In [75]:
#swap the short county names in the County column for full census names.
#Replacing the values directly avoids the transpose -> rename -> transpose round trip,
#which turned every numeric column into object dtype just to relabel one column.
#Values that are not keys of countydict (including the nulls on state/county rows) are left as they are.
data = data.assign(County = data['County'].replace(countydict))
#keep County as the first column, the position the old set_index/reset_index round trip left it in
data = data.reindex(columns = ['County'] + [col for col in data.columns if col != 'County'])

In [76]:
#preview the first five rows to confirm the County values were renamed
data.head(n = 5)

Unnamed: 0,County,NAME,"Crime Rate (per 1,000)",Estimated Population,Number of Crimes
0,,Tennessee,72.64,6975218.0,506713.0
1,,Cheatham,48.98,41483.0,2032.0
2,"Cheatham County, Tennessee",Cheatham County Sheriff's Office,,,1262.0
3,"Cheatham County, Tennessee",Ashland City Police Department,125.55,4763.0,598.0
4,"Cheatham County, Tennessee",Kingston Springs Police Department,26.18,2712.0,71.0


In [77]:
#pivot so every NAME becomes a column, add the regional totals, then pivot back
data = data.set_index('NAME').transpose().rename(columns = countydict)

#member counties of each aggregate, listed in the order they are summed
gnrc_members = ['Stewart County, Tennessee', 'Montgomery County, Tennessee',
                'Houston County, Tennessee', 'Humphreys County, Tennessee',
                'Dickson County, Tennessee', 'Cheatham County, Tennessee',
                'Robertson County, Tennessee', 'Sumner County, Tennessee',
                'Davidson County, Tennessee', 'Wilson County, Tennessee',
                'Trousdale County, Tennessee', 'Williamson County, Tennessee',
                'Rutherford County, Tennessee']
#the full region is the GNRC list plus Maury County
gnrc_region_members = gnrc_members + ['Maury County, Tennessee']
mpo_members = ['Robertson County, Tennessee', 'Sumner County, Tennessee',
               'Davidson County, Tennessee', 'Wilson County, Tennessee',
               'Williamson County, Tennessee', 'Rutherford County, Tennessee',
               'Maury County, Tennessee']

#each total is a plain element-wise sum of the member county columns
GNRCCounties = [data[county] for county in gnrc_members]
data['GNRC'] = sum(GNRCCounties)
GNRCCountiesAll = [data[county] for county in gnrc_region_members]
data['GNRC Region'] = sum(GNRCCountiesAll)
MPOCounties = [data[county] for county in mpo_members]
data['MPO'] = sum(MPOCounties)

#back to one row per NAME
data = data.transpose().reset_index()

In [78]:
#recompute the rate per 1,000 population from the counts, so the summed regions get a real rate
#instead of the sum of their counties' rates
crimes_per_person = data['Number of Crimes'] / data['Estimated Population']
data['Crime Rate (per 1,000)'] = crimes_per_person * 1000

In [79]:
#attach the GEO_ID looked up from NAME, plus a constant Source label, as two new trailing columns
data = data.assign(GEO_ID = data['NAME'].map(geotogeoid),
                   Source = 'Tennessee Bureau of Investigation')

In [80]:
#cast the three measure columns to float
cols = ['Crime Rate (per 1,000)', 'Estimated Population', 'Number of Crimes']
data = data.astype({col: float for col in cols})

In [81]:
#last look at the first five rows before export
data.head(n = 5)

Unnamed: 0,NAME,County,"Crime Rate (per 1,000)",Estimated Population,Number of Crimes,GEO_ID,Source
0,Tennessee,,72.644755,6975218.0,506713.0,0400000US47,Tennessee Bureau of Investigation
1,"Cheatham County, Tennessee",,48.983921,41483.0,2032.0,0500000US47021,Tennessee Bureau of Investigation
2,Cheatham County Sheriff's Office,"Cheatham County, Tennessee",,,1262.0,,Tennessee Bureau of Investigation
3,Ashland City Police Department,"Cheatham County, Tennessee",125.551123,4763.0,598.0,,Tennessee Bureau of Investigation
4,Kingston Springs Police Department,"Cheatham County, Tennessee",26.179941,2712.0,71.0,,Tennessee Bureau of Investigation


In [82]:
#print each column's dtype and non-null count
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 141 entries, 0 to 140
Data columns (total 7 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   NAME                    141 non-null    object 
 1   County                  123 non-null    object 
 2   Crime Rate (per 1,000)  73 non-null     float64
 3   Estimated Population    83 non-null     float64
 4   Number of Crimes        109 non-null    float64
 5   GEO_ID                  15 non-null     object 
 6   Source                  141 non-null    object 
dtypes: float64(3), object(4)
memory usage: 7.8+ KB


In [83]:
#write the crime-rate table to the shared SQLite database, replacing any earlier copy of the table
conn = sq.connect('../../Outputs/Dem_Transpo_Housing_Collection.db')
try:
    rows_written = data.to_sql('TBI_CrimeRate_2021', conn, if_exists = 'replace', index = False)
    #explicit commit so the write is durable however to_sql handled its own transaction
    conn.commit()
finally:
    #always release the database file, even if the write fails
    conn.close()
#show what to_sql returned as the cell output
rows_written

141

In [94]:
#read the 2021 crime-type extract and preview its first three rows
data = pd.read_csv(filepath_or_buffer = '../../Data Downloads/TBI_CrimeTypebyJurisdiction_IncidentDate2021.csv')
data.head(n = 3)

Unnamed: 0,NAME,County,Murder,Negligent Manslaughter,Negligent Vehicular Manslaughter,Kidnapping/Abduction,Forcible Rape,Forcible Sodomy,Sexual Assault W/Object,Forcible Fondling,Incest,Statutory Rape,Aggravated Assault,Simple Assault,Intimidation,Stalking,Commercial Sex Acts,Involuntary Servitude,Arson,Bribery,Burglary,Counterfeiting/Forgery,Destruction/Damage/Vandalism,Embezzlement,Extortion/Blackmail,Fraud - False Pretenses,Fraud - Credit Card/ATM,Fraud - Impersonation,Fraud - Welfare,Fraud - Wire,Fraud - Identity Theft,Fraud - Computer Hacking/Invasion,Robbery,Theft - Pocket-picking,Theft - Purse Snatching,Theft - Shoplifting,Theft From Building,Theft From Coin Machine,Theft From Motor Vehicle,Theft of Motor Vehicle Parts,Theft - All Other Larceny,Motor Vehicle Theft,Stolen Property Offenses,Animal Cruelty,Drug/Narcotic Violations,Drug/Narcotic Equipment Violations,Gambling - Betting/Wagering,Gambling - Operating/Promoting,Gambling - Equipment Violations,Gambling - Sports Tampering,Pornography/Obscene Material,Prostitution,Prostitution Assisting/Promoting,Purchasing Prostitution,Weapon Law Violations
0,Tennessee,,704.0,44.0,21.0,2206.0,2373.0,457.0,289.0,2744.0,27.0,367.0,38501.0,74668.0,25087.0,1847.0,118.0,2.0,955.0,26.0,23027.0,4795.0,46355.0,1577.0,630.0,12855.0,7118.0,3994.0,184.0,1629.0,441.0,166.0,4961.0,476.0,149.0,27335.0,16324.0,207.0,29676.0,13216.0,27883.0,21597.0,2806.0,588.0,55724.0,35045.0,9.0,14.0,9.0,,1048.0,246.0,28.0,135.0,16030.0
1,Cheatham,,2.0,,,8.0,7.0,,,9.0,,1.0,86.0,268.0,79.0,7.0,,,1.0,,58.0,35.0,156.0,7.0,,56.0,17.0,33.0,,22.0,,,3.0,,,59.0,75.0,1.0,46.0,54.0,96.0,59.0,18.0,2.0,431.0,280.0,,,,,1.0,,,,55.0
2,Cheatham County Sheriff's Office,Cheatham,2.0,,,7.0,6.0,,,6.0,,,68.0,210.0,58.0,5.0,,,1.0,,38.0,15.0,110.0,1.0,,12.0,11.0,28.0,,22.0,,,2.0,,,8.0,61.0,,31.0,41.0,54.0,42.0,15.0,2.0,234.0,137.0,,,,,1.0,,,,34.0


In [95]:
#map each short county name to its full census name ("<name> County, Tennessee")
_short_county_names = ['Cheatham', 'Davidson', 'Maury', 'Robertson', 'Rutherford', 'Sumner', 'Trousdale',
                       'Williamson', 'Wilson', 'Dickson', 'Houston', 'Humphreys', 'Montgomery', 'Stewart']
countydict = {short_name: short_name + ' County, Tennessee' for short_name in _short_county_names}

In [96]:
#swap the short county names in the County column for full census names.
#Replacing the values directly avoids the transpose -> rename -> transpose round trip,
#which turned every numeric column into object dtype just to relabel one column.
#Values that are not keys of countydict (including the nulls on state/county rows) are left as they are.
data = data.assign(County = data['County'].replace(countydict))
#keep County as the first column, the position the old set_index/reset_index round trip left it in
data = data.reindex(columns = ['County'] + [col for col in data.columns if col != 'County'])

In [97]:
#transpose so every NAME becomes a column, add the regional totals, then transpose back
data = data.set_index('NAME').transpose()
data = data.rename(columns = countydict)

#member counties of each aggregate, listed in the order they are summed
gnrc_members = ['Stewart County, Tennessee', 'Montgomery County, Tennessee',
                'Houston County, Tennessee', 'Humphreys County, Tennessee',
                'Dickson County, Tennessee', 'Cheatham County, Tennessee',
                'Robertson County, Tennessee', 'Sumner County, Tennessee',
                'Davidson County, Tennessee', 'Wilson County, Tennessee',
                'Trousdale County, Tennessee', 'Williamson County, Tennessee',
                'Rutherford County, Tennessee']
#the full region is the GNRC list plus Maury County
gnrc_region_members = gnrc_members + ['Maury County, Tennessee']
mpo_members = ['Robertson County, Tennessee', 'Sumner County, Tennessee',
               'Davidson County, Tennessee', 'Wilson County, Tennessee',
               'Williamson County, Tennessee', 'Rutherford County, Tennessee',
               'Maury County, Tennessee']

def _county_offense_counts(county):
    """Return one county's offense counts as floats, with blank cells counted as 0.

    The extract leaves an offense blank (NaN) for a county instead of writing 0.
    Adding a NaN to anything gives NaN, so a single blank county would wipe out
    the whole regional total for that offense (and the later fillna(0) would then
    report the region as 0). Zero-filling each county first keeps the other
    counties' counts in the total.

    The 'County' label row is not a count, so it is left out; the regional
    columns therefore end up with a null County, as before.
    """
    counts = data[county].drop('County', errors = 'ignore')
    return counts.astype(float).fillna(0)

GNRCCounties = [_county_offense_counts(county) for county in gnrc_members]
data['GNRC'] = sum(GNRCCounties)
GNRCCountiesAll = [_county_offense_counts(county) for county in gnrc_region_members]
data['GNRC Region'] = sum(GNRCCountiesAll)
MPOCounties = [_county_offense_counts(county) for county in mpo_members]
data['MPO'] = sum(MPOCounties)

#back to one row per NAME
data = data.transpose().reset_index()

In [98]:
#move the two label columns (NAME, County) into the index so only the count columns remain as columns;
#the null-to-0 fill that follows then leaves the labels alone
data = data.set_index(keys = ['NAME', 'County'])

In [99]:
#count blank offense cells as 0. The columns are cast to float first because the earlier transposes
#leave them as object dtype; casting explicitly avoids depending on fillna to infer a numeric dtype
#(an implicit downcast that newer pandas deprecates). Returning a new frame instead of using
#inplace=True keeps the cell safe to re-run.
data = data.astype(float).fillna(0)

In [100]:
#roll the detailed offense columns up into broader groups, then drop the detailed columns
#each entry is (new group column, offense columns summed into it), in the order the groups are appended
offense_groups = [
    ('Manslaughter',
     ['Negligent Manslaughter', 'Negligent Vehicular Manslaughter']),
    ('Assault & Violent Sex Crime',
     ['Forcible Rape', 'Forcible Sodomy', 'Sexual Assault W/Object', 'Forcible Fondling', 'Incest',
      'Statutory Rape', 'Aggravated Assault', 'Simple Assault']),
    ('Intimidation & Stalking',
     ['Intimidation', 'Stalking']),
    ('Arson, Destruction & Vandalism',
     ['Arson', 'Destruction/Damage/Vandalism']),
    ('Fraud',
     ['Fraud - False Pretenses', 'Fraud - Credit Card/ATM', 'Fraud - Impersonation', 'Fraud - Welfare',
      'Fraud - Wire', 'Fraud - Identity Theft', 'Fraud - Computer Hacking/Invasion']),
    ('Theft',
     ['Theft - Pocket-picking', 'Theft - Purse Snatching', 'Theft - Shoplifting', 'Theft From Building',
      'Theft From Coin Machine', 'Theft From Motor Vehicle', 'Theft of Motor Vehicle Parts',
      'Theft - All Other Larceny', 'Motor Vehicle Theft', 'Stolen Property Offenses']),
    ('Drugs & Narcotics Violations',
     ['Drug/Narcotic Violations', 'Drug/Narcotic Equipment Violations']),
    ('Bribery, Forgery, Blackmail, Embezzlement & Gambling',
     ['Bribery', 'Counterfeiting/Forgery', 'Embezzlement', 'Extortion/Blackmail',
      'Gambling - Betting/Wagering', 'Gambling - Operating/Promoting', 'Gambling - Equipment Violations',
      'Gambling - Sports Tampering']),
    ('Prostitution, Commercial Sex Acts & Pornography, Aiding & Purchasing Prostitution',
     ['Prostitution', 'Prostitution Assisting/Promoting', 'Purchasing Prostitution',
      'Commercial Sex Acts', 'Pornography/Obscene Material']),
]

#every detailed column that was folded into a group gets dropped afterwards
cols = []
for group_name, offense_columns in offense_groups:
    data[group_name] = sum([data[offense] for offense in offense_columns])
    cols.extend(offense_columns)

#there is just hardly any of this, so it is dropped for this analysis without being grouped
cols.append('Involuntary Servitude')
data = data.drop(columns = cols)

In [101]:
#move NAME and County back out of the index into regular columns
data = data.reset_index(drop = False)

In [102]:
#attach the GEO_ID looked up from NAME, plus a constant Source label, as two new trailing columns
data = data.assign(GEO_ID = data['NAME'].map(geotogeoid),
                   Source = 'Tennessee Bureau of Investigation')

In [105]:
#last look at the first five rows before export
data.head(n = 5)

Unnamed: 0,NAME,County,Murder,Kidnapping/Abduction,Burglary,Robbery,Animal Cruelty,Weapon Law Violations,Manslaughter,Assault & Violent Sex Crime,Intimidation & Stalking,"Arson, Destruction & Vandalism",Fraud,Theft,Drugs & Narcotics Violations,"Bribery, Forgery, Blackmail, Embezzlement & Gambling","Prostitution, Commercial Sex Acts & Pornography, Aiding & Purchasing Prostitution",GEO_ID,Source
0,Tennessee,,704.0,2206.0,23027.0,4961.0,588.0,16030.0,65.0,119426.0,26934.0,47310.0,26387.0,139669.0,90769.0,7060.0,1575.0,0400000US47,Tennessee Bureau of Investigation
1,"Cheatham County, Tennessee",,2.0,8.0,58.0,3.0,2.0,55.0,0.0,371.0,86.0,157.0,128.0,408.0,711.0,42.0,1.0,0500000US47021,Tennessee Bureau of Investigation
2,Cheatham County Sheriff's Office,"Cheatham County, Tennessee",2.0,7.0,38.0,2.0,2.0,34.0,0.0,290.0,63.0,111.0,73.0,252.0,371.0,16.0,1.0,,Tennessee Bureau of Investigation
3,Ashland City Police Department,"Cheatham County, Tennessee",0.0,1.0,19.0,0.0,0.0,16.0,0.0,60.0,17.0,30.0,47.0,104.0,283.0,21.0,0.0,,Tennessee Bureau of Investigation
4,Kingston Springs Police Department,"Cheatham County, Tennessee",0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,1.0,4.0,2.0,17.0,39.0,2.0,0.0,,Tennessee Bureau of Investigation


In [106]:
#print each column's dtype and non-null count
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 141 entries, 0 to 140
Data columns (total 19 columns):
 #   Column                                                                             Non-Null Count  Dtype  
---  ------                                                                             --------------  -----  
 0   NAME                                                                               141 non-null    object 
 1   County                                                                             124 non-null    object 
 2   Murder                                                                             141 non-null    float64
 3   Kidnapping/Abduction                                                               141 non-null    float64
 4   Burglary                                                                           141 non-null    float64
 5   Robbery                                                                            141 non-null    float64

In [107]:
#export to feather to join to all tidydata
data.to_feather('../../Outputs/TBI_CrimeType_2021')
#write the crime-type table to the shared SQLite database, replacing any earlier copy of the table
conn = sq.connect('../../Outputs/Dem_Transpo_Housing_Collection.db')
try:
    rows_written = data.to_sql('TBI_CrimeType_2021', conn, if_exists = 'replace', index = False)
    #explicit commit so the write is durable however to_sql handled its own transaction
    conn.commit()
finally:
    #always release the database file, even if the write fails
    conn.close()
#show what to_sql returned as the cell output
rows_written

141