###### Imports and Settings

In [1]:
import pandas as pd
import numpy as np
import requests
from functools import reduce
import matplotlib.pyplot as plt
import pickle
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 150)
import sys
sys.path.append("../../Functions and Dictionaries") # Adds higher directory to python modules path
import geodict
tofullcensus = geodict.tofullcensus
geotogeoid = geodict.geotogeoid
import sqlite3 as sq

# This notebook outlines the download and formatting process for data from the Tennessee Bureau of Investigation.  

Go to this page: https://crimeinsight.tbi.tn.gov/public/View/dispview.aspx?ReportId=20, "Crime Rates by Jurisdiction" fom this page: https://crimeinsight.tbi.tn.gov/public/Browse/browsetables.aspx
We are looking for Crime Rates by county and the State of Tennessee here. Select "Jurisdiction by Geography" on the left-side of the screen, where you are then able to manipulate geographies by level. Select the State, and then deselect the regions, select counties in the GNRC operating area (most in Middle, some in West), and under those counties leave the jurisdictions. Click "show report" in the top right-side of the screen, and then download a multi-dimensional csv file. Delete the data headers, rename Jurisdiction by Geography to "NAME", make sure that the numbers are not formatted with commas, add a county column to tag the juridictions with a county, leaving the column null for the state and counties, and set the column headers correctly before saving the file as "TBRCrimeRatesbyJurisdiction_IncidentDate(YYYY).csv.  

Next, From the homepage select "Crime and Clearance by Offense Type", deselect clearance in the left-side selector. Select "Jurisdiction by Geography" and similarly select the State and all Counties and jurisdictions. Download this, format, transpose data so NAME is on the left and you can create a similar county column as before, and save as 

*Note*: the jurisdictions don't necessarily repond to a place level plan in all cases, but they may and you can toy around here to see if they match the plan or document you're generating data for!


In [66]:
#import and examine data for the most recent year of crime rate now 2023 on 11/25/2024
data = pd.read_csv('../Data Downloads/TBI_CrimeRatesbyJurisdiction_IncidentDate2023.csv')
data['County'] = data['County'].fillna('Full')
data.head(3)

Unnamed: 0,NAME,County,"Crime Rate (per 1,000)",Estimated Population,Number of Crimes
0,Tennessee,Full,71.69,7149042.0,512485.0
1,Cheatham,Full,44.98,42245.0,1900.0
2,Cheatham County Sheriff's Office,Cheatham,,,1284.0


In [67]:
#create a dictionary to rename the counties to full census names
countydict = {'Cheatham': 'Cheatham County, Tennessee', 'Davidson': 'Davidson County, Tennessee', 'Maury': 'Maury County, Tennessee', 
              'Robertson': 'Robertson County, Tennessee', 'Rutherford': 'Rutherford County, Tennessee', 'Sumner': 'Sumner County, Tennessee', 
              'Trousdale': 'Trousdale County, Tennessee', 'Williamson': 'Williamson County, Tennessee', 'Wilson': 'Wilson County, Tennessee', 
              'Dickson': 'Dickson County, Tennessee', 'Houston': 'Houston County, Tennessee', "Humphreys": 'Humphreys County, Tennessee',
              'Montgomery': 'Montgomery County, Tennessee', 'Stewart': 'Stewart County, Tennessee'}

In [68]:
data = data.set_index('County').transpose()
data = data.rename(columns = countydict)
data = data.transpose().reset_index()

In [69]:
aside = data.loc[data['County'] != 'Full']
data = data.loc[data['County'] == 'Full'].drop(columns = 'County')

In [70]:
aside.head()

Unnamed: 0,County,NAME,"Crime Rate (per 1,000)",Estimated Population,Number of Crimes
2,"Cheatham County, Tennessee",Cheatham County Sheriff's Office,,,1284.0
3,"Cheatham County, Tennessee",Ashland City Police Department,79.41,5692.0,452.0
4,"Cheatham County, Tennessee",Kingston Springs Police Department,21.81,2751.0,60.0
5,"Cheatham County, Tennessee",Pleasant View Police Department,18.91,5500.0,104.0
7,"Davidson County, Tennessee",Tennessee Alcoholic Beverage Commission,,,26.0


In [71]:
data.head()

Unnamed: 0,NAME,"Crime Rate (per 1,000)",Estimated Population,Number of Crimes
0,Tennessee,71.69,7149042.0,512485.0
1,Cheatham,44.98,42245.0,1900.0
6,Davidson,121.82,709720.0,86460.0
50,Maury,70.53,111628.0,7873.0
57,Robertson,35.34,78912.0,2789.0


In [72]:
#transpose to aggregate regions, then transpose back
data = data.set_index('NAME').transpose()
data = data.rename(columns = countydict)
GNRCCounties = [data['Stewart County, Tennessee'],data['Montgomery County, Tennessee'],
                data['Houston County, Tennessee'],data['Humphreys County, Tennessee'],
                data['Dickson County, Tennessee'],data['Cheatham County, Tennessee'],
                data['Robertson County, Tennessee'],data['Sumner County, Tennessee'],
                data['Davidson County, Tennessee'],data['Wilson County, Tennessee'],
                data['Trousdale County, Tennessee'],data['Williamson County, Tennessee'],
                data['Rutherford County, Tennessee']]
data['GNRC'] = sum(GNRCCounties)
GNRCCountiesAll = [data['Stewart County, Tennessee'],data['Montgomery County, Tennessee'],
                   data['Houston County, Tennessee'],data['Humphreys County, Tennessee'],
                   data['Dickson County, Tennessee'],data['Cheatham County, Tennessee'],
                   data['Robertson County, Tennessee'],data['Sumner County, Tennessee'],
                   data['Davidson County, Tennessee'],data['Wilson County, Tennessee'],
                   data['Trousdale County, Tennessee'],data['Williamson County, Tennessee'],
                   data['Rutherford County, Tennessee'],data['Maury County, Tennessee']]
data['GNRC Region'] = sum(GNRCCountiesAll)
MPOCounties = [data['Robertson County, Tennessee'],data['Sumner County, Tennessee'],
               data['Davidson County, Tennessee'],data['Wilson County, Tennessee'],
               data['Williamson County, Tennessee'],data['Rutherford County, Tennessee'],
               data['Maury County, Tennessee']]
data['MPO'] = sum(MPOCounties)
data = data.transpose().reset_index()

In [73]:
#make the county column the same as the name for the full counties.. and state whatever
data['County'] = data['NAME']

In [74]:
#replace the crime rate per 1,000 population so that it calculates correctly for the regions as well
data['Crime Rate (per 1,000)'] = (data['Number of Crimes']/data['Estimated Population']) * 1000

In [75]:
data = pd.concat([data, aside])

In [76]:
data['Year'] = '2023'

In [77]:
data.head()

Unnamed: 0,NAME,"Crime Rate (per 1,000)",Estimated Population,Number of Crimes,County,Year
0,Tennessee,71.685829,7149042.0,512485.0,Tennessee,2023
1,"Cheatham County, Tennessee",44.975737,42245.0,1900.0,"Cheatham County, Tennessee",2023
2,"Davidson County, Tennessee",121.822691,709720.0,86460.0,"Davidson County, Tennessee",2023
3,"Maury County, Tennessee",70.5289,111628.0,7873.0,"Maury County, Tennessee",2023
4,"Robertson County, Tennessee",35.343167,78912.0,2789.0,"Robertson County, Tennessee",2023


In [78]:
data['GEO_ID'] = data['NAME'].map(geotogeoid)
data['Source'] = 'Tennessee Bureau of Investigation'

In [79]:
cols = ['Crime Rate (per 1,000)', 'Estimated Population', 'Number of Crimes']
data[cols] = data[cols].astype(float)

In [80]:
#final check
data.head()

Unnamed: 0,NAME,"Crime Rate (per 1,000)",Estimated Population,Number of Crimes,County,Year,GEO_ID,Source
0,Tennessee,71.685829,7149042.0,512485.0,Tennessee,2023,0400000US47,Tennessee Bureau of Investigation
1,"Cheatham County, Tennessee",44.975737,42245.0,1900.0,"Cheatham County, Tennessee",2023,0500000US47021,Tennessee Bureau of Investigation
2,"Davidson County, Tennessee",121.822691,709720.0,86460.0,"Davidson County, Tennessee",2023,0500000US47037,Tennessee Bureau of Investigation
3,"Maury County, Tennessee",70.5289,111628.0,7873.0,"Maury County, Tennessee",2023,0500000US47119,Tennessee Bureau of Investigation
4,"Robertson County, Tennessee",35.343167,78912.0,2789.0,"Robertson County, Tennessee",2023,0500000US47147,Tennessee Bureau of Investigation


In [81]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 141 entries, 0 to 137
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   NAME                    141 non-null    object 
 1   Crime Rate (per 1,000)  74 non-null     float64
 2   Estimated Population    80 non-null     float64
 3   Number of Crimes        102 non-null    float64
 4   County                  141 non-null    object 
 5   Year                    141 non-null    object 
 6   GEO_ID                  15 non-null     object 
 7   Source                  141 non-null    object 
dtypes: float64(3), object(5)
memory usage: 9.9+ KB


In [83]:
twentythree = data

In [84]:
#export to the SQLite database as only the 2022 data
conn = sq.connect('../Outputs/TBI.db')
twentythree.to_sql('TBI_CrimeRate_2023', conn, if_exists = 'replace', index = False)

141

In [85]:
#concat to the old data to have all crime rates = latest was 2022
data = pd.read_csv('../Data Downloads/TBI_CrimeRatesbyJurisdiction_05to22.csv')
twentythree = twentythree.drop(columns = ['GEO_ID', 'Source'])
data = pd.concat([data, twentythree])
conn = sq.connect('../Outputs/TBI.db')
data.to_sql('TBI_CrimeRatesbyJurisdiction_05to23', conn, if_exists = 'replace', index = False)

15497