In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import folium

In [2]:
sh_df = gpd.read_file("../data/statistical_neighborhoods/statistical_neighborhoods.shp")
sh_df.drop(['TYPOLOGY', 'NOTES'], axis=1, inplace=True)
sh_df = sh_df[~(sh_df.NBHD_NAME == 'CBD')]

In [4]:
def clean_shape_names(string):
    return string.replace('- ', '')

sh_df['NBHD_NAME'] = sh_df['NBHD_NAME'].map(clean_shape_names)

In [5]:
crime = pd.read_csv('../data/denver_crime.csv')
crime['FIRST_OCCURRENCE_DATE'] = pd.to_datetime(crime['FIRST_OCCURRENCE_DATE'], format="%m/%d/%Y %I:%M:%S %p")
crime['MONTH'] = crime['FIRST_OCCURRENCE_DATE'].dt.month
crime['YEAR'] = crime['FIRST_OCCURRENCE_DATE'].dt.year
crime['WEEK'] = crime['FIRST_OCCURRENCE_DATE'].dt.isocalendar().week
crime_df = crime[['INCIDENT_ID', 'OFFENSE_CATEGORY_ID', 'YEAR', 'NEIGHBORHOOD_ID']].copy()
crime[['INCIDENT_ID', 'OFFENSE_CATEGORY_ID', 'YEAR', 'NEIGHBORHOOD_ID']].head(3)

Unnamed: 0,INCIDENT_ID,OFFENSE_CATEGORY_ID,YEAR,NEIGHBORHOOD_ID
0,2018869789,larceny,2018,central-park
1,202111218,all-other-crimes,2021,union-station
2,20176005213,larceny,2017,union-station


In [6]:
crime = pd.read_csv('../data/denver_crime.csv').drop(['Unnamed: 0'], axis=1)
crime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 454540 entries, 0 to 454539
Data columns (total 19 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   INCIDENT_ID             454540 non-null  int64  
 1   OFFENSE_ID              454540 non-null  int64  
 2   OFFENSE_CODE            454540 non-null  int64  
 3   OFFENSE_CODE_EXTENSION  454540 non-null  int64  
 4   OFFENSE_TYPE_ID         454540 non-null  object 
 5   OFFENSE_CATEGORY_ID     454540 non-null  object 
 6   FIRST_OCCURRENCE_DATE   454540 non-null  object 
 7   LAST_OCCURRENCE_DATE    152118 non-null  object 
 8   REPORTED_DATE           454540 non-null  object 
 9   INCIDENT_ADDRESS        414371 non-null  object 
 10  GEO_X                   450607 non-null  float64
 11  GEO_Y                   450607 non-null  float64
 12  GEO_LON                 450606 non-null  float64
 13  GEO_LAT                 450606 non-null  float64
 14  DISTRICT_ID         

In [7]:
def clean_neighborhood_names(string):
    name =  ' '.join([x.capitalize() for x in string.split('-')])
    if name == 'Dia':
        return 'DIA'
    return name

In [8]:
crime_df['NEIGHBORHOOD_ID'] = crime_df['NEIGHBORHOOD_ID'].map(clean_neighborhood_names, na_action='ignore')
print(len(crime_df))
crime_df.head()

454540


Unnamed: 0,INCIDENT_ID,OFFENSE_CATEGORY_ID,YEAR,NEIGHBORHOOD_ID
0,2018869789,larceny,2018,Central Park
1,202111218,all-other-crimes,2021,Union Station
2,20176005213,larceny,2017,Union Station
3,20196012240,larceny,2019,West Colfax
4,2018861883,all-other-crimes,2018,Montbello


In [9]:
crime_df = crime_df[~((crime_df['NEIGHBORHOOD_ID'].isin(['cbd', 'Cbd'])) | (crime_df['NEIGHBORHOOD_ID'].isnull()))]
len(crime_df)

439090

In [10]:
A = set(crime_df.NEIGHBORHOOD_ID.unique()) 
B = set(sh_df.NBHD_NAME.unique())
print(A-B)
print(B-A)

set()
set()


In [11]:
geo_df = pd.merge(left=crime_df, right=sh_df, left_on='NEIGHBORHOOD_ID', right_on='NBHD_NAME')
print(len(geo_df))
geo_df.head()

439090


Unnamed: 0,INCIDENT_ID,OFFENSE_CATEGORY_ID,YEAR,NEIGHBORHOOD_ID,NBHD_ID,NBHD_NAME,geometry
0,2018869789,larceny,2018,Central Park,60,Central Park,"POLYGON ((-104.86604 39.79841, -104.86604 39.7..."
1,2016361998,theft-from-motor-vehicle,2016,Central Park,60,Central Park,"POLYGON ((-104.86604 39.79841, -104.86604 39.7..."
2,20166001260,theft-from-motor-vehicle,2016,Central Park,60,Central Park,"POLYGON ((-104.86604 39.79841, -104.86604 39.7..."
3,2019528949,auto-theft,2019,Central Park,60,Central Park,"POLYGON ((-104.86604 39.79841, -104.86604 39.7..."
4,2016713435,traffic-accident,2016,Central Park,60,Central Park,"POLYGON ((-104.86604 39.79841, -104.86604 39.7..."


In [12]:
def count_by_category_and_year(df, category, years = [2020]):
    count_df = (df[(df['OFFENSE_CATEGORY_ID']==category) & (df['YEAR'].isin(years))]
                            .groupby('NEIGHBORHOOD_ID')['INCIDENT_ID'].count().reset_index()
                              .rename(columns={'INCIDENT_ID': 'Count', 'NEIGHBORHOOD_ID' :'Neighborhood'}))
    
    merged_count = pd.merge(left=sh_df, right=count_df, left_on='NBHD_NAME', right_on='Neighborhood').rename(columns = {'NBHD_ID': 'id'})
    merged_count.drop(['NBHD_NAME'], axis=1, inplace=True)
    return merged_count


print(len(count_by_category_and_year(geo_df, 'auto-theft')))
count_by_category_and_year(geo_df, 'auto-theft')

77


Unnamed: 0,id,geometry,Neighborhood,Count
0,2,"POLYGON ((-105.00042 39.74552, -105.00041 39.7...",Auraria,10
1,21,"POLYGON ((-104.94070 39.69540, -104.94070 39.6...",Cory Merrill,29
2,7,"POLYGON ((-104.94070 39.71156, -104.94069 39.7...",Belcaro,41
3,70,"POLYGON ((-104.95931 39.71566, -104.95931 39.7...",Washington Park,54
4,71,"POLYGON ((-104.97342 39.68982, -104.97356 39.6...",Washington Park West,86
...,...,...,...,...
72,77,"POLYGON ((-104.95977 39.75072, -104.96038 39.7...",Whittier,57
73,18,"POLYGON ((-104.95975 39.76199, -104.96037 39.7...",Cole,50
74,76,"POLYGON ((-105.03970 39.71125, -105.03849 39.7...",Westwood,148
75,62,"POLYGON ((-104.99818 39.78256, -104.99820 39.7...",Sunnyside,123


In [13]:
def choropleth_plot(data, myscale, category, years):
    mapa = folium.Map(location=[39.7807, -104.8208], 
               tiles="CartoDB positron", 
               zoom_start=11.25)

    folium.Choropleth(
        geo_data=data,
        name='choropleth',
        data=data,
        columns=['Neighborhood','Count'], 
        key_on='feature.properties.Neighborhood',
        fill_color='OrRd',
        threshold_scale=myscale,
        fill_opacity=0.5,
        line_opacity=0.75,
        legend_name=f'{category} in {years}'
    ).add_to(mapa)

    style_function = lambda x: {'fillColor': '#ffffff', 
                                'color':'#000000', 
                                'fillOpacity': 0.1, 
                                'weight': 0.1}

    hover_feature = folium.features.GeoJson(
        data,
        style_function=style_function, 
        tooltip=folium.features.GeoJsonTooltip(
            fields=['Neighborhood','Count'], 
            aliases=['Neighborhood: ', f'Count of {category} incidences: '],
        )
    )
    
    mapa.add_child(hover_feature)
    return mapa

In [14]:
category='auto-theft'

data2020 = count_by_category_and_year(geo_df, category, [2020])
data2019 = count_by_category_and_year(geo_df, category, [2019])
# (0,0.3, 0.6, 0.965 ,1)
scale = (pd.concat([data2020, data2019]).Count.quantile((0,0.3, 0.8, 0.965 ,1))).tolist()

In [15]:
choropleth_plot(data2020, scale, category, 2020)

In [16]:
choropleth_plot(data2019, scale, category, 2019)

In [17]:
top = ['five-points', 'central-park', 'capitol-hill', 'montbello',
 'baker', 'lincoln-park', 'civic-center', 'union-station','east-colfax', 'gateway-green-valley-ranch',
 'westwood', 'west-colfax', 'north-capitol-hill', 'northeast-park-hill', 'highland']

top = [n.replace('-', ' ') for n in top]
sh_df['NBHD_NAME']=sh_df['NBHD_NAME'].map(lambda x: x.replace('- ', '').lower())
set(top).difference(set(sh_df.NBHD_NAME.unique()))

set()

In [18]:
top_area = sh_df[sh_df.NBHD_NAME.isin(top)]['geometry'].to_crs('epsg:3857').area.sum()
total_area = sh_df.geometry.to_crs('epsg:3857').area.sum()
print(top_area/total_area)

0.23177561603142147
