## Import packages

In [1]:
## for data
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import math

## for plotting
import matplotlib.pyplot as plt
import seaborn as sns

## for geospatial
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster
import geopy

## for machine learning
from sklearn import preprocessing, cluster
import scipy

## for deep learning
import minisom

## Load shapefiles

In [2]:
# Root folder that holds every Boston input/output dataset used in this notebook
base_dir = './datasets/Boston'

# Read the census tract polygons and reproject them to EPSG:4326 (lon/lat)
census_blk_tracts = (
    gpd.read_file(f'{base_dir}/census_tracts_shapefile/census_tracts.shp')
    .to_crs(epsg=4326)
)

# Slim view with only the tract name (under a friendlier column label) and its geometry
census_blks = (
    census_blk_tracts[['NAME10', 'geometry']]
    .rename(columns={'NAME10': 'census_tract_name'})
)

## Create base interactive map

In [3]:
# Base map: `location` is the initial [latitude, longitude] centre of the view
m = folium.Map(
    location=[42.32, -71.0589],
    zoom_start=11,
    tiles='cartodbpositron',
)

## Overlay census tract boundaries on the base map

In [4]:
# Draw the census tract polygons on top of the base map.
# No `data` is bound to the layer, so nothing is shaded by value here.
# NOTE(review): 'YlGnBu' is a ColorBrewer palette name and presumably only matters
# once `data` is supplied -- confirm the intended fill.
Choropleth(
    geo_data=census_blk_tracts,
    key_on="feature.id",
    fill_color='YlGnBu',
    fill_opacity=0.3,
    legend_name='Boston census tracts',
).add_to(m)

# Last expression of the cell: render the map inline
m

## Load crime data

In [5]:
# Load the incident reports (the file is decoded as latin-1)
crimes = pd.read_csv(f'{base_dir}/crime.csv', encoding='latin-1')

# Keep only rows that have coordinates and a district. Reassigning (rather than
# dropping in place) keeps every step of this cell a plain expression.
crimes = crimes.dropna(subset=['Lat', 'Long', 'DISTRICT'])

# Focus on major offense groups recorded from 2018 onwards (YEAR >= 2018)
major_offense_groups = [
    'Larceny', 'Auto Theft', 'Robbery', 'Larceny From Motor Vehicle', 'Residential Burglary',
    'Simple Assault', 'Harassment', 'Ballistics', 'Aggravated Assault', 'Other Burglary',
    'Arson', 'Commercial Burglary', 'HOME INVASION', 'Homicide', 'Criminal Harassment',
    'Manslaughter',
]
crimes = crimes[
    crimes.OFFENSE_CODE_GROUP.isin(major_offense_groups) & (crimes.YEAR >= 2018)
]

# Turn every incident into a point geometry (x = Long, y = Lat) and declare the CRS
# at construction time; EPSG:4326 matches the CRS the census tracts are projected to.
crimes_gpd = gpd.GeoDataFrame(
    crimes,
    geometry=gpd.points_from_xy(crimes.Long, crimes.Lat),
    crs='epsg:4326',
)

# Only the offense label and the point are needed for the spatial join
crime_locations = (
    crimes_gpd[['OFFENSE_CODE_GROUP', 'geometry']]
    .rename(columns={'OFFENSE_CODE_GROUP': 'offense'})
)
crime_locations.head(3)

Unnamed: 0,offense,geometry
0,Larceny,POINT (-71.13937 42.35779)
6,Auto Theft,POINT (-71.08273 42.30607)
8,Robbery,POINT (-71.07085 42.33152)


In [6]:
# Preview the first three census tract rows (tract name + polygon geometry)
census_blks.head(3)

Unnamed: 0,census_tract_name,geometry
0,104.05,"POLYGON ((-71.09009 42.34667, -71.09001 42.346..."
1,104.04,"POLYGON ((-71.09066 42.33977, -71.09103 42.339..."
2,108.01,"POLYGON ((-71.08160 42.35371, -71.08044 42.354..."


## Spatially join crime points with census tract polygons

In [7]:
# Attach to every crime point the census tract polygon it intersects.
# 'intersects' is sjoin's default spatial predicate, so it is left implicit: the
# keyword was renamed from `op` to `predicate` in newer geopandas releases, and
# passing either name explicitly fails on the releases that do not know it.
crime_with_census_tracts = gpd.sjoin(crime_locations, census_blks, how="inner")
crime_with_census_tracts.head(3)

Unnamed: 0,offense,geometry,index_right,census_tract_name
0,Larceny,POINT (-71.13937 42.35779),180,1
176,Robbery,POINT (-71.14426 42.36053),180,1
177,Aggravated Assault,POINT (-71.14426 42.36053),180,1


In [8]:
# Number of joined crime points per tract; `index_right` carries the row label of
# the matching polygon in `census_blks`.
crimes_per_tract = crime_with_census_tracts.groupby('index_right').size()

# Align the counts to every tract. Tracts without any matched crime get an explicit
# 0 instead of NaN, which also keeps the `count` column integer-typed.
census_wise_crime_counts = gpd.GeoDataFrame(
    census_blks.assign(
        count=crimes_per_tract.reindex(census_blks.index, fill_value=0)
    )
)
census_wise_crime_counts.head(3)

Unnamed: 0,census_tract_name,geometry,count
0,104.05,"POLYGON ((-71.09009 42.34667, -71.09001 42.346...",87.0
1,104.04,"POLYGON ((-71.09066 42.33977, -71.09103 42.339...",35.0
2,108.01,"POLYGON ((-71.08160 42.35371, -71.08044 42.354...",27.0


## Save the GeoDataFrame as a shapefile

In [9]:
output_folder = f'{base_dir}/census_wise_crime_counts'

# exist_ok=True creates the folder when it is missing and does nothing when it is
# already there, so no separate existence check is needed.
os.makedirs(output_folder, exist_ok=True)

# NOTE(review): the ESRI Shapefile format limits field names to 10 characters, so
# `census_tract_name` is presumably truncated on disk -- confirm against whatever
# reads this file back.
census_wise_crime_counts.to_file(f'{output_folder}/census_wise_crime_counts.shp')

  census_wise_crime_counts.to_file(f'{output_folder}/census_wise_crime_counts.shp')
