## Data exploration 

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point


Load the data set and clean it 

In [3]:
df_chicago=pd.read_csv('Crime/Crimes_-_2001_to_Present.csv',index_col='ID')
# clean the data without geocode and time stamp
df_chicago=df_chicago.dropna(subset=['Date','Location'])
df_chicago['Date']=pd.to_datetime(df_chicago['Date'])
# correct the value of crime type
df_chicago['Primary Type'].replace('CRIM SEXUAL ASSAULT','CRIMINAL SEXUAL ASSAULT',inplace=True)

In [9]:
#get geometry point of data
df_chicago['Location'] = df_chicago.apply(lambda row: Point(row.Longitude, row.Latitude), axis=1)
# get geopandas frame
df_chicago=gpd.GeoDataFrame(df_chicago,geometry='Location')

Now combine the data set with the [boundary data of chicago](https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Community-Areas-current-/cauq-8yn6)

In [10]:
geof='Crime/Boundaries - Community Areas (current).geojson'
chicago_area=gpd.read_file(geof)
# only keep teh information of commuity and geometry
chicago_area=chicago_area[['community','area_numbe','geometry']]
chicago_area=chicago_area.rename(columns={'area_numbe':'area_num'})

In [11]:
#set crs to the geo data frame
df_chicago=df_chicago.set_crs(chicago_area.crs)
#sptial join 
df_chicago= df_chicago.sjoin(chicago_area, how='inner',predicate='within')

In [13]:
df_chicago.head()

Unnamed: 0_level_0,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,...,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location,index_right,community,area_num
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10224853,HY411790,2015-09-05 16:15:00,041XX W FIFTH AVE,1570,SEX OFFENSE,PUBLIC INDECENCY,SIDEWALK,True,False,1132,...,1148735.0,1896483.0,2015,09/17/2015 11:37:18 AM,41.871884,-87.729404,POINT (-87.72940 41.87188),26,WEST GARFIELD PARK,26
10232192,HY419551,2015-09-10 16:00:00,039XX W GLADYS AVE,1750,OFFENSE INVOLVING CHILDREN,CHILD ABUSE,RESIDENCE,False,True,1132,...,1150207.0,1898066.0,2015,02/10/2018 03:50:01 PM,41.876199,-87.723958,POINT (-87.72396 41.87620),26,WEST GARFIELD PARK,26
10329071,HY519515,2015-11-29 15:00:00,038XX W WILCOX ST,281,CRIMINAL SEXUAL ASSAULT,NON-AGGRAVATED,OTHER,False,False,1122,...,1150799.0,1899061.0,2015,02/10/2018 03:50:01 PM,41.878918,-87.721758,POINT (-87.72176 41.87892),26,WEST GARFIELD PARK,26
10240564,HY428120,2015-09-17 20:00:00,039XX W WASHINGTON BLVD,281,CRIMINAL SEXUAL ASSAULT,NON-AGGRAVATED,RESIDENCE,False,False,1122,...,1150014.0,1900125.0,2015,02/10/2018 03:50:01 PM,41.881853,-87.724613,POINT (-87.72461 41.88185),26,WEST GARFIELD PARK,26
10242207,HY430128,2015-09-18 22:35:00,044XX W MADISON ST,1506,PROSTITUTION,SOLICIT ON PUBLIC WAY,SIDEWALK,True,False,1113,...,1146742.0,1899645.0,2015,02/10/2018 03:50:01 PM,41.880599,-87.73664,POINT (-87.73664 41.88060),26,WEST GARFIELD PARK,26


In [15]:
# check the type and count of crime data
df_chicago.groupby('Primary Type').size()


Primary Type
CRIMINAL SEXUAL ASSAULT       28748
DOMESTIC VIOLENCE                 1
HUMAN TRAFFICKING                67
OFFENSE INVOLVING CHILDREN    47860
PROSTITUTION                  68595
SEX OFFENSE                   26328
STALKING                       3798
dtype: int64

In [19]:

target_type=['SEX OFFENSE ','CRIMINAL SEXUAL ASSAULT']
# subset data
crime_children= df_chicago[df_chicago['Primary Type']=='OFFENSE INVOLVING CHILDREN']
crime_sexual= df_chicago[df_chicago['Primary Type'].isin(target_type)]
print(f'Size of violence against children: {crime_children.shape[0]}')
print(f'Size of violence against women:{crime_sexual.shape[0]}')

Size of violence against children: 47860
Size of violence against women:28748


# Spatial Pattern Analysis
Code in this section is adapted from 
[construct spatial weight matrix](https://pysal.org/libpysal/generated/libpysal.weights.W.html#libpysal.weights.W)

In [None]:
# Visualise the distribution of 