In [136]:
import geojson
import pandas as pd 
import geopandas as gpd
import numpy as np
import datetime
import itertools
from operator import itemgetter
from scipy.spatial import cKDTree
from shapely.geometry import Point, LineString
import warnings
# NOTE(review): this suppresses every warning for the rest of the session,
# including deprecation warnings from pandas/geopandas; consider filtering
# only the specific categories that are known to be noise.
warnings.filterwarnings("ignore")


In [3]:
# Parse the ways file with the plain `geojson` package. GeoJSON is specified
# as UTF-8 text (RFC 7946), so decode it explicitly instead of relying on the
# platform's default encoding.
with open("Atlanta _way.geojson", encoding="utf-8") as f:
    gj = geojson.load(f)
# First feature of the collection (a single feature, despite the plural name).
features = gj['features'][0]

In [15]:
# Read the ways file into a GeoDataFrame. The path is passed directly so that
# geopandas opens and closes the file itself and no handle is left dangling.
df = gpd.read_file("Atlanta _way.geojson")

In [24]:
# Keep only the way identifier and its geometry: these are the only columns
# the nearest-way matching further down reads.
way=df[['id','geometry']]
way.head(3)

Unnamed: 0,id,geometry
0,way/9106166,"LINESTRING (-84.30199 33.59314, -84.30203 33.5..."
1,way/9106184,"LINESTRING (-84.44365 33.64577, -84.44400 33.6..."
2,way/9106187,"LINESTRING (-84.44627 33.64505, -84.44617 33.6..."


# Prepare the crime data

In [211]:
# Load the pre-filtered crime records and drop the file's first column.
# NOTE(review): presumably that column is an index written by an earlier
# to_csv call — confirm against the file.
crime = pd.read_csv("filter_region_crime_new.csv").iloc[:,1:]

In [212]:
# Parse the 'date' strings into datetimes so they can be range-filtered below.
crime = crime.assign(date=pd.to_datetime(crime['date']))

In [213]:
# Preview the first rows to sanity-check the load and the parsed dates.
crime.head(3)

Unnamed: 0,beat,crime,date,lat,location,long,neighborhood,npu,number
0,509,LARCENY-NON VEHICLE,2010-10-31,33.77101,610 SPRING ST NW,-84.38895,Downtown,M,103040029.0
1,401,AUTO THEFT,2010-10-31,33.74057,850 OAK ST SW,-84.4168,West End,T,103040061.0
2,301,LARCENY-FROM VEHICLE,2010-10-31,33.71803,1344 METROPOLITAN PKWY SW,-84.40774,Capitol View Manor,X,103040169.0


In [214]:
def groupdf(df, date1, date2):
    """Count crime records per (lat, long) location inside a date window.

    Rows are kept when ``date1 <= df['date'] <= date2`` (both ends inclusive).
    The result has one row per distinct (lat, long) pair with the columns
    ``lat``, ``long`` and ``number``, where ``number`` is the count of
    non-null ``number`` values at that location.
    """
    in_window = df['date'].between(date1, date2)
    per_location = (
        df[in_window]
        .groupby(["lat", "long"])['number']
        .count()
        .reset_index()
    )
    return per_location

In [215]:
# Per-location counts for each calendar year 2010-2016, built in one loop
# instead of seven copy-pasted calls.
yearly_counts = [
    groupdf(crime, '{}-01-01'.format(year), '{}-12-31'.format(year))
    for year in range(2010, 2017)
]
# Keep the per-year names bound: the concat cell that follows refers to them.
ag1, ag2, ag3, ag4, ag5, ag6, ag7 = yearly_counts

In [216]:
# Stack the seven yearly tables; a location contributes one row for every
# year in which it has records.
yearly_tables = [ag1, ag2, ag3, ag4, ag5, ag6, ag7]
crime_per = pd.concat(yearly_tables)

In [217]:
# Average the yearly counts for each location.
# NOTE(review): a year with no records at a location contributes no row, so
# this is the mean over years that have at least one record, not over all
# seven years — confirm that this is the intended statistic.
crime_p = (
    crime_per
    .groupby(["lat", "long"])['number']
    .mean()
    .reset_index()
)

In [218]:
# Keep two decimals of the per-location yearly average.
crime_p['number'] = crime_p['number'].round(2)

In [219]:
# Preview the per-location averages.
crime_p.head(3)

Unnamed: 0,lat,long,number
0,33.62176,-84.44794,1.0
1,33.62505,-84.42445,1.0
2,33.62522,-84.43101,1.0


In [220]:
# Convert the per-location table to a GeoDataFrame of points (x=long, y=lat).
# The coordinates are longitude/latitude in degrees, so declare EPSG:4326
# (WGS84), the CRS GeoJSON uses, to keep this layer consistent with `way`.
# NOTE(review): assumes the crime coordinates are WGS84 — confirm with the
# data source.
gdf = gpd.GeoDataFrame(
    crime_p,
    geometry=gpd.points_from_xy(crime_p.long, crime_p.lat),
    crs="EPSG:4326",
)

# Assign each crime point to its nearest way

In [221]:
def ckdnearest(gdfA, gdfB, gdfB_cols=['id']):
    """Attach to each row of ``gdfA`` the ``gdfB`` row owning the nearest vertex.

    Every coordinate of every ``gdfB`` geometry is put in a KD-tree and each
    ``gdfA`` coordinate is matched to the closest one. The match is therefore
    to the nearest *vertex*, not to the nearest point along a segment, and
    ``dist`` is the plain Euclidean distance in coordinate units.

    Parameters
    ----------
    gdfA : frame with a ``geometry`` column whose geometries have exactly one
        coordinate each (points).
    gdfB : frame with a ``geometry`` column; geometries may have any number of
        coordinates.
    gdfB_cols : column selection of ``gdfB`` to copy onto the result
        (default ``['id']``). It is only read, never modified.

    Returns
    -------
    ``gdfA`` with the selected ``gdfB`` columns and a ``dist`` column appended,
    indexed like ``gdfA``.

    Raises
    ------
    ValueError
        If the geometries of ``gdfA`` do not yield one coordinate per row, or
        if either frame has no geometries to concatenate.
    """
    query_pts = np.concatenate(
        [np.asarray(geom.coords) for geom in gdfA.geometry.to_list()])
    if len(query_pts) != len(gdfA):
        raise ValueError(
            "gdfA geometries must have exactly one coordinate each")

    vertex_arrays = [np.asarray(geom.coords) for geom in gdfB.geometry.to_list()]
    vertices = np.concatenate(vertex_arrays)
    # owner[k] is the positional row in gdfB that vertex k belongs to.
    vertex_counts = np.array([len(v) for v in vertex_arrays], dtype=int)
    owner = np.repeat(np.arange(len(vertex_arrays)), vertex_counts)

    dist, nearest_vertex = cKDTree(vertices).query(query_pts, k=1)
    # Array indexing keeps a 1-D result even for a single query point.
    nearest_rows = owner[np.atleast_1d(nearest_vertex)]

    # Rows are selected by position (iloc) so gdfB may carry any index, and the
    # new columns take gdfA's index so the column-wise concat lines up row by row.
    matched = gdfB[gdfB_cols].iloc[nearest_rows]
    matched.index = gdfA.index
    dist_col = pd.Series(np.atleast_1d(dist), index=gdfA.index, name='dist')
    return pd.concat([gdfA, matched, dist_col], axis=1)

In [222]:
# Attach the id of the nearest way to every crime location. 'dist' is in
# coordinate units (degrees here, because the points are long/lat).
crime_way=ckdnearest(gdf, way, gdfB_cols=['id'])

In [223]:
# Preview the matched way id and distance for the first locations.
crime_way.head(3)

Unnamed: 0,lat,long,number,geometry,id,dist
0,33.62176,-84.44794,1.0,POINT (-84.44794 33.62176),way/133115249,0.000112
1,33.62505,-84.42445,1.0,POINT (-84.42445 33.62505),way/41446011,9.2e-05
2,33.62522,-84.43101,1.0,POINT (-84.43101 33.62522),way/287413972,3.9e-05


In [224]:
# Roll locations up to ways: total the per-location crime figures and average
# the point-to-way distances, then give the columns descriptive names.
per_way = crime_way.groupby('id').agg({'number': 'sum', 'dist': 'mean'})
final_df = per_way.reset_index().rename(
    columns={'number': 'crime_count', 'dist': 'avg_distance'})

In [225]:
# Persist the per-way table without the positional index.
final_df.to_csv("crime_link_way_new.csv",index=False)

In [227]:
# Preview the per-way result that was written out.
final_df.head(4)

Unnamed: 0,id,crime_count,avg_distance
0,way/101797829,1.0,0.000192
1,way/101797968,10.6,0.002257
2,way/107533616,2.0,0.000767
3,way/107533620,4.0,0.000453
