In [17]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
import statsmodels.formula.api as smf
import scipy

from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression 
from sklearn import metrics


_data311 = pd.read_csv('311_2019.csv')
_dataShoot = pd.read_csv('NYPD_Shooting_Incident_Data__Historic_.csv')

  exec(code_obj, self.user_global_ns, self.user_ns)


In [18]:
zips = gpd.read_file('zipcode/ZIP_CODE_040114.shp')
zips["COUNTY"].replace({
    "Bronx": "BRONX",
    "Queens": "QUEENS",
    "New York": "MANHATTAN",
    "Kings": "BROOKLYN", 
    "Richmond": "STATEN ISLAND"
}, inplace=True)
zips.drop_duplicates(subset=['ZIPCODE'], inplace=True)
zips.to_crs(epsg=4326, inplace=True)
zips.head(0)

Unnamed: 0,ZIPCODE,BLDGZIP,PO_NAME,POPULATION,AREA,STATE,COUNTY,ST_FIPS,CTY_FIPS,URL,SHAPE_AREA,SHAPE_LEN,geometry


In [19]:
def handle311(data):
  columns = ['Unique Key', 'Agency','Complaint Type', 'Incident Zip', 'Latitude','Longitude']
  data = data[columns]
  data = data.dropna()
  data['Incident Zip'] = data['Incident Zip'].astype(float).astype(int)
  data = gpd.GeoDataFrame(data, geometry=gpd.points_from_xy(data.Longitude, data.Latitude))
  return data

In [20]:
def handleShoot(data):
  columns=['INCIDENT_KEY', 'OCCUR_DATE', 'OCCUR_TIME', 'Latitude', 'Longitude']
  data = data[columns]
  data = gpd.GeoDataFrame(data, geometry=gpd.points_from_xy(data.Longitude, data.Latitude))
  _zips = zips[['ZIPCODE', 'geometry']]
  _data = gpd.sjoin(data, _zips)
  return _data

In [21]:
_dataShoot=handleShoot(_dataShoot)
_data311=handle311(_data311)

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  _data = gpd.sjoin(data, _zips)


In [22]:
dataShoot = _dataShoot
data311 = _data311

In [23]:
dataShoot.head()

Unnamed: 0,INCIDENT_KEY,OCCUR_DATE,OCCUR_TIME,Latitude,Longitude,geometry,index_right,ZIPCODE
0,201575314,08/23/2019,22:10:00,40.697805,-73.808141,POINT (-73.80814 40.69781),174,11435
164,201384509,08/20/2019,20:08:00,40.693824,-73.800597,POINT (-73.80060 40.69382),174,11435
580,193118593,02/02/2019,00:45:00,40.689573,-73.796377,POINT (-73.79638 40.68957),174,11435
694,193118593,02/02/2019,00:45:00,40.689573,-73.796377,POINT (-73.79638 40.68957),174,11435
737,200995687,08/11/2019,04:00:00,40.688447,-73.796124,POINT (-73.79612 40.68845),174,11435


In [24]:
data311.head()

Unnamed: 0,Unique Key,Agency,Complaint Type,Incident Zip,Latitude,Longitude,geometry
1,44079566,DPR,Damaged Tree,11434,40.670854,-73.771876,POINT (-73.77188 40.67085)
2,45211021,DSNY,Graffiti,11212,40.663072,-73.925752,POINT (-73.92575 40.66307)
3,45232110,DSNY,Graffiti,10016,40.744629,-73.976001,POINT (-73.97600 40.74463)
4,43573777,DSNY,Graffiti,10032,40.835618,-73.945652,POINT (-73.94565 40.83562)
5,43524214,NYPD,Noise - Commercial,11420,40.675929,-73.818015,POINT (-73.81802 40.67593)


In [29]:
complaints = data311['Complaint Type'].unique()
matrix = pd.DataFrame(0, columns=list(complaints), index=list(zips.ZIPCODE.astype(int)))
matrix.head()

Unnamed: 0,Damaged Tree,Graffiti,Noise - Commercial,FATF,Overgrown Tree/Branches,Lost Property,Rodent,Illegal Parking,Broken Parking Meter,Non-Emergency Police Matter,...,SRDE,Request Large Bulky Item Collection,Sweeping/Missed,DOF Property - Payment Issue,Litter Basket / Request,DPR Internal,Overflowing Litter Baskets,Outside Building,Electric,Quality of Life
11436,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11213,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11212,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11225,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11218,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
for index, row in data311.iterrows():
  if(row['Incident Zip'] in matrix.index):
    matrix.loc[row['Incident Zip'], row['Complaint Type']] += 1
matrix.head()

Unnamed: 0,Damaged Tree,Graffiti,Noise - Commercial,FATF,Overgrown Tree/Branches,Lost Property,Rodent,Illegal Parking,Broken Parking Meter,Non-Emergency Police Matter,...,SRDE,Request Large Bulky Item Collection,Sweeping/Missed,DOF Property - Payment Issue,Litter Basket / Request,DPR Internal,Overflowing Litter Baskets,Outside Building,Electric,Quality of Life
11436,71,3,2,0,67,0,28,297,0,46,...,0,0,0,0,0,0,0,0,0,0
11213,111,74,128,0,52,7,227,497,16,26,...,0,1,0,0,0,0,0,0,0,0
11212,84,86,75,0,76,2,169,518,30,215,...,0,0,0,0,0,0,0,0,0,0
11225,90,95,150,0,42,1,201,592,14,24,...,0,0,0,0,0,0,0,0,0,0
11218,203,107,55,0,97,1,176,1143,27,49,...,0,1,0,0,0,0,0,0,0,0


In [31]:
matrix = matrix.reset_index()
matrix = matrix.rename(columns={'index': 'zipcode'})
matrix.head()

Unnamed: 0,zipcode,Damaged Tree,Graffiti,Noise - Commercial,FATF,Overgrown Tree/Branches,Lost Property,Rodent,Illegal Parking,Broken Parking Meter,...,SRDE,Request Large Bulky Item Collection,Sweeping/Missed,DOF Property - Payment Issue,Litter Basket / Request,DPR Internal,Overflowing Litter Baskets,Outside Building,Electric,Quality of Life
0,11436,71,3,2,0,67,0,28,297,0,...,0,0,0,0,0,0,0,0,0,0
1,11213,111,74,128,0,52,7,227,497,16,...,0,1,0,0,0,0,0,0,0,0
2,11212,84,86,75,0,76,2,169,518,30,...,0,0,0,0,0,0,0,0,0,0
3,11225,90,95,150,0,42,1,201,592,14,...,0,0,0,0,0,0,0,0,0,0
4,11218,203,107,55,0,97,1,176,1143,27,...,0,1,0,0,0,0,0,0,0,0


In [32]:
matrix.to_csv('ComplaintsMetrix.csv')

In [33]:
g = dataShoot[['ZIPCODE','INCIDENT_KEY']].groupby('ZIPCODE',as_index=False).count()
g.head()

Unnamed: 0,ZIPCODE,INCIDENT_KEY
0,83,1
1,10001,43
2,10002,95
3,10003,29
4,10004,2


In [35]:
g = g.rename(columns={'Unique Key': 'Shhot'})

In [36]:
g

Unnamed: 0,ZIPCODE,INCIDENT_KEY
0,00083,1
1,10001,43
2,10002,95
3,10003,29
4,10004,2
...,...,...
166,11435,146
167,11436,76
168,11691,365
169,11692,124


In [None]:
g.to_csv('ShootMetrix.csv')