# Data Preparation

#### Parsing the json **nyc_geo.json** into the dataframe with the following columns:
- Borough
- Neighborhood
- Latitude
- Longitude

In [49]:
import pandas as pd

In [50]:
import json
f = open('nyc_geo.json')
data = json.load(f)

In [51]:
geo_list = []
for i in data['features']:
    feature_dict = {'borough': i['properties']['borough'],
                    'neighborhood': i['properties']['name'], 
                    'lat': i['geometry']['coordinates'][0],
                    'long': i['geometry']['coordinates'][1],
                    'bbox': i['properties']['bbox']}
    geo_list.append(feature_dict)

In [52]:
geo_df = pd.DataFrame(geo_list)

In [53]:
geo_df.head(2)

Unnamed: 0,borough,neighborhood,lat,long,bbox
0,Bronx,Wakefield,-73.847201,40.894705,"[-73.84720052054902, 40.89470517661, -73.84720..."
1,Bronx,Co-op City,-73.829939,40.874294,"[-73.82993910812398, 40.87429419303012, -73.82..."


---
#### Use different data sources and APIs to collect information about the neigborhoods that can be used for segmentation.

In [54]:
arrest_df = pd.read_csv('NYPD_Arrest_Data__Year_to_Date_.csv')

In [55]:
arrest_df.head(2)

Unnamed: 0,ARREST_KEY,ARREST_DATE,PD_CD,PD_DESC,KY_CD,OFNS_DESC,LAW_CODE,LAW_CAT_CD,ARREST_BORO,ARREST_PRECINCT,JURISDICTION_CODE,AGE_GROUP,PERP_SEX,PERP_RACE,X_COORD_CD,Y_COORD_CD,Latitude,Longitude,New Georeferenced Column
0,238859078,01/09/2022,,(null),,(null),PL 2650022,M,B,49,0,25-44,M,BLACK HISPANIC,1021536,251417,40.85668,-73.865212,POINT (-73.865212 40.85668)
1,239923883,01/31/2022,,(null),,(null),CPL5700600,9,Q,113,3,25-44,M,BLACK,1046367,186986,40.679701,-73.776047,POINT (-73.77604735 40.67970059)


In [56]:
air_df = pd.read_csv('Air_Quality.csv')

In [57]:
air_df.head(2)

Unnamed: 0,Unique ID,Indicator ID,Name,Measure,Measure Info,Geo Type Name,Geo Join ID,Geo Place Name,Time Period,Start_Date,Data Value,Message
0,216498,386,Ozone (O3),Mean,ppb,CD,313,Coney Island (CD13),Summer 2013,06/01/2013,34.64,
1,216499,386,Ozone (O3),Mean,ppb,CD,313,Coney Island (CD13),Summer 2014,06/01/2014,33.22,


In [59]:
eviction_df = pd.read_csv('Evictions.csv')

In [60]:
eviction_df.head(2)

Unnamed: 0,Court Index Number,Docket Number,Eviction Address,Eviction Apartment Number,Executed Date,Marshal First Name,Marshal Last Name,Residential/Commercial,BOROUGH,Eviction Postcode,Ejectment,Eviction/Legal Possession,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA
0,70491/17,169289,120 ALDRICH ST.,16F,10/10/2018,Alfred,Locascio,Residential,BRONX,10475,Not an Ejectment,Possession,40.870146,-73.831665,10.0,12.0,46201.0,2128836.0,2051410000.0,Co-op City
1,55498/18A,487863,3041 HOLLAND AVENUE,UNIT 55N,06/19/2019,Danny,Weinheim,Residential,BRONX,10467,Not an Ejectment,Possession,40.870179,-73.865262,12.0,15.0,338.0,2055590.0,2045690000.0,Bronxdale


---
#### EDA 

In [63]:
geo_df.head(1)

Unnamed: 0,borough,neighborhood,lat,long,bbox
0,Bronx,Wakefield,-73.847201,40.894705,"[-73.84720052054902, 40.89470517661, -73.84720..."


In [72]:
arrest_df.head(2)

Unnamed: 0,ARREST_DATE,PD_DESC,OFNS_DESC,LAW_CAT_CD,ARREST_BORO,AGE_GROUP,PERP_SEX,PERP_RACE,Latitude,Longitude,New Georeferenced Column
0,01/09/2022,(null),(null),M,B,25-44,M,BLACK HISPANIC,40.85668,-73.865212,POINT (-73.865212 40.85668)
1,01/31/2022,(null),(null),9,Q,25-44,M,BLACK,40.679701,-73.776047,POINT (-73.77604735 40.67970059)


In [73]:
#### Dropping Unnessesary Data

In [70]:
arrest_df.drop(['ARREST_KEY', 'PD_CD', 'KY_CD', 'LAW_CODE', 'ARREST_PRECINCT', 'JURISDICTION_CODE', 'X_COORD_CD', 'Y_COORD_CD'], axis=1, inplace=True)

In [71]:
arrest_df

Unnamed: 0,ARREST_DATE,PD_DESC,OFNS_DESC,LAW_CAT_CD,ARREST_BORO,AGE_GROUP,PERP_SEX,PERP_RACE,Latitude,Longitude,New Georeferenced Column
0,01/09/2022,(null),(null),M,B,25-44,M,BLACK HISPANIC,40.856680,-73.865212,POINT (-73.865212 40.85668)
1,01/31/2022,(null),(null),9,Q,25-44,M,BLACK,40.679701,-73.776047,POINT (-73.77604735 40.67970059)
2,01/25/2022,RAPE 3,RAPE,F,K,25-44,M,BLACK,40.664121,-73.947765,POINT (-73.9477648403751 40.664121282631)
3,03/03/2022,RAPE 1,RAPE,F,K,18-24,M,BLACK,40.695439,-73.983225,POINT (-73.9832253756043 40.6954388081238)
4,02/22/2022,RAPE 1,RAPE,F,B,45-64,M,BLACK,40.816206,-73.896001,POINT (-73.8960011932583 40.8162058439227)
...,...,...,...,...,...,...,...,...,...,...,...
140559,09/21/2022,OBSTR BREATH/CIRCUL,ASSAULT 3 & RELATED OFFENSES,M,B,25-44,M,WHITE HISPANIC,40.816057,-73.895785,POINT (-73.895785 40.816057)
140560,08/15/2022,STRANGULATION 1ST,FELONY ASSAULT,F,K,25-44,M,BLACK,40.630600,-73.973705,POINT (-73.9737053160275 40.6305998504358)
140561,07/06/2022,"LARCENY,PETIT FROM OPEN AREAS,",PETIT LARCENY,M,K,45-64,M,BLACK,40.684454,-73.977750,POINT (-73.97775 40.684454)
140562,09/13/2022,"DRUG PARAPHERNALIA, POSSESSE",DANGEROUS DRUGS,M,B,25-44,F,BLACK,40.830027,-73.872754,POINT (-73.87275417 40.83002685)


---
#### Data Visualization

In [61]:
import matplotlib.pyplot as plt