In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

# Crime data

In [2]:
geo_crime_data = gpd.read_file('data/sf/crime_data.shp')
print(geo_crime_data.shape)
geo_crime_data.head()

(236370, 5)


Unnamed: 0,X,Y,DATETIME,TARGET,geometry
0,-76.624824,39.252974,2018/04/02 04:00:00.000,ASALTO,POINT (-76.62482 39.25297)
1,-76.592225,39.343284,2019/02/01 08:00:00.000,HURTO,POINT (-76.59222 39.34328)
2,-76.625705,39.284369,2017/09/27 21:00:00.000,HURTO,POINT (-76.62570 39.28437)
3,-76.478282,39.270194,2017/03/21 11:00:00.000,FALSO,POINT (-76.47828 39.27019)
4,-76.576006,39.291228,2017/03/07 02:00:00.000,ROBO,POINT (-76.57601 39.29123)


geo_crime_data.plot('TARGET', figsize=(15, 15), legend=True)

## Census blocks 2020

In [3]:
census_blocks_2020 = gpd.read_file("data/sf/Maryland_Census_Boundaries_-_Census_Blocks_2020/Maryland_Census_Boundaries_-_Census_Blocks_2020.shp")
#census_blocks_2020 = census_blocks_2020.to_crs(geo_crime_data.crs)
geo_crime_data = geo_crime_data.to_crs(census_blocks_2020.crs)
print(census_blocks_2020.shape)
census_blocks_2020.head()

(83827, 20)


Unnamed: 0,OBJECTID,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,MTFCC20,UR20,UACE20,UATYPE20,FUNCSTAT20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,TotalAREA,ShapeSTAre,ShapeSTLen,geometry
0,83828,24,1,2000,5020,240010020005020,Block 5020,G5040,,,,S,22569,0,39.5796393,-78.8500248,22570.424474,38041.328125,867.647957,"POLYGON ((-8777665.165 4804980.141, -8777644.7..."
1,83829,24,1,2300,4006,240010023004006,Block 4006,G5040,,,,S,580462,0,39.6687342,-78.7799376,580514.693183,980892.21875,4912.745777,"POLYGON ((-8770308.503 4817994.771, -8770253.1..."
2,83830,24,1,1700,3052,240010017003052,Block 3052,G5040,,,,S,14394,0,39.6493444,-78.9023796,14395.022531,24309.800781,806.11026,"POLYGON ((-8783545.730 4815087.769, -8783483.8..."
3,83831,24,1,1503,1011,240010015031011,Block 1011,G5040,,,,S,12666,0,39.6991057,-78.8263345,12667.200209,21422.164062,1083.581318,"POLYGON ((-8775087.785 4822410.682, -8775081.1..."
4,83832,24,1,2000,4027,240010020004027,Block 4027,G5040,,,,S,42591,0,39.5830209,-78.8483373,42593.480518,71796.070312,1337.315526,"POLYGON ((-8777634.552 4805728.467, -8777510.7..."


In [4]:
geo_crime_data = (
    geo_crime_data
    .sjoin(census_blocks_2020[["GEOID20", "TotalAREA", 'geometry']], how = 'left')
    .drop(columns = 'index_right')
    .rename(columns = {'GEOID20': 'GeoIdBlockCensus', 'TotalAREA': 'TotalAreaBlockCensus'})
)
print(geo_crime_data.shape)
geo_crime_data.head()

(236370, 7)


Unnamed: 0,X,Y,DATETIME,TARGET,geometry,GeoIdBlockCensus,TotalAreaBlockCensus
0,-76.624824,39.252974,2018/04/02 04:00:00.000,ASALTO,POINT (-8529836.395 4757972.943),245102502072001,69042.148089
1,-76.592225,39.343284,2019/02/01 08:00:00.000,HURTO,POINT (-8526207.441 4770964.002),245102709031016,4196.749506
2,-76.625705,39.284369,2017/09/27 21:00:00.000,HURTO,POINT (-8529934.426 4762487.253),245102201001005,15926.414256
3,-76.478282,39.270194,2017/03/21 11:00:00.000,FALSO,POINT (-8513523.392 4760448.784),240054525001001,493438.317358
4,-76.576006,39.291228,2017/03/07 02:00:00.000,ROBO,POINT (-8524401.959 4763473.817),245100102004002,7452.528531


In [5]:
geo_crime_data.GeoIdBlockCensus.isnull().value_counts()

False    236370
Name: GeoIdBlockCensus, dtype: int64

geo_crime_data.plot('GeoIdBlockCensus')

# Marylant Transit MTA Bus Stops

In [6]:
bus_stops = gpd.read_file("data/sf/Maryland_Transit_-_MTA_Bus_Stops.shp")
print(bus_stops.shape)
bus_stops.head()

(4536, 15)


Unnamed: 0,X,Y,OBJECTID,stop_name,Rider_On,Rider_Off,Rider_Tota,Stop_Rider,Routes_Ser,Distributi,Mode,Shelter,County,stop_id,geometry
0,-8533796.0,4772067.0,50034,CYLBURN AVE & GREENSPRING AVE fs wb,151.0,123.0,274.0,224.0,"94, 31, 91",E1 - Public Domain - Internal Use Only,Bus,Yes,Baltimore City,1,POINT (-8533795.913 4772066.833)
1,-8534126.0,4772153.0,50035,LANIER AVE & SINAI HOSPITAL sb,17.0,12.0,29.0,2134.0,"94, 31, 91",E1 - Public Domain - Internal Use Only,Bus,No,Baltimore City,3,POINT (-8534126.086 4772153.208)
2,-8534252.0,4772327.0,50036,LANIER AVE & BELVEDERE AVE nb,54.0,2.0,56.0,1380.0,"94, 31, 91",E1 - Public Domain - Internal Use Only,Bus,No,Baltimore City,4,POINT (-8534252.211 4772326.537)
3,-8533555.0,4771464.0,50037,YELLOWOOD AVE & FLAX TERR OPP sb,31.0,37.0,68.0,1184.0,91,E1 - Public Domain - Internal Use Only,Bus,No,Baltimore City,8,POINT (-8533555.463 4771463.810)
4,-8533173.0,4771681.0,50038,TAMARIND RD & SPRINGARDEN DR nb,13.0,10.0,23.0,2377.0,91,E1 - Public Domain - Internal Use Only,Bus,No,Baltimore City,10,POINT (-8533172.969 4771680.601)


fig, ax = plt.subplots(figsize = (15, 15))
geo_crime_data.plot(ax = ax, color = 'red')
bus_stops.plot(ax = ax, color = 'blue')

In [7]:
geo_crime_data = geo_crime_data.merge(
bus_stops.to_crs(census_blocks_2020.crs).sjoin(
    census_blocks_2020[["GEOID20", 'geometry']],
    how = 'left'
).drop(
    columns = 'index_right'
)['GEOID20'].value_counts(    
).reset_index(    
).rename(
    columns = {'index': 'GeoIdBlockCensus', 'GEOID20': 'BusStopsCount'}
    ),
    on = 'GeoIdBlockCensus',
    how = 'left'
)
geo_crime_data["BusStopsCount"] = geo_crime_data["BusStopsCount"].fillna(0)
print(geo_crime_data.shape)
geo_crime_data.head()

(236370, 8)


Unnamed: 0,X,Y,DATETIME,TARGET,geometry,GeoIdBlockCensus,TotalAreaBlockCensus,BusStopsCount
0,-76.624824,39.252974,2018/04/02 04:00:00.000,ASALTO,POINT (-8529836.395 4757972.943),245102502072001,69042.148089,0.0
1,-76.592225,39.343284,2019/02/01 08:00:00.000,HURTO,POINT (-8526207.441 4770964.002),245102709031016,4196.749506,0.0
2,-76.625705,39.284369,2017/09/27 21:00:00.000,HURTO,POINT (-8529934.426 4762487.253),245102201001005,15926.414256,0.0
3,-76.478282,39.270194,2017/03/21 11:00:00.000,FALSO,POINT (-8513523.392 4760448.784),240054525001001,493438.317358,0.0
4,-76.576006,39.291228,2017/03/07 02:00:00.000,ROBO,POINT (-8524401.959 4763473.817),245100102004002,7452.528531,1.0


In [8]:
geo_crime_data.BusStopsCount.value_counts()

0.0     156064
1.0      49629
2.0      18135
3.0       6477
4.0       2331
5.0       1309
17.0       624
6.0        589
7.0        428
11.0       360
8.0        252
9.0        155
13.0        17
Name: BusStopsCount, dtype: int64

In [9]:
_ = geo_crime_data.sjoin_nearest(
    bus_stops.to_crs(geo_crime_data.crs)[['stop_id', 'X', 'Y', 'geometry']].rename(columns = {'X': 'XBusStop', 'Y': 'YBusStop'}),
    max_distance = 1000,
    how = 'left'
).drop(
    columns = 'index_right'
)
_ = _.drop_duplicates(subset= _.columns[:-3])
geo_crime_data = geo_crime_data.assign(
    DistanceBusStop = _.apply(lambda x: ((x.X - x.XBusStop)**2 + (x.Y - x.YBusStop)**2)**0.5 , axis = 1).values
)
geo_crime_data["DistanceBusStop"] = geo_crime_data["DistanceBusStop"].fillna(max(geo_crime_data["DistanceBusStop"]*2))
print(geo_crime_data.shape)
geo_crime_data.head()

(236370, 9)


Unnamed: 0,X,Y,DATETIME,TARGET,geometry,GeoIdBlockCensus,TotalAreaBlockCensus,BusStopsCount,DistanceBusStop
0,-76.624824,39.252974,2018/04/02 04:00:00.000,ASALTO,POINT (-8529836.395 4757972.943),245102502072001,69042.148089,0.0,9767034.0
1,-76.592225,39.343284,2019/02/01 08:00:00.000,HURTO,POINT (-8526207.441 4770964.002),245102709031016,4196.749506,0.0,9770183.0
2,-76.625705,39.284369,2017/09/27 21:00:00.000,HURTO,POINT (-8529934.426 4762487.253),245102201001005,15926.414256,0.0,9769345.0
3,-76.478282,39.270194,2017/03/21 11:00:00.000,FALSO,POINT (-8513523.392 4760448.784),240054525001001,493438.317358,0.0,9753389.0
4,-76.576006,39.291228,2017/03/07 02:00:00.000,ROBO,POINT (-8524401.959 4763473.817),245100102004002,7452.528531,1.0,9765019.0


# Community Statistical Areas Reference Boundaries

In [12]:
csas = gpd.read_file("data/sf/Baltimore/Community_Statistical_Areas_(CSAs)__Reference_Boundaries/Community_Statistical_Areas_(CSAs)__Reference_Boundaries.shp")
print(csas.shape)
csas.head()

(56, 6)


Unnamed: 0,FID,Community,Neigh,Tracts,Link,geometry
0,1,Allendale/Irvington/S. Hilton,"Allendale, Carroll-South Hilton, Gwynns Falls,...","280404, 200701, 200600, 200702, 200800, 250102",http://bniajfi.org/community/Allendale_Irvingt...,"POLYGON ((-8533446.900 4761284.000, -8533447.0..."
1,2,Beechfield/Ten Hills/West Hills,"Beechfield, Hunting Ridge, Ten Hills, Tremont,...","280403, 280401, 250101",http://bniajfi.org/community/Beechfield_Ten%20...,"POLYGON ((-8537625.100 4765025.100, -8537609.4..."
2,3,Belair-Edison,"Belair-Edison, Clifton Park, Four By Four, May...","260301, 080102, 080101, 260302",http://bniajfi.org/community/Belair-Edison/,"POLYGON ((-8523467.900 4768528.400, -8523451.2..."
3,4,Brooklyn/Curtis Bay/Hawkins Point,"Brooklyn, Curtis Bay, Fairfield Area, Hawkins ...","250500, 250600, 250401, 250402",http://bniajfi.org/community/Brooklyn_Curtis%2...,"MULTIPOLYGON (((-8525811.500 4752203.100, -852..."
4,5,Canton,"Canton, Patterson Park","010400, 010300, 010100",http://bniajfi.org/community/Canton,"POLYGON ((-8523889.000 4762493.800, -8523886.9..."


In [13]:
geo_crime_data = geo_crime_data.sjoin(
    csas.to_crs(geo_crime_data.crs)[['Community', 'geometry']],
    how = 'left'
).drop(
    columns = 'index_right'
).rename(
    columns = {'Community': 'CSA2020'}
)
print(geo_crime_data.shape)
geo_crime_data.head()

(236380, 10)


Unnamed: 0,X,Y,DATETIME,TARGET,geometry,GeoIdBlockCensus,TotalAreaBlockCensus,BusStopsCount,DistanceBusStop,CSA2020
0,-76.624824,39.252974,2018/04/02 04:00:00.000,ASALTO,POINT (-8529836.395 4757972.943),245102502072001,69042.148089,0.0,9767034.0,Cherry Hill
1,-76.592225,39.343284,2019/02/01 08:00:00.000,HURTO,POINT (-8526207.441 4770964.002),245102709031016,4196.749506,0.0,9770183.0,Northwood
2,-76.625705,39.284369,2017/09/27 21:00:00.000,HURTO,POINT (-8529934.426 4762487.253),245102201001005,15926.414256,0.0,9769345.0,Inner Harbor/Federal Hill
3,-76.478282,39.270194,2017/03/21 11:00:00.000,FALSO,POINT (-8513523.392 4760448.784),240054525001001,493438.317358,0.0,9753389.0,
4,-76.576006,39.291228,2017/03/07 02:00:00.000,ROBO,POINT (-8524401.959 4763473.817),245100102004002,7452.528531,1.0,9765019.0,Patterson Park North & East


# 911 Calls

In [15]:
# Se puede hacer el join con las variables fecha y community statistical areas
# Se lee como gdf y luego se hace un simple merge por estas variables.

# Subdirectorio data/sf/Baltimore

In [None]:
# Todas estas se pueden unir con un simple merge utilizando la variable CSA2020


data\sf\Maryland_Census_Boundaries_-_Census_Block_Groups_2020 polygons 
data\sf\Maryland_American_Community_Survey_-_ACS_Census_Tracts points
data\sf\Maryland_Annual_Average_Daily_Traffic_-_Annual_Average_Daily_Traffic_(SHA_Statewi points
data\sf\Maryland_Education_Facilities_-_PreK_thru_12_Education_(Public_Schools) points
data\sf\Maryland_Fire_-_County_Fire_Stations points
data\sf\Maryland_Housing_Designated_Areas_-_Qualified_Census_Tracts polygons
data\sf\Maryland_Incentive_Zones_-_Heritage_Areas polygons
data\sf\Maryland_Incentive_Zones_-_Opportunity_Zones polygons
data\sf\Maryland_Multifamily_Sites_-_Multifamily_Sites points
data\sf\Maryland_Police_-_County_Police_Stations points
data\sf\Maryland_Traffic_Cameras_-_Traffic_Cameras points

data\sf\Baltimore\Community_Statistical_Areas_(CSAs)__Reference_Boundaries
data\sf\Baltimore\Affordability_Index_-_Mortgage

# FBI Maryland Crime Data
file:///C:/Users/drdsd/Dropbox/Datathon/Datathon 2022/data/FBI/2021/NIBRS_ARRESTEE.csv Por fecha
file:///C:/Users/drdsd/Dropbox/Datathon/Datathon 2022/data/FBI/2021/NIBRS_incident.csv ídem 
file:///C:/Users/drdsd/Dropbox/Datathon/Datathon 2022/data/FBI/2021/NIBRS_month.csv ídem
file:///C:/Users/drdsd/Dropbox/Datathon/Datathon 2022/data/FBI/2021/NIBRS_OFFENDER.csv ídem
file:///C:/Users/drdsd/Dropbox/Datathon/Datathon 2022/data/FBI/2021/NIBRS_OFFENSE.csv ídem
file:///C:/Users/drdsd/Dropbox/Datathon/Datathon 2022/data/FBI/2021/NIBRS_PROPERTY.csv ídem
file:///C:/Users/drdsd/Dropbox/Datathon/Datathon 2022/data/FBI/2021/NIBRS_PROPERTY_DESC.csv ídem
file:///C:/Users/drdsd/Dropbox/Datathon/Datathon 2022/data/FBI/2021/NIBRS_SUSPECT_USING.csv ídem
file:///C:/Users/drdsd/Dropbox/Datathon/Datathon 2022/data/FBI/2021/NIBRS_SUSPECTED_DRUG.csv ídem
file:///C:/Users/drdsd/Dropbox/Datathon/Datathon 2022/data/FBI/2021/NIBRS_VICTIM.csv ídem
file:///C:/Users/drdsd/Dropbox/Datathon/Datathon 2022/data/FBI/2021/NIBRS_VICTIM_INJURY.csv ídem
file:///C:/Users/drdsd/Dropbox/Datathon/Datathon 2022/data/FBI/2021/NIBRS_VICTIM_OFFENDER_REL.csv ídem
file:///C:/Users/drdsd/Dropbox/Datathon/Datathon 2022/data/FBI/2021/NIBRS_VICTIM_OFFENSE.csv ídem
file:///C:/Users/drdsd/Dropbox/Datathon/Datathon 2022/data/FBI/2021/NIBRS_WEAPON.csv ídem

In [14]:
geo_crime_data.drop(columns = ['geometry']).to_csv('data/crime_data2.csv', index = False)