In [1]:
from shapely import geometry, ops
from geopandas.tools import sjoin
from shapely.geometry.polygon import Polygon
from shapely.geometry import Point
from shapely.ops import cascaded_union
from geopandas import datasets, GeoDataFrame, read_file

import numpy as np
import shapely.geometry as shg
import random
import pandas as pd
import geopandas as gpd
import folium
import json
import geojson
import branca
import re
import string
import unicodedata

In [2]:
# Function to compare if there are x features for every block in the city
def check_sort_Hos(Hos_list, geom):
    points_within = []
    for i in range(len(Hos_list)):
        p = Hos_list[i]
        if p.within(geom) == True:
            points_within.append(p)
    return points_within

## Load city block file 

In [3]:
DaneBog = gpd.read_file('/home/ubuntu/javeriana/MOTUS-PUJ/Step_2/1_spatial/spatial_features/city_block/manz.shp')
DaneBog.to_crs(epsg=4326, inplace=True)

In [4]:
DaneBog.head(2)

Unnamed: 0,OBJECTID,MANCODIGO,SECCODIGO,GLOBALID,Shape_Leng,Shape_Area,geometry
0,1,4570047,4570,{8E286507-9BD2-41B1-9B35-4F73176681DB},0.002392,3.030244e-07,"POLYGON ((-74.18854 4.60958, -74.18854 4.60955..."
1,2,2566091,2566,{2DC587C9-016D-47A8-921F-EF046A99628A},0.000874,4.740736e-08,"POLYGON ((-74.11275 4.54065, -74.11280 4.54064..."


In [5]:
len(DaneBog['MANCODIGO']) #Check number of city blocks

43952

In [6]:
any(DaneBog['MANCODIGO'].duplicated(keep=False)) # Check if there are duplicated codes

False

In [7]:
#Buffer city blocks otherwise a lot of points won't appear to be 'in' them
DaneBog['geometry'] = DaneBog['geometry'].buffer(0.00007)


  


## Hospitals in Bogotá

In [8]:
# DataFrame with locations of all Hospitals in Red Adscrita de Salud for Bogotá
file_path = '/home/ubuntu/javeriana/MOTUS-PUJ/Step_2/1_spatial/spatial_features/Health/RASA.shp'
HosBog = gpd.read_file(file_path)
HosBog.to_crs(epsg=4326, inplace=True)
del file_path
HosBog.head(2)

Unnamed: 0,OBJECTID,COHABILITA,RSOENTADSC,DIRSIREP,TELEFONO,FAX,COELECTRO,NIT,CLPRESTADO,NIVEL,CAPS,NPRE,NJURIDICA,CPERSONA,CPRESTADOR,geometry
0,1,110013000000.0,UNIDAD DE SERVICIOS DE SALUD LA VICTORIA 1,CL 37 BIS B SUR # 3 25 ESTE,2091480,Sin Dato,subcentrooriente@saludcapital.gov.co,900959051,Instituciones - IPS,3.0,1,SUBRED INTEGRADA DE SERVICIOS DE SALUD CENTRO ...,Publica,Juridica,1100130289,POINT (-74.09404 4.55355)
1,2,110013000000.0,UNIDAD DE SERVICIOS DE SALUD LOS ALPES,TV 10 B ESTE # 36 D 13 SUR,2091480-2060181-2074271,Sin Dato,subcentrooriente@saludcapital.gov.co,900959051,Instituciones - IPS,3.0,2,SUBRED INTEGRADA DE SERVICIOS DE SALUD CENTRO ...,Publica,Juridica,1100130289,POINT (-74.08368 4.55391)


In [9]:
# Number of Hospitals reported in file
len(HosBog['geometry'])

160

In [10]:
#Get the list of points where hospitals are 
HosList = HosBog['geometry'].tolist()
del HosBog

#Apply Function and count Hospitals per Block 
DaneBog['Hosp_Point'] = DaneBog.apply(lambda row: check_sort_Hos(HosList, row['geometry']), axis=1)
DaneBog['N_Hosp'] = DaneBog.apply(lambda row: len(row['Hosp_Point']), axis=1)

## IPS 

In [11]:
# DataFrame with locations of all IPS in Bogotá (institución prestadora de salud)
file_path = '/home/ubuntu/javeriana/MOTUS-PUJ/Step_2/1_spatial/spatial_features/Health/ips.shp'
IPSBog = gpd.read_file(file_path)
IPSBog.to_crs(epsg=4326, inplace=True)
del file_path
IPSBog.head(1)

Unnamed: 0,OBJECTID,Id,departamen,municipio,codigo_pre,nombre_pre,nombre,tipo_zona,direccion,barrio,...,sede_princ,horario_lu,horario_ma,horario_mi,horario_ju,horario_vi,horario_sa,horario_do,fecha_cort,geometry
0,1.0,1.0,Bogotá D.C,BOGOTÁ,1100100032,COOPERATIVA PARA LA SALUD ORAL ORALCOOP,COOPERATIVA PARA LA SALUD ORAL ORALCOOP,URBANA,KR 64 # 100 55,SUBA,...,SI,,,,,,,,Fecha corte REPS: Jul 23 2020 11:22AM,POINT (-74.06809 4.69053)


In [12]:
#Get list of Latitude Longitude points where IPS are located in Bogotá City 
IPSList = IPSBog['geometry'].tolist()
# Number of IPS reported in file
len(IPSBog['geometry'].tolist())

2900

In [13]:
#Apply Function and count IPS per Block 
DaneBog['IPS_Point'] = DaneBog.apply(lambda row: check_sort_Hos(IPSList, row['geometry']), axis=1)
DaneBog['N_IPS'] = DaneBog.apply(lambda row: len(row['IPS_Point']), axis=1)
DaneBog.head(2)

Unnamed: 0,OBJECTID,MANCODIGO,SECCODIGO,GLOBALID,Shape_Leng,Shape_Area,geometry,Hosp_Point,N_Hosp,IPS_Point,N_IPS
0,1,4570047,4570,{8E286507-9BD2-41B1-9B35-4F73176681DB},0.002392,3.030244e-07,"POLYGON ((-74.18847 4.60955, -74.18848 4.60953...",[],0,[],0
1,2,2566091,2566,{2DC587C9-016D-47A8-921F-EF046A99628A},0.000874,4.740736e-08,"POLYGON ((-74.11274 4.54058, -74.11280 4.54057...",[],0,[],0


In [14]:
del IPSList, IPSBog

## Schools in Bogotá

In [15]:
# Load file
colegios = '/home/ubuntu/javeriana/MOTUS-PUJ/Step_2/1_spatial/spatial_features/colegios_062020.geojson'
f = open(colegios,)
col_data = json.load(f)

In [16]:
#Get list of Latitude Longitude points where schools are located in Bogotá City
col_point_list = []
for i in range(len(col_data['features'])):
    x = col_data['features'][i]['geometry']['coordinates'][0] #long
    y = col_data['features'][i]['geometry']['coordinates'][1] #lat
    
    p = Point(x, y)
    col_point_list.append(p)
    
f.close()

In [17]:
# Number of schools reported in file
len(col_point_list)

2539

In [18]:
#Apply Function and count schools per Block
DaneBog['Col_Point'] = DaneBog.apply(lambda row: check_sort_Hos(col_point_list, row['geometry']), axis=1)
DaneBog['N_Col'] = DaneBog.apply(lambda row: len(row['Col_Point']), axis=1)

DaneBog.head(2)

Unnamed: 0,OBJECTID,MANCODIGO,SECCODIGO,GLOBALID,Shape_Leng,Shape_Area,geometry,Hosp_Point,N_Hosp,IPS_Point,N_IPS,Col_Point,N_Col
0,1,4570047,4570,{8E286507-9BD2-41B1-9B35-4F73176681DB},0.002392,3.030244e-07,"POLYGON ((-74.18847 4.60955, -74.18848 4.60953...",[],0,[],0,[],0
1,2,2566091,2566,{2DC587C9-016D-47A8-921F-EF046A99628A},0.000874,4.740736e-08,"POLYGON ((-74.11274 4.54058, -74.11280 4.54057...",[],0,[],0,[],0


In [19]:
del colegios, f, col_point_list

## Market Places in Bogotá

In [20]:
#Load file
path = "/home/ubuntu/javeriana/MOTUS-PUJ/Step_2/1_spatial/spatial_features/plazas-de-mercado.csv"
PlazMerBog = pd.read_csv(path, encoding = 'UTF-8',  sep='\t', delimiter=";")

PlazMerBog['coord_x'] = PlazMerBog['coord_x'].str.replace(',', '.', regex=False)
PlazMerBog['coord_y'] = PlazMerBog['coord_y'].str.replace(',', '.', regex=False)

PlazMerBog.head(2)

Unnamed: 0,Id,Nombre,Dirección,Localidad,Barrio,Teléfono,Horario,Descripcion,Numero de locales,Sitio web,coord_x,coord_y
0,1,Plaza de Mercado Doce de Octubre,Calle 72 # 51 - 62,Barrios Unidos,DOCE DE OCTUBRE,2509756,"Lunes, martes, miércoles, viernes y sábado: 7:...","La Plaza de Mercado Doce de Octubre, fundada e...",237,http://www.ipes.gov.co,-74.0744208,4.669080684
1,2,Plaza de Mercado Fontibón,Calle 19 # 103 - 26,Fontibón,SAN JOSE DE LAS PISTAS,4213606,"Lunes, martes, jueves, viernes y Sabado 7:30 A...","La Plaza de Mercado Distrital de Fontibón, fun...",333,http://www.ipes.gov.co,-74.14450816,4.676126732


In [21]:
#Get list of Latitude Longitude points where market places are located in Bogotá City
PlazMer_point_list = []
for i in range(len(PlazMerBog['coord_x'])):
    x = PlazMerBog.loc[i, 'coord_x']
    y = PlazMerBog.loc[i, 'coord_y']
    
    x = float(x)
    y = float(y)
    
    p = Point(x, y)
    PlazMer_point_list.append(p)
    
del PlazMerBog

#Number of Market places reported
len(PlazMer_point_list)

19

In [22]:
#Apply Function and count marketplaces per Block
DaneBog['PlazMer_Point'] = DaneBog.apply(lambda row: check_sort_Hos(PlazMer_point_list, row['geometry']), axis=1)
DaneBog['N_PlazMer'] = DaneBog.apply(lambda row: len(row['PlazMer_Point']), axis=1)

DaneBog.head(2)

Unnamed: 0,OBJECTID,MANCODIGO,SECCODIGO,GLOBALID,Shape_Leng,Shape_Area,geometry,Hosp_Point,N_Hosp,IPS_Point,N_IPS,Col_Point,N_Col,PlazMer_Point,N_PlazMer
0,1,4570047,4570,{8E286507-9BD2-41B1-9B35-4F73176681DB},0.002392,3.030244e-07,"POLYGON ((-74.18847 4.60955, -74.18848 4.60953...",[],0,[],0,[],0,[],0
1,2,2566091,2566,{2DC587C9-016D-47A8-921F-EF046A99628A},0.000874,4.740736e-08,"POLYGON ((-74.11274 4.54058, -74.11280 4.54057...",[],0,[],0,[],0,[],0


In [23]:
del PlazMer_point_list

## Tourist attractions in Bogotá 

In [25]:
# Load file
path = "/home/ubuntu/javeriana/MOTUS-PUJ/Step_2/1_spatial/spatial_features/Tourism/ITur.shp"
ITurBog = gpd.read_file(path)
ITurBog.to_crs(epsg=4326, inplace=True)

ITurBog.head(2)

Unnamed: 0,OBJECTID,CODIGO,NOMBRE,DIRECCION,TIPO_DE_PA,ICONOGRAFI,NOMBRE_PRO,DIRECCIO_1,CORREO_PRO,LATITUD,LONGITUD,ID,TELEFONO,geometry
0,1,11001.1.1.2.2.6.1.3499,Parque Cedro Golf Club,Carrera 7A # 150 - 85,Patrimonio cultural material inmueble,Atractivo Cultural,Instituto Distrital de Recreación y Deporte - ...,Calle 63 # 59A - 06,atncliente@idrd.gov.co,4.725616,-74.026808,3499,5716605400,POINT (-74.02681 4.72562)
1,2,11001.1.3.13.2.6.1.3500,Vitrina Turística ANATO,Carerra 37 # 24 - 67,Patrimonio cultural inmaterial,Atractivo Cultural,Asociación Colombiana de Agencias de Viajes y ...,Carrera 19B # 83 - 63 Edificio ANATO Piso 8,eventos@anato.org,4.629172,-74.089762,3500,5714322040,POINT (-74.08976 4.62917)


In [26]:
#Get list of Latitude Longitude points where touristic attractions are located in Bogotá City 
ITurList = ITurBog['geometry'].tolist()

# Number of Touristic points reported in file
len(ITurBog['geometry'].tolist())

442

In [27]:
#Apply Function and count Touristic points per Block 
DaneBog['ITur_Point'] = DaneBog.apply(lambda row: check_sort_Hos(ITurList, row['geometry']), axis=1)
DaneBog['N_ITur'] = DaneBog.apply(lambda row: len(row['ITur_Point']), axis=1)

DaneBog.head(2)

Unnamed: 0,OBJECTID,MANCODIGO,SECCODIGO,GLOBALID,Shape_Leng,Shape_Area,geometry,Hosp_Point,N_Hosp,IPS_Point,N_IPS,Col_Point,N_Col,PlazMer_Point,N_PlazMer,ITur_Point,N_ITur
0,1,4570047,4570,{8E286507-9BD2-41B1-9B35-4F73176681DB},0.002392,3.030244e-07,"POLYGON ((-74.18847 4.60955, -74.18848 4.60953...",[],0,[],0,[],0,[],0,[],0
1,2,2566091,2566,{2DC587C9-016D-47A8-921F-EF046A99628A},0.000874,4.740736e-08,"POLYGON ((-74.11274 4.54058, -74.11280 4.54057...",[],0,[],0,[],0,[],0,[],0


In [28]:
del ITurList, ITurBog

## SITP ()

In [29]:
# Load file 
sitp = '/home/ubuntu/javeriana/MOTUS-PUJ/Bog_Estimation/DANE/paraderos_SITP.json'
f = open(sitp,)
sitp_data = json.load(f)

#Get list of Latitude Longitude points where bus stops are located in Bogotá City
sitp_point_list = []
sitp_stop_name = []

for i in range(len(sitp_data['features'])):
    x = sitp_data['features'][i]['geometry']['x'] #long
    y = sitp_data['features'][i]['geometry']['y'] #lat
    n = sitp_data['features'][i]['attributes']['nombre_par']
    
    p = Point(x, y)
    sitp_point_list.append(p)
    sitp_stop_name.append(n)
    
f.close()

#Number of bus stops reported in file
len(sitp_point_list)

7653

In [30]:
# Create a DataFrame in which we have all SITP Points in Bogotá
sitp_Point_df = GeoDataFrame(sitp_point_list, columns=['geometry'])
sitp_Point_df['Nombre_Parada'] = sitp_stop_name
sitp_Point_df.crs = "EPSG:4326"
sitp_Point_df.to_crs(epsg=4326, inplace=True)

sitp_Point_df.head(2)

Unnamed: 0,geometry,Nombre_Parada
0,POINT (-74.14663 4.40493),Santa Bárbara
1,POINT (-74.12839 4.46837),Usme Rural


In [31]:
# Make Spatial Join between SITP Points and Bogotá city blocks
DaneBog = sjoin(DaneBog, sitp_Point_df, how="left", op='intersects')
DaneBog = DaneBog.fillna(-1)
DaneBog['sitp_Point'] = DaneBog.apply(lambda row: sitp_Point_df.loc[row['index_right'] ,'geometry'] if row['index_right'] != -1 else -1, axis=1)
DaneBog.head(1)

Unnamed: 0,OBJECTID,MANCODIGO,SECCODIGO,GLOBALID,Shape_Leng,Shape_Area,geometry,Hosp_Point,N_Hosp,IPS_Point,N_IPS,Col_Point,N_Col,PlazMer_Point,N_PlazMer,ITur_Point,N_ITur,index_right,Nombre_Parada,sitp_Point
0,1,4570047,4570,{8E286507-9BD2-41B1-9B35-4F73176681DB},0.002392,3.030244e-07,"POLYGON ((-74.18847 4.60955, -74.18848 4.60953...",[],0,[],0,[],0,[],0,[],0,-1.0,-1,-1


In [32]:
# Since spatial join clones rows we need to group them by block to really count how many bus stops there are in a block
DaneBog_sitp = DaneBog.groupby(['MANCODIGO', 'index_right'])['sitp_Point'].size()
DaneBog_sitp = pd.DataFrame(DaneBog_sitp)
DaneBog_sitp = DaneBog_sitp.reset_index(drop=False)
DaneBog_sitp['Points'] = DaneBog_sitp.apply(lambda row: sitp_Point_df.loc[row['index_right'] ,'geometry'] if row['index_right'] != -1 else -1 ,axis=1)

#Check up the DataFrame
DaneBog_sitp.iloc[160:170, :]

Unnamed: 0,MANCODIGO,index_right,sitp_Point,Points
160,1106067,1208.0,1,POINT (-74.08342825378065 4.570310221884618)
161,1106070,-1.0,1,-1
162,1107001,-1.0,1,-1
163,1107002,-1.0,1,-1
164,1107003,-1.0,1,-1
165,1107004,-1.0,1,-1
166,1107005,1124.0,1,POINT (-74.08269051339558 4.568437939466808)
167,1107006,-1.0,1,-1
168,1107007,-1.0,1,-1
169,1107008,-1.0,1,-1


In [33]:
# Now we have to delete duplicates from the original DF
DaneBog = DaneBog.sort_values(by='MANCODIGO', ascending=True) #Sort by Block Code

DaneBog.drop_duplicates(subset=['MANCODIGO'], inplace=True, keep='first') #Drop duplicate rows
DaneBog.reset_index(drop=True, inplace=True) #Reset original DF index

In [34]:
# Now that we have only one block of all Bogotá city blocks we need to count how many sitp stops there are in every block and 
# make a list with their points for every block

#First we will set block codes as the DF index
DaneBog.set_index('MANCODIGO', inplace=True)
#Create new column filled with empty lists
DaneBog['SITP_Point'] = DaneBog.apply(lambda row: [] if row['index_right'] != -1 else 0, axis=1)

# Now we add every bus stop to its respective city block using unique city block code as index
DaneBog_sitp.apply(lambda row: DaneBog.loc[row['MANCODIGO'], 'SITP_Point'].append(row['Points']) if row['index_right'] != -1 else 0 , axis=1)

0        0.0
1        NaN
2        0.0
3        0.0
4        NaN
        ... 
45345    0.0
45346    0.0
45347    0.0
45348    0.0
45349    0.0
Length: 45350, dtype: float64

In [35]:
#Reset index
DaneBog.reset_index(drop=False, inplace=True)
#Drop cols from sjoin method
DaneBog.drop(columns=['index_right', 'sitp_Point', 'Nombre_Parada'], inplace=True)
#Count points per block
DaneBog['N_SITP'] = DaneBog.apply(lambda row: len(row['SITP_Point']) if row['SITP_Point'] != 0 else 0 ,axis=1)

In [36]:
del sitp_Point_df, f, sitp_point_list, sitp_stop_name, DaneBog_sitp

## Comercial establishments

In [37]:
# Read csv file where commercial in Bogotá is located
path = "/home/ubuntu/javeriana/MOTUS-PUJ/Bog_Estimation/DANE/Establecimiento_comercial/establecimientos-comerciales-2016.csv"
EcomerBog = pd.read_csv(path, encoding = 'UTF-8',  sep='\t', delimiter=";")

EcomerBog = EcomerBog.rename(columns={'Unnamed: 11': 'x', 'geo_point_2d': 'y'})

EcomerBog.head(2)

Unnamed: 0,OBJECTID,Razón Social,Dirección Comecial,Coordenada X,Coordenada Y,Codigo CIIU,Código Actividad Económica,Descripción Código Comercio,Actividad Económica,geo_shape,y,x
0,3814,LA CASITA HOGARENA,CL 70 B SUR NO. 79-68,87191.41,100953.16,1089.0,0.0,ELABORACION DE OTROS PRODUCTOS ALIMENTICIOS N....,Industrias manufactureras,"{""type"": ""Point"", ""coordinates"": [-74.19292633...",4.604844,-74.192926
1,59482,LEOPAN ESPECIAL,CL 132 D NO. 150D-04,95174.06,116337.25,1081.0,0.0,ELABORACION DE PRODUCTOS DE PANADERIA,Industrias manufactureras,"{""type"": ""Point"", ""coordinates"": [-74.12101001...",4.743909,-74.12101


In [38]:
# Number of commercial establishments reported
len(EcomerBog['OBJECTID'])

439887

In [39]:
#Get names and lat, long points list for commercial activities
EcomerBog_list = []
EcomerBog_name = []
EcomerBog.apply(lambda row: EcomerBog_list.append(Point(row['x'], row['y'])), axis=1)
EcomerBog.apply(lambda row: EcomerBog_name.append(row['Actividad Económica']), axis=1)

0         None
1         None
2         None
3         None
4         None
          ... 
439882    None
439883    None
439884    None
439885    None
439886    None
Length: 439887, dtype: object

In [40]:
# Create DataFrame with reported points
Ecomer_Point_df = GeoDataFrame(EcomerBog_list, columns=['geometry'])
Ecomer_Point_df['Actividad'] = EcomerBog_name
Ecomer_Point_df.crs = "EPSG:4326"
Ecomer_Point_df.to_crs(epsg=4326, inplace=True)

Ecomer_Point_df.head(2)

Unnamed: 0,geometry,Actividad
0,POINT (-74.19293 4.60484),Industrias manufactureras
1,POINT (-74.12101 4.74391),Industrias manufactureras


In [41]:
#Execute spatial join between Bogotá commercial activities points and city blocks
DaneBog = sjoin(DaneBog, Ecomer_Point_df, how="left", op='intersects')
DaneBog = DaneBog.reset_index(drop=True)

DaneBog = DaneBog.fillna(-1)
DaneBog['Ecomer_Point'] = DaneBog.apply(lambda row: -1 if row['index_right'] == -1 else Ecomer_Point_df.loc[row['index_right'], 'geometry'], axis=1)

DaneBog.iloc[130:133, :]

Unnamed: 0,MANCODIGO,OBJECTID,SECCODIGO,GLOBALID,Shape_Leng,Shape_Area,geometry,Hosp_Point,N_Hosp,IPS_Point,...,N_Col,PlazMer_Point,N_PlazMer,ITur_Point,N_ITur,SITP_Point,N_SITP,index_right,Actividad,Ecomer_Point
130,1101009,4816,1101,{24280147-DE3A-463D-A81E-1C1DC182C038},0.008383,3e-06,"POLYGON ((-74.08438 4.58229, -74.08431 4.58239...",[],0,[],...,0,[],0,[],0,[POINT (-74.08327159240997 4.583286970543431)],1,434359.0,Comercio al por mayor y al por menor; reparaci...,POINT (-74.08429910640001 4.58220944243)
131,1101009,4816,1101,{24280147-DE3A-463D-A81E-1C1DC182C038},0.008383,3e-06,"POLYGON ((-74.08438 4.58229, -74.08431 4.58239...",[],0,[],...,0,[],0,[],0,[POINT (-74.08327159240997 4.583286970543431)],1,382692.0,"Actividades profesionales, cient¡ficas y t‚cnicas",POINT (-74.08429910640001 4.58220944243)
132,1101009,4816,1101,{24280147-DE3A-463D-A81E-1C1DC182C038},0.008383,3e-06,"POLYGON ((-74.08438 4.58229, -74.08431 4.58239...",[],0,[],...,0,[],0,[],0,[POINT (-74.08327159240997 4.583286970543431)],1,22074.0,Industrias manufactureras,POINT (-74.08429910640001 4.58220944243)


In [42]:
# Since spatial join clones rows we need to group them by block to really count 
# how many commercial points there are in a block
DaneBog_Ecomer = DaneBog.groupby(['MANCODIGO', 'index_right'])['Ecomer_Point'].size()
DaneBog_Ecomer = pd.DataFrame(DaneBog_Ecomer)
DaneBog_Ecomer = DaneBog_Ecomer.reset_index(drop=False)
DaneBog_Ecomer['Points'] = DaneBog_Ecomer.apply(lambda row: Ecomer_Point_df.loc[row['index_right'] ,'geometry'] if row['index_right'] != -1 else -1 ,axis=1)
DaneBog_Ecomer.iloc[140:143, :]

Unnamed: 0,MANCODIGO,index_right,Ecomer_Point,Points
140,1101009,302776.0,1,POINT (-74.08429910640001 4.58220944243)
141,1101009,304575.0,1,POINT (-74.0844571713 4.582201542830001)
142,1101009,305986.0,1,POINT (-74.0827896088 4.58350429631)


In [43]:
# Now we have to delete duplicates from the original DF
DaneBog = DaneBog.sort_values(by='MANCODIGO', ascending=True) #Sort by Block Code

DaneBog.drop_duplicates(subset=['MANCODIGO'], inplace=True, keep='first') #Drop duplicate rows
DaneBog.reset_index(drop=True, inplace=True) #Reset original DF index
DaneBog.iloc[130:133, :]

Unnamed: 0,MANCODIGO,OBJECTID,SECCODIGO,GLOBALID,Shape_Leng,Shape_Area,geometry,Hosp_Point,N_Hosp,IPS_Point,...,N_Col,PlazMer_Point,N_PlazMer,ITur_Point,N_ITur,SITP_Point,N_SITP,index_right,Actividad,Ecomer_Point
130,1106009,42767,1106,{26441C86-3E64-48F8-804F-04E738EF7D4B},0.004853,5.840964e-07,"POLYGON ((-74.08431 4.56879, -74.08427 4.56877...",[],0,[],...,0,[],0,[],0,[POINT (-74.08422711018621 4.568381809638187)],1,121343.0,Industrias manufactureras,POINT (-74.0845185343 4.568513132300001)
131,1106010,15843,1106,{5FED7CB8-CEFA-4B01-BA06-5B80D65A620A},0.001821,1.90341e-07,"POLYGON ((-74.08338 4.56783, -74.08338 4.56783...",[],0,[],...,0,[],0,[],0,0,0,281963.0,Comercio al por mayor y al por menor; reparaci...,POINT (-74.0833438074 4.5680989402)
132,1106011,2817,1106,{9B9EB679-78E3-47B0-9708-552B973749CB},0.003883,4.98836e-07,"POLYGON ((-74.08345 4.56739, -74.08345 4.56740...",[],0,[],...,0,[],0,[],0,0,0,350896.0,Otras actividades de servicios,POINT (-74.08304363890001 4.5681236048)


In [44]:
# Now that we have only one block of all Bogotá city blocks we need to count how many 
#commercial establishments there are in every block and 
# make a list with their points for every block

#First we will set block codes as the DF index 
DaneBog.set_index('MANCODIGO', inplace=True)
DaneBog['Ecomer_Point'] = DaneBog.apply(lambda row: [] if row['index_right'] != -1 else 0, axis=1) #Create new column filled with empty lists

In [45]:
# Now we add every commercial activity to its respective city block using unique city block code as index
DaneBog_Ecomer.apply(lambda row: DaneBog.loc[row['MANCODIGO'], 'Ecomer_Point'].append(row['Points']) if row['index_right'] != -1 else 0 , axis=1)

0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
         ... 
456470    0.0
456471    0.0
456472    0.0
456473    0.0
456474    0.0
Length: 456475, dtype: float64

In [46]:
#Reset index
DaneBog.reset_index(drop=False, inplace=True)
#Drop cols generated by spatial join
DaneBog.drop(columns=['index_right', 'Actividad'], inplace=True)
DaneBog['N_Ecomer'] = DaneBog.apply(lambda row: len(row['Ecomer_Point']) if row['Ecomer_Point'] != 0 else 0 ,axis=1)

DaneBog.iloc[130:132, :]

Unnamed: 0,MANCODIGO,OBJECTID,SECCODIGO,GLOBALID,Shape_Leng,Shape_Area,geometry,Hosp_Point,N_Hosp,IPS_Point,...,Col_Point,N_Col,PlazMer_Point,N_PlazMer,ITur_Point,N_ITur,SITP_Point,N_SITP,Ecomer_Point,N_Ecomer
130,1106009,42767,1106,{26441C86-3E64-48F8-804F-04E738EF7D4B},0.004853,5.840964e-07,"POLYGON ((-74.08431 4.56879, -74.08427 4.56877...",[],0,[],...,[],0,[],0,[],0,[POINT (-74.08422711018621 4.568381809638187)],1,"[POINT (-74.0838420804 4.568426859980001), POI...",13
131,1106010,15843,1106,{5FED7CB8-CEFA-4B01-BA06-5B80D65A620A},0.001821,1.90341e-07,"POLYGON ((-74.08338 4.56783, -74.08338 4.56783...",[],0,[],...,[],0,[],0,[],0,0,0,"[POINT (-74.0831848927 4.5683907523), POINT (-...",9


In [47]:
del DaneBog_Ecomer, Ecomer_Point_df, EcomerBog_list, EcomerBog_name, EcomerBog

In [48]:
# Finally base DataFrame that contains the count of each spatial feature by city block. 
path = '/home/ubuntu/javeriana/MOTUS-PUJ/Step_2/1_spatial/Outputs/ManzanasGDF.gzip'
DaneBog.to_pickle(path, compression='gzip')

### You can now add the spatial feature count to a higher administrative divison

In [86]:
path = '/home/ubuntu/javeriana/MOTUS-PUJ/Step_2/1_spatial/spatial_features/Locality/Loca.shp'
Localities = gpd.read_file(path)
Localities.head(2)

Unnamed: 0,LocNombre,LocAAdmini,LocArea,LocCodigo,SHAPE_Leng,SHAPE_Area,geometry
0,ANTONIO NARIÑO,Acuerdo 117 de 2003,4879543.0,15,0.108973,0.000397,"POLYGON ((-74.13075 4.59335, -74.12917 4.59327..."
1,TUNJUELITO,Acuerdo 117 de 2003,9910940.0,6,0.210542,0.000807,"POLYGON ((-74.13777 4.59489, -74.13165 4.59363..."


In [4]:
path = '/home/ubuntu/javeriana/MOTUS-PUJ/Step_2/1_spatial/Outputs/ManzanasGDF.gzip'
DaneBog = pd.read_pickle(path, compression='gzip')
DaneBog.iloc[100:103, :].drop(columns={'GLOBALID'})

Unnamed: 0,MANCODIGO,OBJECTID,SECCODIGO,Shape_Leng,Shape_Area,geometry,Hosp_Point,N_Hosp,IPS_Point,N_IPS,Col_Point,N_Col,PlazMer_Point,N_PlazMer,ITur_Point,N_ITur,SITP_Point,N_SITP,Ecomer_Point,N_Ecomer
100,1103058,12063,1103,0.003967,3.220428e-07,"POLYGON ((-74.07823 4.57851, -74.07823 4.57851...",[],0,[],0,[],0,[],0,[],0,[POINT (-74.07789560769231 4.57959031642613)],1,"[POINT (-74.077693751 4.57989843108), POINT (-...",3
101,1103059,17053,1103,0.004543,1.139462e-07,"POLYGON ((-74.07561 4.57878, -74.07561 4.57878...",[],0,[],0,[],0,[],0,[],0,"[POINT (-74.07651068899344 4.579088815374503),...",2,[POINT (-74.07647502819999 4.57910274815)],1
102,1103060,29628,1103,0.007941,5.105871e-07,"POLYGON ((-74.07604 4.57804, -74.07604 4.57804...",[],0,[],0,[],0,[],0,[],0,0,0,0,0


In [88]:
Localities = Localities.sort_values(by='LocCodigo', ascending=True)
Localities = Localities.reset_index(drop=True)
Localities['LocCodigo'] = pd.to_numeric(Localities['LocCodigo'], downcast='integer')
Localities.head(2)

Unnamed: 0,LocNombre,LocAAdmini,LocArea,LocCodigo,SHAPE_Leng,SHAPE_Area,geometry
0,USAQUEN,Acuerdo 8 de 1977 y Res 1751 de 2016,65201410.0,1,0.436907,0.00531,"POLYGON ((-74.01116 4.66459, -74.01117 4.66460..."
1,CHAPINERO,Acuerdo 8 de 1977 y Res 1751 de 2016,38008910.0,2,0.333966,0.003095,"POLYGON ((-74.01116 4.66459, -74.01154 4.66461..."


In [89]:
def Arreglar_tilde(Texto):
    Texto = unicodedata.normalize('NFD', Texto)
    Texto = Texto.encode('ascii', 'ignore')
    Texto = Texto.decode("utf-8")
    Texto = Texto.lower()
    return(Texto)

In [90]:
Localidades = Localities['LocNombre'].to_list()
Id = Localities['LocCodigo'].to_list()
geom = Localities['geometry'].to_list()

LocOrder = GeoDataFrame(geom, columns=['geometry'])
LocOrder['Id'] = Id
LocOrder['Name'] = Localidades

LocOrder = LocOrder[['Name', 'Id', 'geometry']]
LocOrder['Name'] = LocOrder['Name'].apply(lambda row: Arreglar_tilde(Texto=row))

LocOrder.crs = "EPSG:4326"
LocOrder.to_crs(epsg=4326, inplace=True)

LocOrder.head(2)

Unnamed: 0,Name,Id,geometry
0,usaquen,1,"POLYGON ((-74.01116 4.66459, -74.01117 4.66460..."
1,chapinero,2,"POLYGON ((-74.01116 4.66459, -74.01154 4.66461..."


In [91]:
LocOrder['Hospitals'] = LocOrder.apply(lambda row: [] ,axis=1)
LocOrder['N_Hospitals'] = 0

LocOrder['IPS'] = LocOrder.apply(lambda row: [] ,axis=1)
LocOrder['N_IPS'] = 0

LocOrder['Colegios'] = LocOrder.apply(lambda row: [] ,axis=1)
LocOrder['N_Colegios'] = 0

LocOrder['PlazaMer'] = LocOrder.apply(lambda row: [] ,axis=1)
LocOrder['N_PlazaMer'] = 0

LocOrder['Turismo'] = LocOrder.apply(lambda row: [] ,axis=1)
LocOrder['N_Turismo'] = 0

LocOrder['SITP'] = LocOrder.apply(lambda row: [] ,axis=1)
LocOrder['N_SITP'] = 0

LocOrder['Comercio'] = LocOrder.apply(lambda row: [] ,axis=1)
LocOrder['N_Comercio'] = 0

In [92]:
def AggVar (BlockGeom, Var, VarName):
    for i in range(len(LocOrder['Id'])):
        if BlockGeom.within(LocOrder.loc[i, 'geometry']) == True and Var != 0:
            LocOrder.loc[i, VarName].append(Var)

def flatten(t):
    return [item for sublist in t for item in sublist]

In [93]:
# Adding Hospitals for every Locality
DaneBog.apply(lambda row: AggVar(row['geometry'], row['Hosp_Point'], 'Hospitals'), axis=1)

#Flatten localities lists 
LocOrder['Hospitals'] = LocOrder.apply(lambda row: flatten(row['Hospitals']) ,axis=1)

#Count How many Hospitals there are per Locality 
LocOrder['N_Hospitals'] = LocOrder.apply(lambda row: len(row['Hospitals']), axis=1)

In [94]:
# Adding IPS for every Locality
DaneBog.apply(lambda row: AggVar(row['geometry'], row['IPS_Point'], 'IPS'), axis=1)

#Flatten localities lists 
LocOrder['IPS'] = LocOrder.apply(lambda row: flatten(row['IPS']) ,axis=1)

#Count How many IPS there are per Locality 
LocOrder['N_IPS'] = LocOrder.apply(lambda row: len(row['IPS']), axis=1)

In [95]:
# Adding Schools for every Locality
DaneBog.apply(lambda row: AggVar(row['geometry'], row['Col_Point'], 'Colegios'), axis=1)

#Flatten localities lists 
LocOrder['Colegios'] = LocOrder.apply(lambda row: flatten(row['Colegios']) ,axis=1)

#Count How many Schools there are per Locality 
LocOrder['N_Colegios'] = LocOrder.apply(lambda row: len(row['Colegios']), axis=1)

In [96]:
# Adding Market Places for every Locality
DaneBog.apply(lambda row: AggVar(row['geometry'], row['PlazMer_Point'], 'PlazaMer'), axis=1)

#Flatten localities lists 
LocOrder['PlazaMer'] = LocOrder.apply(lambda row: flatten(row['PlazaMer']) ,axis=1)

#Count How many Market Places there are per Locality 
LocOrder['N_PlazaMer'] = LocOrder.apply(lambda row: len(row['PlazaMer']), axis=1)

In [97]:
# Adding Touristic Places for every Locality
DaneBog.apply(lambda row: AggVar(row['geometry'], row['ITur_Point'], 'Turismo'), axis=1)

#Flatten localities lists 
LocOrder['Turismo'] = LocOrder.apply(lambda row: flatten(row['Turismo']) ,axis=1)

#Count How many Touristic Places there are per Locality 
LocOrder['N_Turismo'] = LocOrder.apply(lambda row: len(row['Turismo']), axis=1)

In [98]:
# Adding Sitp stops for every Locality
DaneBog.apply(lambda row: AggVar(row['geometry'], row['SITP_Point'], 'SITP'), axis=1)

#Flatten localities lists 
LocOrder['SITP'] = LocOrder.apply(lambda row: flatten(row['SITP']) ,axis=1)

#Count How many Sitp stops there are per Locality 
LocOrder['N_SITP'] = LocOrder.apply(lambda row: len(row['SITP']), axis=1)

In [99]:
# Adding commercial places for every Locality
DaneBog.apply(lambda row: AggVar(row['geometry'], row['Ecomer_Point'], 'Comercio'), axis=1)

#Flatten localities lists 
LocOrder['Comercio'] = LocOrder.apply(lambda row: flatten(row['Comercio']) ,axis=1)

#Count How many commercial places there are per Locality 
LocOrder['N_Comercio'] = LocOrder.apply(lambda row: len(row['Comercio']), axis=1)

In [100]:
LocOrder

Unnamed: 0,Name,Id,geometry,Hospitals,N_Hospitals,IPS,N_IPS,Colegios,N_Colegios,PlazaMer,N_PlazaMer,Turismo,N_Turismo,SITP,N_SITP,Comercio,N_Comercio
0,usaquen,1,"POLYGON ((-74.01116 4.66459, -74.01117 4.66460...","[POINT (-74.03049903995752 4.696348669863934),...",9,"[POINT (-74.05239073194667 4.707054002140524),...",522,"[POINT (-74.05148000010526 4.707330000137796),...",155,[],0,"[POINT (-74.03800200007751 4.706198999783834),...",24,"[POINT (-74.04845399989965 4.709916000204656),...",641,"[POINT (-74.05215223819999 4.70672435378), POI...",29430
1,chapinero,2,"POLYGON ((-74.01116 4.66459, -74.01154 4.66461...",[],0,"[POINT (-74.06457413998453 4.630227659978573),...",499,"[POINT (-74.06135000033767 4.657600000270593),...",28,[],0,"[POINT (-74.05759900042719 4.65167600012609), ...",34,"[POINT (-74.0601199998473 4.628508999872917), ...",298,"[POINT (-74.05968344760001 4.62798567818), POI...",25759
2,santa fe,3,"POLYGON ((-73.99446 4.61425, -73.99457 4.61414...","[POINT (-74.07947713499863 4.58666469075906), ...",4,"[POINT (-74.07392426629184 4.607299182711188),...",64,"[POINT (-74.07435999966194 4.611399999950327),...",38,"[POINT (-74.07823978 4.587526918), POINT (-74....",2,"[POINT (-74.07355899977087 4.610548999818825),...",78,"[POINT (-74.07374293377337 4.612080975335097),...",175,"[POINT (-74.0723317969 4.6134431563), POINT (-...",21037
3,san cristobal,4,"POLYGON ((-74.02900 4.55830, -74.02930 4.55821...","[POINT (-74.08337294962112 4.569683380132004),...",9,"[POINT (-74.08337294930215 4.569683377985541),...",34,"[POINT (-74.0807200000083 4.5814999996867), PO...",162,[POINT (-74.09333003 4.57006792)],1,"[POINT (-74.0805869999389 4.576658999446071), ...",5,"[POINT (-74.08057397553043 4.585286425284456),...",429,"[POINT (-74.0818740288 4.58465360735), POINT (...",11885
4,usme,5,"POLYGON ((-74.05597 4.50832, -74.05611 4.50822...","[POINT (-74.08600793023331 4.497607729905649),...",13,"[POINT (-74.08993557656555 4.516155352087878),...",31,"[POINT (-74.09294000021046 4.509140000143324),...",127,[],0,"[POINT (-74.12532599995146 4.471033999632081),...",3,"[POINT (-74.08821870355031 4.505187605097326),...",311,"[POINT (-74.0867958596 4.50225913382), POINT (...",7892
5,tunjuelito,6,"POLYGON ((-74.13777 4.59489, -74.13165 4.59363...","[POINT (-74.12823188999548 4.571451640422253),...",7,"[POINT (-74.12823188569291 4.571451634885079),...",31,"[POINT (-74.12652000020293 4.571299999903396),...",69,"[POINT (-74.12948299999999 4.570148), POINT (-...",3,"[POINT (-74.13450700044578 4.571976999614483),...",2,"[POINT (-74.12687992116939 4.569925887813093),...",166,"[POINT (-74.1257151187 4.57007610969), POINT (...",10207
6,bosa,7,"POLYGON ((-74.18567 4.64705, -74.18480 4.64497...","[POINT (-74.2139538496631 4.614829649996022), ...",18,"[POINT (-74.18568781304208 4.609369681065971),...",56,"[POINT (-74.18678000006638 4.619510000035484),...",180,[],0,"[POINT (-74.18472100034971 4.609888000060257),...",3,"[POINT (-74.20875474430697 4.611374929071266),...",470,"[POINT (-74.21065774039999 4.61040971992), POI...",23357
7,kennedy,8,"POLYGON ((-74.18567 4.64705, -74.18527 4.64699...","[POINT (-74.13779790033574 4.60641904019065), ...",20,"[POINT (-74.13396250966207 4.629304326933209),...",189,"[POINT (-74.17456999990993 4.633669999822766),...",332,[POINT (-74.1603733 4.619479539)],1,"[POINT (-74.13527300017675 4.623412000102226),...",8,"[POINT (-74.17283972335917 4.63182963814141), ...",835,"[POINT (-74.1717651811 4.6273095886), POINT (-...",51988
8,fontibon,9,"POLYGON ((-74.15857 4.66274, -74.15848 4.66279...","[POINT (-74.11465542023956 4.655812319622921),...",8,"[POINT (-74.11849471096212 4.668231872902652),...",133,"[POINT (-74.11873999964931 4.667290000098864),...",123,[POINT (-74.14450816 4.676126732)],1,"[POINT (-74.10927299982305 4.65564700003319), ...",10,"[POINT (-74.12943703389334 4.688870697853618),...",291,"[POINT (-74.1291155987 4.68865509044), POINT (...",20608
9,engativa,10,"POLYGON ((-74.15547 4.71798, -74.15547 4.71798...","[POINT (-74.08623682995086 4.685978129896145),...",14,"[POINT (-74.08303793655148 4.690620529413934),...",180,"[POINT (-74.08206999989537 4.684380000324905),...",368,"[POINT (-74.089517 4.686316), POINT (-74.09959...",3,"[POINT (-74.09728799979752 4.664745999863851),...",6,"[POINT (-74.08374088466222 4.682430070394809),...",695,"[POINT (-74.08404998020001 4.68127012878), POI...",46986


In [101]:
Hos = LocOrder['N_Hospitals'].tolist() #160
sum(Hos)

159

In [102]:
ipss = LocOrder['N_IPS'].tolist() #2900
sum(ipss)

2753

In [103]:
coles = LocOrder['N_Colegios'].tolist() #2539
sum(coles)

2838

In [104]:
plazas = LocOrder['N_PlazaMer'].tolist() #19
sum(plazas)

19

In [105]:
turismo = LocOrder['N_Turismo'].tolist() #442
sum(turismo)

366

In [106]:
nsitp = LocOrder['N_SITP'].tolist() #7653
sum(nsitp)

7014

In [107]:
ncomer = LocOrder['N_Comercio'].tolist() #439887
sum(ncomer)

441643

In [108]:
path = '/home/ubuntu/javeriana/MOTUS-PUJ/Step_2/1_spatial/Outputs/LocalidadGDF.gzip'
LocOrder.to_pickle(path, compression='gzip')