## Clean data from the KML bicis infraestructure
---

This notebook cleans these datasets provided by: enbicipormadrid.es with a KML file that needs to be parsed in qGIS.

That file outputs several geopackages, one for each type of ciclyst type.


In [1]:
import geopandas as gpd
import pandas as pd

%matplotlib inline

!cd ..
!pwd


/home/laura/Documents/git/SatAi_BicycleFriendlyPath/geodata


----

In [4]:
geof = []

for file in ['bici_30kmh', 'bici_doblesentido_permitido', 'bici_itinerario_facil', 'bici_viaciclista_calzada', 'bici_viaciclista_peatonal', 'bivi_ciclocarrilbus']:
    geof.append(gpd.read_file(f"../data/bici_data/{file}.gpkg"))
    assert geof[-1].crs.name == 'WGS 84'


gdf = pd.concat(geof, axis = 0)[['Name', 'geometry']]
gdf.head()

Unnamed: 0,Name,geometry
0,Ciclocarril,"MULTIPOLYGON (((-3.62518 40.36686, -3.62519 40..."
0,Calle 20,"MULTIPOLYGON (((-3.62332 40.37901, -3.62259 40..."
0,Itinerario recomendado,"MULTIPOLYGON (((-3.45667 40.30889, -3.45667 40..."
0,Carril bici unidireccional en calzada,"MULTIPOLYGON (((-3.47234 40.23493, -3.47263 40..."
0,Senda peatonal ciclable,"MULTIPOLYGON (((-3.45627 40.29878, -3.45312 40..."


In [5]:
type(gdf)

geopandas.geodataframe.GeoDataFrame

In [6]:
# gdf.plot(column='Name', figsize=(25,25))

----

We open the file that we cleaned before with all the information:

In [7]:
v = gpd.read_file("../data/base_data/viario_ejes.gpkg")
v.head(2)

Unnamed: 0,id_tramo,id_vial,id_code,via_code,via_name,clase_code,clase_deno,clase_name,tramo_code,tramo_name,...,estado_name,vehic_code,vehic_name,orden_code,orden_name,district_code,district_name,shp_avg_width,shp_log_width,geometry
0,280790192099,600000000158,56057,1000,carretera,1002,autovía,m-13,1,troncal,...,en_uso,1,solo_vehículo,p,principal,079 - 21,barajas,7.26,1.98238,MULTILINESTRING Z ((-3.57465 40.47525 599.7150...
1,280790192099,600000000158,56057,1000,carretera,1002,autovía,m-13,1,troncal,...,en_uso,1,solo_vehículo,p,principal,079 - 21,barajas,8.47,2.136531,MULTILINESTRING Z ((-3.57465 40.47525 599.7150...


In [8]:
# vamos a sacar los puntos medios y quedarnos sólo con las tres primeras columnas, para aligerar el pegado espacial
pt = v[['id_tramo', 'id_vial', 'id_code', 'geometry']]
pt['geometry'] = pt.geometry.centroid

pt.sindex.valid_query_predicates


  pt['geometry'] = pt.geometry.centroid
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


{None,
 'contains',
 'contains_properly',
 'covers',
 'crosses',
 'intersects',
 'overlaps',
 'touches',
 'within'}

In [9]:
pt = gpd.sjoin(pt, gdf, how = 'left', predicate='within')
pt.drop(columns=['geometry', 'index_right'], inplace=True)
pt.dropna(axis=0, inplace=True)

pt.rename(columns={'Name': 'cyclist_type'}, inplace=True)
pt['cyclist_type'].value_counts()

Itinerario recomendado                   13685
Senda peatonal ciclable                   3895
Ciclocarril                               3029
Carril bici unidireccional en calzada     2367
Ciclocarril+carril bus                    2232
Calle 20                                  1153
Name: cyclist_type, dtype: int64

In [10]:
pt.head()

Unnamed: 0,id_tramo,id_vial,id_code,cyclist_type
74,280790244489,661022100004,22100004,Carril bici unidireccional en calzada
141,280790243905,661022100004,22100004,Carril bici unidireccional en calzada
189,280790134931,280790002846,2807910590,Itinerario recomendado
190,280790134931,280790002846,2807910590,Itinerario recomendado
200,280790029249,280790007375,2807900250,Itinerario recomendado


In [11]:
v = v.merge(pt, left_on = ['id_tramo', 'id_vial', 'id_code'], right_on = ['id_tramo', 'id_vial', 'id_code'])
v.head()

Unnamed: 0,id_tramo,id_vial,id_code,via_code,via_name,clase_code,clase_deno,clase_name,tramo_code,tramo_name,...,vehic_code,vehic_name,orden_code,orden_name,district_code,district_name,shp_avg_width,shp_log_width,geometry,cyclist_type
0,280790244489,661022100004,22100004,4005,red_ten-t,1002,autovía,básica_corredor_mixto,1,troncal,...,1,solo_vehículo,p,principal,079 - 18,villa de vallecas,10.18,2.320425,MULTILINESTRING Z ((-3.63923 40.32720 565.6000...,Carril bici unidireccional en calzada
1,280790243905,661022100004,22100004,4005,red_ten-t,1002,autovía,básica_corredor_mixto,1,troncal,...,1,solo_vehículo,p,principal,079 - 18,villa de vallecas,10.18,2.320425,MULTILINESTRING Z ((-3.63921 40.32750 566.5000...,Carril bici unidireccional en calzada
2,280790134931,280790002846,2807910590,2122,calle,2000,urbano,joaquin_bau,1,troncal,...,111,peatón_+_bici_+_vehículo,-998,no_aplicable,079 - 05,chamartin,12.09,2.492379,MULTILINESTRING Z ((-3.68675 40.46327 723.6000...,Itinerario recomendado
3,280790134931,280790002846,2807910590,2122,calle,2000,urbano,joaquin_bau,1,troncal,...,111,peatón_+_bici_+_vehículo,-998,no_aplicable,079 - 05,chamartin,12.09,2.492379,MULTILINESTRING Z ((-3.68675 40.46327 723.6000...,Itinerario recomendado
4,280790134931,280790002846,2807910590,2122,calle,2000,urbano,joaquin_bau,1,troncal,...,111,peatón_+_bici_+_vehículo,-998,no_aplicable,079 - 05,chamartin,6.65,1.894617,MULTILINESTRING Z ((-3.68675 40.46327 723.6000...,Itinerario recomendado


In [12]:
v.to_file("../data/base_data/viario_ejes_02.gpkg")

---

Once we have that, we open the axis of cyclist infrastructure provided by Madrid's Data Bank

In [13]:
bici = gpd.read_file("../data/bici_data/bici_ejes.gpkg")
bici.shape

(52295, 34)

In [14]:
bici.columns

Index(['id_tramo', 'id_vial', 'id_code', 'via_code', 'via_name', 'clase_code',
       'clase_deno', 'clase_name', 'tramo_code', 'tramo_name', 'calzada_code',
       'calzada_name', 'access_code', 'access_name', 'firme_code',
       'firme_name', 'ncarriles_code', 'sentido_code', 'sentido_name',
       'situacion_code', 'situacion_name', 'estado_code', 'estado_name',
       'vehic_code', 'vehic_name', 'orden_code', 'orden_name', 'district_code',
       'district_name', 'shp_avg_width', 'shp_log_width', 'cyclist_type',
       'length', 'geometry'],
      dtype='object')

In [15]:
v = gpd.read_file("../data/base_data/viario_ejes.gpkg")
v.shape

(117616, 32)

In [16]:
v.set_index(v.columns.tolist()[:-4]).shape

(117616, 4)

In [17]:

index_columns = ['id_tramo', 'id_vial', 'id_code', 'via_code', 'clase_code', 'access_code', 'district_code', 'shp_avg_width']

f = v.set_index(index_columns).join(
    bici.set_index(index_columns)[['cyclist_type']], 
    on=index_columns, 
    how='left'
    ).reset_index(drop=False)

In [18]:
f.shape

(150783, 33)

In [20]:
f.to_file("../data/base_data/viario_ejes_04.gpkg")