# Proximity data to Area of Prediction

This notebook contains the processes that interpolate proximity to certain amenities to the area of prediction.

## Import libraries

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import aup

## Data download/read and preprocessing

### Area of analysis

In [2]:
# Load the polygon that delimits the area of analysis and preview it.
aoa_path = "../../data/processed/prediccion_uso_suelo/Distrito_Zona5.geojson"
aoa_gdf = gpd.read_file(aoa_path)
print(aoa_gdf.shape)
aoa_gdf.head(2)

(1, 4)


Unnamed: 0,id,area,perimeter,geometry
0,0,34569080.0,22034.335455,"POLYGON ((662437.93 2284457.02, 662421.516 228..."


### Area of prediction with variables

In [3]:
# Load the area-of-prediction polygons together with the variables computed
# in earlier steps, then preview the result.
aop_env_path = '../../data/processed/prediccion_uso_suelo/test/area_of_prediction_env.gpkg'
gdf = gpd.read_file(aop_env_path)
print(gdf.shape)
gdf.head(2)

(37061, 32)


Unnamed: 0,uso_suelo,area_m2,full_plus_code,CVEGEO,bld_area_m2,block_area_m2,pred_area_m2,pred_area_pct,bld_pred_area_pct,pobtot,...,pct_servicios,pct_comercio,pct_salud,pct_educacion,pct_gobierno,pct_industria,temperature_mean_diff,ndvi_mean,ndmi_diff,geometry
0,Servicio,2257.817773,75GRMHR2+458V,1412000011206018,173.7348,278416.454431,2747.599834,0.009869,0.063231,0,...,0.740466,0.231342,0.0,0.0,0.0,0.028192,1.425273,0.254363,0.085301,"MULTIPOLYGON (((2349620.239 965525.905, 234961..."
1,Sin dato,711.09856,75GRMGRX+GQP3,1412000011206018,81.7922,278416.454431,900.765482,0.003235,0.090803,0,...,0.586683,0.354285,0.0,0.0,0.0,0.059033,2.330655,0.280296,0.070458,"MULTIPOLYGON (((2349506.214 965639.352, 234950..."


### Proximity data

In [4]:
# Expand the area of analysis by 100 units of the projected CRS
# (NOTE(review): presumably metres for EPSG:6372 — confirm), then convert the
# buffered polygon to EPSG:4326 for the proximity-node query below.
aoa_projected = aoa_gdf.to_crs("EPSG:6372")
buffer = gpd.GeoDataFrame(geometry=aoa_projected.buffer(100)).to_crs("EPSG:4326")
buffer

Unnamed: 0,geometry
0,"POLYGON ((-103.4407 20.65148, -103.44079 20.65..."


In [5]:
# Source of the proximity nodes, passed to aup.gdf_from_polygon below.
schema = 'prox_analysis'
table = 'proximity_v2_23_point'

# Retrieve the proximity nodes for the buffered area of analysis.
prox_nodes = aup.gdf_from_polygon(buffer, schema, table)

print(prox_nodes.shape)
prox_nodes.head(2)

(9575, 69)


Unnamed: 0,osmid,denue_preescolar,denue_preescolar_15min,denue_primaria,denue_primaria_15min,denue_secundaria,denue_secundaria_15min,clues_primer_nivel,clues_primer_nivel_15min,denue_guarderias,...,denue_museos,denue_museos_15min,denue_bibliotecas,denue_bibliotecas_15min,denue_centrocultural,denue_centrocultural_15min,x,y,geometry,city
0,28791518,7.341088,6,7.777907,4,26.270394,0,8.583527,9,10.816726,...,29.825454,0,24.933597,0,88.667714,0,-103.402947,20.674933,POINT (-103.40295 20.67493),Guadalajara
1,28791819,14.330918,2,15.698086,0,18.886949,0,21.883063,0,11.384115,...,79.918412,0,32.323755,0,91.512585,0,-103.454766,20.695994,POINT (-103.45477 20.69599),Guadalajara


Filter proximity elements

In [6]:
# Proximity variables kept for the prediction: for each amenity, the base
# column and its companion '_15min' column.
proximity_cols = [
    'denue_primaria', 'denue_primaria_15min',
    'denue_abarrotes', 'denue_abarrotes_15min',
    'denue_peluqueria', 'denue_peluqueria_15min',
    'denue_lavanderia', 'denue_lavanderia_15min',
    'clues_primer_nivel', 'clues_primer_nivel_15min',
]

# Keep the node id and geometry alongside the selected variables.
cols = ['osmid', *proximity_cols, 'geometry']
prox_nodes = prox_nodes[cols].copy()

print(prox_nodes.shape)
prox_nodes.head(2)

(9575, 12)


Unnamed: 0,osmid,denue_primaria,denue_primaria_15min,denue_abarrotes,denue_abarrotes_15min,denue_peluqueria,denue_peluqueria_15min,denue_lavanderia,denue_lavanderia_15min,clues_primer_nivel,clues_primer_nivel_15min,geometry
0,28791518,7.777907,4,4.427983,9,5.178137,46,6.701719,7,8.583527,9,POINT (-103.40295 20.67493)
1,28791819,15.698086,0,8.995115,7,8.995115,4,8.995115,1,21.883063,0,POINT (-103.45477 20.69599)


## Proximity to area of analysis

Set CRS

In [7]:
# Bring both layers to the same CRS so that their coordinates are comparable
# in the interpolation step.
target_crs = "EPSG:6372"
gdf = gdf.to_crs(target_crs)
prox_nodes = prox_nodes.to_crs(target_crs)

Create centroids with unique id

In [8]:
# Make sure every polygon carries a 'fid' identifier; when it is missing,
# derive it from the current index.
has_fid = 'fid' in gdf.columns
if not has_fid:
    gdf = gdf.reset_index()
    gdf = gdf.rename(columns={'index': 'fid'})

In [21]:
# Lightweight copy holding only the identifier and geometry, with each
# geometry replaced by its centroid (the points to interpolate at).
centroid_cols = ['fid', 'geometry']
gdf_cnt = gdf[centroid_cols].copy()
gdf_cnt['geometry'] = gdf_cnt.geometry.centroid

Interpolate proximity data

In [22]:
# Interpolate every proximity variable from the network nodes onto the
# prediction-area centroids using aup.interpolate_at_points.
# NOTE(review): power / search_radius look like inverse-distance-weighting
# parameters, with the radius expressed in CRS units — confirm against aup.
INTERP_POWER = 2
INTERP_SEARCH_RADIUS = 300

# The source and target coordinates are identical for every column, so they
# are extracted once instead of recomputing the node centroids per iteration.
node_points = prox_nodes.centroid
node_x, node_y = node_points.x, node_points.y
target_x, target_y = gdf_cnt.geometry.x, gdf_cnt.geometry.y

# Only the value columns are interpolated; the id and geometry are skipped.
value_cols = [col for col in cols if col not in ('osmid', 'geometry')]

for col in value_cols:
    int_vals = aup.interpolate_at_points(node_x,
                                         node_y,
                                         prox_nodes[col],
                                         target_x,
                                         target_y,
                                         power=INTERP_POWER,
                                         search_radius=INTERP_SEARCH_RADIUS)

    gdf_cnt[col] = int_vals
    print('Finished processing', col)

Finished processing denue_primaria
Finished processing denue_primaria_15min
Finished processing denue_abarrotes
Finished processing denue_abarrotes_15min
Finished processing denue_peluqueria
Finished processing denue_peluqueria_15min
Finished processing denue_lavanderia
Finished processing denue_lavanderia_15min
Finished processing clues_primer_nivel
Finished processing clues_primer_nivel_15min


Merge data

In [23]:
# Attach the interpolated proximity values to the prediction polygons by 'fid'.
# Any proximity columns already present in gdf are dropped first, so that
# re-running this cell does not create duplicated '_x' / '_y' columns.
prox_value_cols = [c for c in gdf_cnt.columns if c not in ('fid', 'geometry')]

gdf = (
    gdf.drop(columns=prox_value_cols, errors='ignore')
       .merge(gdf_cnt.drop(columns=['geometry']),
              on='fid',
              # Fail loudly if 'fid' is not unique on either side instead of
              # silently multiplying rows.
              validate='one_to_one')
)
print(gdf.shape)
gdf.head(2)

(37061, 43)


Unnamed: 0,fid,uso_suelo,area_m2,full_plus_code,CVEGEO,bld_area_m2,block_area_m2,pred_area_m2,pred_area_pct,bld_pred_area_pct,...,denue_primaria,denue_primaria_15min,denue_abarrotes,denue_abarrotes_15min,denue_peluqueria,denue_peluqueria_15min,denue_lavanderia,denue_lavanderia_15min,clues_primer_nivel,clues_primer_nivel_15min
0,0,Servicio,2257.817773,75GRMHR2+458V,1412000011206018,173.7348,278416.454431,2747.599834,0.009869,0.063231,...,22.802883,0.0,7.696036,5.772197,8.017103,7.950595,8.005108,3.557762,16.196502,0.555897
1,1,Sin dato,711.09856,75GRMGRX+GQP3,1412000011206018,81.7922,278416.454431,900.765482,0.003235,0.090803,...,22.885795,0.0,6.115355,3.994591,6.128543,6.752205,6.148625,2.126203,18.010403,0.042501


## Save

In [24]:
# Persist the prediction polygons, now including the proximity variables.
aop_prox_path = '../../data/processed/prediccion_uso_suelo/test/area_of_prediction_prox.gpkg'
gdf.to_file(aop_prox_path)