# Proximity data to Area of Prediction

This notebook contains the processes that interpolate proximity to certain amenities to the are of prediction.

## Import libraries

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import aup

## Data download/read and preprocessing

### Area of analysis

In [2]:
aoa_gdf = gpd.read_file("../../data/processed/prediccion_uso_suelo/Distrito/distrito.shp")
print(aoa_gdf.shape)
aoa_gdf.head(2)

(1, 2)


Unnamed: 0,id,geometry
0,1,"POLYGON ((-103.4233 20.91026, -103.42209 20.91..."


### Area of prediction with variables

In [3]:
gdf = gpd.read_file('../../data/processed/prediccion_uso_suelo/complete_model/area_of_prediction_roads.gpkg')
print(gdf.shape)
gdf.head(2)

(682585, 43)


Unnamed: 0,uso_suelo,area_m2,full_plus_code,CVEGEO,bld_area_m2,block_area_m2,pred_area_m2,pred_area_pct,bld_pred_area_pct,pobtot,...,temperature_mean_diff,ndvi_mean,ndmi_diff,motorway_distance,primary_distance,secondary_distance,tertiary_distance,residential_distance,other_distance,geometry
0,Sin dato,67.279056,75GRMGQW+89PG,1412000011206018,12.438638,278416.454431,67.279056,0.000242,0.184881,0,...,1.787026,0.084339,0.045966,173.493879,346.922028,83.7398,480.266657,24.764744,12.801915,"MULTIPOLYGON (((-103.45405 20.68833, -103.4541..."
1,Sin dato,45.355545,75GRMGQW+8CFP,1412000011206018,4.482118,278416.454431,45.355545,0.000163,0.098822,0,...,1.649216,0.109751,0.045719,173.493879,346.922028,83.7398,480.266657,24.764744,12.801915,"MULTIPOLYGON (((-103.4539 20.68833, -103.454 2..."


### Proximity data

In [4]:
buffer = aoa_gdf.to_crs("EPSG:6372").buffer(100)
buffer = gpd.GeoDataFrame(geometry = buffer)
buffer = buffer.to_crs("EPSG:4326")
buffer

Unnamed: 0,geometry
0,"POLYGON ((-103.592 20.74856, -103.59219 20.748..."


In [5]:
table = 'proximity_v2_23_point'
schema = 'prox_analysis'

prox_nodes = aup.gdf_from_polygon(buffer, schema, table)

print(prox_nodes.shape)
prox_nodes.head(2)

(63320, 69)


Unnamed: 0,osmid,denue_preescolar,denue_preescolar_15min,denue_primaria,denue_primaria_15min,denue_secundaria,denue_secundaria_15min,clues_primer_nivel,clues_primer_nivel_15min,denue_guarderias,...,denue_museos,denue_museos_15min,denue_bibliotecas,denue_bibliotecas_15min,denue_centrocultural,denue_centrocultural_15min,x,y,geometry,city
0,273146454,2.454897,4,4.260768,1,15.860206,0,3.041695,11,17.52591,...,39.703464,0,17.089605,0,81.541184,0,-103.406217,20.671926,POINT (-103.40622 20.67193),Guadalajara
1,273146456,8.036414,3,13.708478,1,15.150906,0,7.928151,5,20.507703,...,47.302564,0,27.638295,0,85.996093,0,-103.411528,20.674774,POINT (-103.41153 20.67477),Guadalajara


Filter proximity elements

In [6]:
cols = ['osmid','denue_primaria','denue_primaria_15min',
       'denue_abarrotes','denue_abarrotes_15min','denue_peluqueria',
       'denue_peluqueria_15min','denue_lavanderia','denue_lavanderia_15min',
       'clues_primer_nivel','clues_primer_nivel_15min','geometry']
prox_nodes = prox_nodes[cols].copy()

print(prox_nodes.shape)
prox_nodes.head(2)

(63320, 12)


Unnamed: 0,osmid,denue_primaria,denue_primaria_15min,denue_abarrotes,denue_abarrotes_15min,denue_peluqueria,denue_peluqueria_15min,denue_lavanderia,denue_lavanderia_15min,clues_primer_nivel,clues_primer_nivel_15min,geometry
0,273146454,4.260768,1,3.563373,6,2.454897,36,9.127368,8,3.041695,11,POINT (-103.40622 20.67193)
1,273146456,13.708478,1,6.549081,5,6.014812,26,13.846584,2,7.928151,5,POINT (-103.41153 20.67477)


## Proximity to area of analysis

Set CRS

In [7]:
prox_nodes = prox_nodes.to_crs("EPSG:6372")
gdf = gdf.to_crs("EPSG:6372")

Create centroids with unique id

In [8]:
if 'fid' not in gdf.columns:
    gdf = gdf.reset_index().rename(columns={'index':'fid'})

In [9]:
gdf_cnt = gdf[['fid','geometry']].copy()
gdf_cnt['geometry'] = gdf_cnt.centroid

Interpolate proximity data

In [10]:
for col in cols:
    
    if (col != 'osmid') and (col != 'geometry'):

        division_value = 10000

        gdf_int = gpd.GeoDataFrame()
        
        for i in range(round(len(gdf_cnt)/division_value)):
            
            gdf_tmp = gdf_cnt.iloc[i*division_value:
            (i+1)*division_value].copy()
            
            int_vals = aup.interpolate_at_points(prox_nodes.centroid.x,
                                             prox_nodes.centroid.y, 
                                             prox_nodes[col],
                                             gdf_tmp.geometry.x, 
                                             gdf_tmp.geometry.y,
                                            power=2,
                                            search_radius=300)
            gdf_tmp[col] = int_vals

            gdf_int = pd.concat([gdf_int,gdf_tmp])
        
        gdf_cnt = gdf_cnt.merge(gdf_int[['fid',col]], on='fid')
        print('Finished processing',col)
        

Finished processing denue_primaria
Finished processing denue_primaria_15min
Finished processing denue_abarrotes
Finished processing denue_abarrotes_15min
Finished processing denue_peluqueria
Finished processing denue_peluqueria_15min
Finished processing denue_lavanderia
Finished processing denue_lavanderia_15min
Finished processing clues_primer_nivel
Finished processing clues_primer_nivel_15min


Merge data

In [13]:
gdf = gdf.merge(gdf_cnt.drop(columns=['geometry']),
               on='fid')
print(gdf.shape)
gdf.head(2)

(680000, 64)


Unnamed: 0,fid,uso_suelo,area_m2,full_plus_code,CVEGEO,bld_area_m2,block_area_m2,pred_area_m2,pred_area_pct,bld_pred_area_pct,...,denue_primaria_y,denue_primaria_15min_y,denue_abarrotes_y,denue_abarrotes_15min_y,denue_peluqueria_y,denue_peluqueria_15min_y,denue_lavanderia_y,denue_lavanderia_15min_y,clues_primer_nivel_y,clues_primer_nivel_15min_y
0,0,Sin dato,67.279056,75GRMGQW+89PG,1412000011206018,12.438638,278416.454431,67.279056,0.000242,0.184881,...,21.27848,0.688413,11.817491,3.19918,12.87834,3.440951,13.706626,1.476592,18.913284,0.358167
1,1,Sin dato,45.355545,75GRMGQW+8CFP,1412000011206018,4.482118,278416.454431,45.355545,0.000163,0.098822,...,19.011938,0.890523,11.070963,3.913038,12.435124,4.003971,13.505759,1.676774,16.916017,0.46892


## Save

In [12]:
gdf.to_file('../../data/processed/prediccion_uso_suelo/complete_model/area_of_prediction_prox.gpkg')