# 99a - Script 21 nodes output comparison

This notebook compares times by osmid between the original proxanalysis (2020) and the Script 21 output.

* First result was successful but not perfect. Some sums of time differences are over 8 minutes.
* Also, there are differences in the number of nodes. Further tests required.

## Import libraries

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the directory two levels up importable so the local `aup` package resolves
module_path = os.path.abspath(os.path.join('../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import outside the `if`: when module_path is already on sys.path the branch
# above is skipped, and `aup` must still be imported for the cells below.
import aup

  ox.config(


## Load data

### Load data - Load original data (Already in database)

In [2]:
# City to compare and the database table holding the original (2020) results
city = 'Aguascalientes'
schema = 'prox_analysis'
table = 'nodes_proximity_2020'

# Select every row of the table whose "metropolis" matches the city
query = (
    f"SELECT * FROM {schema}.{table} "
    f"WHERE \"metropolis\" LIKE \'{city}\'"
)
nodes_gdf = aup.gdf_from_query(query, geometry_col='geometry')

# Show
print(nodes_gdf.shape)
nodes_gdf.head(1)

(2208953, 7)


Unnamed: 0,osmid,x,y,geometry,time,amenity,metropolis
0,272921360,-102.295073,21.872876,POINT (-102.29507 21.87288),12.607833,sip_centro_admin,Aguascalientes


In [7]:
# Transform original nodes data: start from one row per osmid, then add
# one time column per amenity.
unique_nodes = nodes_gdf.drop_duplicates(subset='osmid', keep="last")
nodes_analysis = unique_nodes[['osmid','geometry','metropolis']].copy()

for amenidad in nodes_gdf.amenity.unique():
    # Times for this amenity only, with the column named after the amenity
    is_amenidad = nodes_gdf.amenity == amenidad
    amenity_times = (
        nodes_gdf
        .loc[is_amenidad, ['osmid','time']]
        .rename(columns={'time': amenidad})
    )

    # A column whose mean is exactly 0 is replaced entirely by NaN
    if amenity_times[amenidad].mean() == 0:
        amenity_times[amenidad] = np.nan

    # merge() defaults to an inner join, so only osmids present in both
    # frames are kept after each iteration
    nodes_analysis = nodes_analysis.merge(amenity_times, on='osmid')

# Show
print(nodes_analysis.shape)
nodes_analysis.head(1)

(51371, 46)


Unnamed: 0,osmid,geometry,metropolis,sip_centro_admin,sip_teatro,sip_cancha,sip_unidad_deportiva,sip_espacio_publico,sip_mercado,clues_primer_nivel,...,denue_cines,denue_centro_cultural,denue_parque_natural,denue_papelerias,denue_libros,denue_revistas_periodicos,denue_ferreteria_tlapaleria,denue_art_limpieza,denue_pintura,denue_peluqueria
0,272921360,POINT (-102.29507 21.87288),Aguascalientes,12.607833,16.06632,156.885198,28.44305,5.692959,22.145997,6.486561,...,26.069111,8.894186,,3.02033,3.312247,10.861234,4.743875,8.534015,8.714079,4.097719


### Load data - Load prox script 21 data (Aguascalientes, version 1)

In [29]:
# Script 21 test output, read from a GeoPackage file.
# The path variable is deliberately not called `dir`, which would shadow
# the built-in dir() for the rest of the kernel session.
nodes_test_path = "../../data/external/temporal_fromjupyter/proximity_v2/test_proxanalysis_scriptv2_nodes.gpkg"
nodes_test = gpd.read_file(nodes_test_path)

# Show
print(nodes_test.shape)
nodes_test.head(1)

(51434, 34)


Unnamed: 0,osmid,denue_preescolar,denue_primaria,denue_secundaria,clues_primer_nivel,denue_guarderias,denue_dif,denue_supermercado,denue_abarrotes,denue_carnicerias,...,denue_cafe,sip_cancha,sip_unidad_deportiva,sip_espacio_publico,denue_parque_natural,denue_cines,denue_museos,x,y,geometry
0,272921360,10.770629,5.615674,11.812107,6.486561,5.09561,6.486561,3.592175,1.673421,6.486561,...,5.25473,156.885198,28.44305,5.692959,,26.069111,7.589368,-102.295073,21.872876,POINT (-102.29507 21.87288)


## Compare data

In [30]:
# Create amenities list which will be compared: every column of the
# Script 21 output except the identifier and coordinate/geometry columns
amenities_list = nodes_test.columns.tolist()
for non_amenity in ('osmid', 'x', 'y', 'geometry'):
    # remove() raises ValueError if the column is missing
    amenities_list.remove(non_amenity)
amenities_list

['denue_preescolar',
 'denue_primaria',
 'denue_secundaria',
 'clues_primer_nivel',
 'denue_guarderias',
 'denue_dif',
 'denue_supermercado',
 'denue_abarrotes',
 'denue_carnicerias',
 'sip_mercado',
 'denue_peluqueria',
 'denue_farmacias',
 'denue_ferreteria_tlapaleria',
 'denue_art_limpieza',
 'denue_ropa',
 'denue_calzado',
 'denue_muebles',
 'denue_lavanderia',
 'denue_revistas_periodicos',
 'denue_pintura',
 'denue_restaurante_insitu',
 'denue_restaurante_llevar',
 'denue_bares',
 'denue_cafe',
 'sip_cancha',
 'sip_unidad_deportiva',
 'sip_espacio_publico',
 'denue_parque_natural',
 'denue_cines',
 'denue_museos']

In [32]:
# Inner merge (to compare same osmids). Explicit suffixes make it clear
# which side each overlapping amenity column comes from.
compare = nodes_analysis.merge(
    nodes_test, on='osmid', how='inner', suffixes=('_old', '_new')
)

# Compare old and new amenities: per-node time difference (new - old)
compare_list = [f"{amenity}_diff" for amenity in amenities_list]
for amenity, diff_col in zip(amenities_list, compare_list):
    compare[diff_col] = compare[f"{amenity}_new"] - compare[f"{amenity}_old"]

# Save df with time differences only
compare = compare[compare_list]

# Visualize sum of time differences: one row per amenity, computed in a
# single vectorized pass instead of filling the frame cell by cell.
# NOTE: this is a signed sum, so positive and negative per-node
# differences can offset each other.
summary = (
    compare.sum()
    .rename_axis('amenity')
    .reset_index(name='diff')
)

summary

Unnamed: 0,amenity,diff
0,denue_preescolar_diff,-7.665077e-12
1,denue_primaria_diff,-1.437461e-12
2,denue_secundaria_diff,1.616421e-11
3,clues_primer_nivel_diff,-6.788167e-12
4,denue_guarderias_diff,-6.542461e-12
5,denue_dif_diff,-1.041363e-11
6,denue_supermercado_diff,-2.764117e-11
7,denue_abarrotes_diff,-8.497182e-12
8,denue_carnicerias_diff,-2.564849e-11
9,sip_mercado_diff,-1.812901e-11


In [34]:
# denue_parque_natural_diff is 0 because there are no parques naturales in Aguascalientes