# 98c - test - output comparison

This notebook compares mean times for the original proxanalysis (2020), the Script 21 output (from Notebook 98b) and the redeveloped Script 21 output (after adding the count_pois modifications, from Notebook 98c) __in order to make sure the count_pois modifications did not alter the proximity code__.

__Results__:

* Result was successful: 0.0 min difference in mean times by amenity, eje and max_time.

## Import libraries

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the directory three levels up (presumably the repository root) importable
# so the project-local `aup` package can be resolved.
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import unconditionally: when the path was already on sys.path the previous
# version skipped the import entirely, leaving `aup` undefined for later cells.
import aup

## Load data

### Load data - Load original data (Already in database)

In [2]:
# Version 1 original (Script 01 + 02 + 15), read from the database
city = 'Aguascalientes'
prox_schema = 'prox_analysis'
prox_table = 'time_15_min_analysis_hexres8'
query = f"SELECT * FROM {prox_schema}.{prox_table} WHERE \"city\" LIKE \'{city}\'"

# Rename the columns whose names differ from the ones used in the Script 21 outputs
column_aliases = {'max_idx_15_min': 'max_time', 'dens_pobha': 'dens_pob_ha'}
v1_original_prox_gdf = (
    aup.gdf_from_query(query, geometry_col='geometry')
    .rename(columns=column_aliases)
)

# Show
print(v1_original_prox_gdf.shape)
v1_original_prox_gdf.head(1)

(309, 30)


Unnamed: 0,hex_id_8,geometry,max_escuelas,max_preescolar,max_primaria,max_secundaria,max_servicios comunitarios,max_salud,max_guarderías,max_asistencia social,...,max_time,pobtot,pobfem,pobmas,pob_0a14,pob_15a24,pob_25a59,p_60ymas,dens_pob_ha,city
0,88498e36dbfffff,"POLYGON ((-102.34777 21.92336, -102.34267 21.9...",15.057619,13.586498,15.057619,11.969535,62.167866,53.640209,50.912589,62.167866,...,71.010665,264.7125,131.68126,133.03125,75.9375,50.9625,117.731249,20.081251,3.063433,Aguascalientes


### Load data - Load prox script 21 data (Aguascalientes, version 1)

In [6]:
# Version 1, Script 21 output (GeoPackage written by an earlier notebook)
v1_2024_path = '../../../data/external/temporal_fromjupyter/proximity_v2/test_proxanalysis_scriptv1.gpkg'
v1_2024_prox_gdf = gpd.read_file(v1_2024_path, driver='GPKG')

# Show
print(v1_2024_prox_gdf.shape)
v1_2024_prox_gdf.head(1)

(309, 42)


Unnamed: 0,hex_id,res,max_escuelas,max_preescolar,max_primaria,max_secundaria,max_servicios comunitarios,max_salud,max_guarderías,max_asistencia social,...,idx_actividad física,idx_cultural,mean_time,median_time,max_time,idx_sum,pobtot,dens_pob_ha,city,geometry
0,88498e36dbfffff,8,15.057619,13.586498,15.057619,11.969535,62.167866,53.640209,50.912589,62.167866,...,0.854075,0.002455,28.977914,17.066295,71.010665,8.47822,264.7125,3.063433,Aguascalientes,"POLYGON ((-102.34777 21.92336, -102.34267 21.9..."


In [7]:
# Version 1, Script 21 output with the count_pois modifications
v1_countpois_path = '../../../data/external/temporal_fromjupyter/proximity_v2/test_proxanalysis_scriptv1_countpois.gpkg'
v1_countpois_2024_prox_gdf = gpd.read_file(v1_countpois_path, driver='GPKG')

# Show
print(v1_countpois_2024_prox_gdf.shape)
v1_countpois_2024_prox_gdf.head(1)

(309, 60)


Unnamed: 0,hex_id,res,max_escuelas,max_preescolar,max_primaria,max_secundaria,max_servicios comunitarios,max_salud,max_guarderías,max_asistencia social,...,idx_actividad física,idx_cultural,mean_time,median_time,max_time,idx_sum,pobtot,dens_pob_ha,city,geometry
0,88498e36dbfffff,8,15.057619,13.586498,15.057619,11.969535,62.167866,53.640209,50.912589,62.167866,...,0.854075,0.002455,28.977914,17.066295,71.010665,8.47822,264.7125,3.063433,Aguascalientes,"POLYGON ((-102.34777 21.92336, -102.34267 21.9..."


## Data comparison

In [8]:
# Time columns compared across the three outputs (the max_* columns shared by
# all three tables, ending with the overall max_time).
datos = ['max_escuelas','max_preescolar','max_primaria','max_secundaria',
         'max_servicios comunitarios','max_salud','max_guarderías','max_asistencia social',
         'max_comercio','max_alimentos','max_personal','max_farmacias','max_hogar','max_complementarios',
         'max_entretenimiento','max_social','max_actividad física','max_cultural',
         'max_time']

# Build the summary in a single pass: one record per compared column holding
# the column mean in each of the three GeoDataFrames.
summarydata = pd.DataFrame(
    [
        {
            'columna': c,
            'org_data': v1_original_prox_gdf[c].mean(),
            'v1': v1_2024_prox_gdf[c].mean(),
            'v1_countpois': v1_countpois_2024_prox_gdf[c].mean(),
        }
        for c in datos
    ]
)

# Pairwise differences between the mean times of the three versions.
summarydata['org-v1'] = summarydata['org_data'] - summarydata['v1']
# Fixed: this column previously repeated 'v1' - 'v1_countpois', so the original
# data was never actually compared against the count_pois output.
summarydata['org-v1count'] = summarydata['org_data'] - summarydata['v1_countpois']
summarydata['v1-v1count'] = summarydata['v1'] - summarydata['v1_countpois']

summarydata

Unnamed: 0,columna,org_data,v1,v1_countpois,org-v1,org-v1count,v1-v1count
0,max_escuelas,30.410337,30.410337,30.410337,0.0,0.0,0.0
1,max_preescolar,19.174252,19.174252,19.174252,0.0,0.0,0.0
2,max_primaria,19.586162,19.586162,19.586162,0.0,0.0,0.0
3,max_secundaria,29.502061,29.502061,29.502061,0.0,0.0,0.0
4,max_servicios comunitarios,51.22157,51.22157,51.22157,0.0,0.0,0.0
5,max_salud,30.004489,30.004489,30.004489,0.0,0.0,0.0
6,max_guarderías,24.394581,24.394581,24.394581,0.0,0.0,0.0
7,max_asistencia social,48.149934,48.149934,48.149934,0.0,0.0,0.0
8,max_comercio,22.000875,22.000875,22.000875,0.0,0.0,0.0
9,max_alimentos,11.743858,11.743858,11.743858,0.0,0.0,0.0


In [9]:
# Columns of the original proximity table (after the rename applied when loading it)
v1_original_prox_gdf.columns

Index(['hex_id_8', 'geometry', 'max_escuelas', 'max_preescolar',
       'max_primaria', 'max_secundaria', 'max_servicios comunitarios',
       'max_salud', 'max_guarderías', 'max_asistencia social', 'max_comercio',
       'max_alimentos', 'max_personal', 'max_farmacias', 'max_hogar',
       'max_complementarios', 'max_entretenimiento', 'max_social',
       'max_actividad física', 'max_cultural', 'max_time', 'pobtot', 'pobfem',
       'pobmas', 'pob_0a14', 'pob_15a24', 'pob_25a59', 'p_60ymas',
       'dens_pob_ha', 'city'],
      dtype='object')

In [10]:
# Columns of the Script 21 (version 1) output
v1_2024_prox_gdf.columns

Index(['hex_id', 'res', 'max_escuelas', 'max_preescolar', 'max_primaria',
       'max_secundaria', 'max_servicios comunitarios', 'max_salud',
       'max_guarderías', 'max_asistencia social', 'max_comercio',
       'max_alimentos', 'max_personal', 'max_farmacias', 'max_hogar',
       'max_complementarios', 'max_entretenimiento', 'max_social',
       'max_actividad física', 'max_cultural', 'idx_preescolar',
       'idx_primaria', 'idx_secundaria', 'idx_salud', 'idx_guarderías',
       'idx_asistencia social', 'idx_alimentos', 'idx_personal',
       'idx_farmacias', 'idx_hogar', 'idx_complementarios', 'idx_social',
       'idx_actividad física', 'idx_cultural', 'mean_time', 'median_time',
       'max_time', 'idx_sum', 'pobtot', 'dens_pob_ha', 'city', 'geometry'],
      dtype='object')

In [11]:
# Columns of the Script 21 output that includes the count_pois modifications
v1_countpois_2024_prox_gdf.columns

Index(['hex_id', 'res', 'max_escuelas', 'max_preescolar', 'max_primaria',
       'max_secundaria', 'max_servicios comunitarios', 'max_salud',
       'max_guarderías', 'max_asistencia social', 'max_comercio',
       'max_alimentos', 'max_personal', 'max_farmacias', 'max_hogar',
       'max_complementarios', 'max_entretenimiento', 'max_social',
       'max_actividad física', 'max_cultural', 'escuelas_15min',
       'preescolar_15min', 'primaria_15min', 'secundaria_15min',
       'servicios comunitarios_15min', 'salud_15min', 'guarderías_15min',
       'asistencia social_15min', 'comercio_15min', 'alimentos_15min',
       'personal_15min', 'farmacias_15min', 'hogar_15min',
       'complementarios_15min', 'entretenimiento_15min', 'social_15min',
       'actividad física_15min', 'cultural_15min', 'idx_preescolar',
       'idx_primaria', 'idx_secundaria', 'idx_salud', 'idx_guarderías',
       'idx_asistencia social', 'idx_alimentos', 'idx_personal',
       'idx_farmacias', 'idx_hogar', 'idx_