## Import libraries

In [4]:
# Relative path prefix used below both for sys.path (so `aup` can be imported)
# and as the base of the data output directory.
accesibilidad_urbana = "../../../"

In [5]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the directory pointed to by `accesibilidad_urbana` importable.
module_path = os.path.abspath(os.path.join(accesibilidad_urbana))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import unconditionally: when the import lived inside the `if` above it was
# skipped whenever module_path was already on sys.path, leaving `aup` undefined.
import aup

## Notebook config

In [6]:
# Directory that holds one "project_XX/" sub-folder of GeoPackage outputs per project.
script27_output_dir = f"{accesibilidad_urbana}data/external/santiago/output/"

## Load data

In [9]:
def _load_project_layers(project_code):
    """Read the three hex layers of one project from its output folder.

    Parameters
    ----------
    project_code : str
        Two-digit project code as used in folder and file names (e.g. "01").

    Returns
    -------
    tuple
        (hexproximity, hexvariablesanalysis, hexanalysis), each the result of
        gpd.read_file on the corresponding .gpkg file.
    """
    project_dir = f"{script27_output_dir}project_{project_code}/"
    layer_names = ("hexproximity", "hexvariablesanalysis", "hexanalysis")
    return tuple(
        gpd.read_file(f"{project_dir}santiago_{layer_name}_project_{project_code}.gpkg")
        for layer_name in layer_names
    )


# red_buena_calidad (project_01)
(baseline_hexproximity,
 baseline_hexvariablesanalysis,
 baseline_hexanalysis) = _load_project_layers("01")

# red_buena_calidad_pza_italia (project_02)
(plazaitalia_hexproximity,
 plazaitalia_hexvariablesanalysis,
 plazaitalia_hexanalysis) = _load_project_layers("02")

# red_buena_calidad_norte_sur (project_03)
(nortesur_hexproximity,
 nortesur_hexvariablesanalysis,
 nortesur_hexanalysis) = _load_project_layers("03")

# red_buena_calidad_parque_bueras (project_04)
(parquebueras_hexproximity,
 parquebueras_hexvariablesanalysis,
 parquebueras_hexanalysis) = _load_project_layers("04")

In [10]:
# Print the CRS of each baseline layer, one per line.
for baseline_layer in (baseline_hexproximity,
                       baseline_hexvariablesanalysis,
                       baseline_hexanalysis):
    print(baseline_layer.crs)

EPSG:4326
EPSG:4326
EPSG:4326


## Data comparisons - Proximity

This cell compares the baseline proximity to any given project proximity and saves the result to a gdf. The gdf contains:
* The difference for each attribute in a column (for each hex).
* Whether there was a positive (increase) or negative (decrease) change in ANY time or count column (for each hex).

__Result:__ There IS a problem. Some hexes (other than those directly below project interventions/new node creations) registered a longer time before the project than after it.

In [43]:
# Inputs for the proximity comparison: pick one project layer and its matching id.
# Available project layers: plazaitalia_hexproximity, nortesur_hexproximity, parquebueras_hexproximity
# Matching project ids: '02', '03', '04'
# NOTE(review): compare_proximity() declares parameters with these same names, but
# nothing in this cell passes these module-level values to it — confirm whether a
# call such as compare_proximity(comparing_project, project_id, save) is missing.
comparing_project = parquebueras_hexproximity.copy()
project_id = '04'
# Whether the comparison result should be written to disk.
save = True

# Columns compared by default: each amenity contributes a "<amenity>_time" and a
# "<amenity>_count_15min" column.
PROXIMITY_ATTRIBUTES = [
    'carniceria_time','carniceria_count_15min','hogar_time','hogar_count_15min','bakeries_time','bakeries_count_15min',
    'supermercado_time','supermercado_count_15min','banco_time','banco_count_15min','ferias_time','ferias_count_15min',
    'local_mini_market_time','local_mini_market_count_15min','correos_time','correos_count_15min','centro_recyc_time','centro_recyc_count_15min',
    'hospital_priv_time','hospital_priv_count_15min','hospital_pub_time','hospital_pub_count_15min','clinica_priv_time','clinica_priv_count_15min',
    'clinica_pub_time','clinica_pub_count_15min','farmacia_time','farmacia_count_15min','vacunatorio_priv_time','vacunatorio_priv_count_15min',
    'vacunatorio_pub_time','vacunatorio_pub_count_15min','consult_ado_priv_time','consult_ado_priv_count_15min','consult_ado_pub_time','consult_ado_pub_count_15min',
    'salud_mental_time','salud_mental_count_15min','labs_priv_time','labs_priv_count_15min','residencia_adumayor_time','residencia_adumayor_count_15min',
    'eq_deportivo_priv_time','eq_deportivo_priv_count_15min','eq_deportivo_pub_time','eq_deportivo_pub_count_15min','club_deportivo_time','club_deportivo_count_15min',
    'civic_office_time','civic_office_count_15min','tax_collection_time','tax_collection_count_15min','social_security_time','social_security_count_15min',
    'police_time','police_count_15min','bomberos_time','bomberos_count_15min','museos_priv_time','museos_priv_count_15min','museos_pub_time','museos_pub_count_15min',
    'cines_time','cines_count_15min','sitios_historicos_time','sitios_historicos_count_15min','restaurantes_bar_cafe_time','restaurantes_bar_cafe_count_15min',
    'librerias_time','librerias_count_15min','ep_plaza_small_time','ep_plaza_small_count_15min','ep_plaza_big_time','ep_plaza_big_count_15min',
    'edu_basica_pub_time','edu_basica_pub_count_15min','edu_media_pub_time','edu_media_pub_count_15min','jardin_inf_pub_time','jardin_inf_pub_count_15min',
    'universidad_time','universidad_count_15min','edu_tecnica_time','edu_tecnica_count_15min','edu_adultos_pub_time','edu_adultos_pub_count_15min',
    'edu_especial_pub_time','edu_especial_pub_count_15min','bibliotecas_time','bibliotecas_count_15min','centro_edu_amb_time','centro_edu_amb_count_15min',
    'paradas_tp_ruta_time','paradas_tp_ruta_count_15min','paradas_tp_metro_time','paradas_tp_metro_count_15min','paradas_tp_tren_time','paradas_tp_tren_count_15min',
    'ciclovias_time','ciclovias_count_15min','estaciones_bicicletas_time','estaciones_bicicletas_count_15min',
]


def compare_proximity(comparing_project, project_id, save, baseline=None, attributes=None):
    """Compare a project's proximity columns against the baseline, hex by hex.

    The baseline and the project are merged on 'hex_id'. For every compared
    attribute a '<attribute>_diff' column (project minus baseline) is added,
    plus four 0/1 flag columns telling whether ANY time / count attribute of
    the hex increased or decreased.

    Parameters
    ----------
    comparing_project : DataFrame or GeoDataFrame
        Project proximity table; must contain 'hex_id' and every compared attribute.
    project_id : str
        Identifier used in the output file name when `save` is truthy.
    save : bool
        If truthy, write the result to
        script27_output_dir + "project_{project_id}_proximity_changes.gpkg".
    baseline : DataFrame or GeoDataFrame, optional
        Baseline proximity table. Defaults to the notebook-level
        `baseline_hexproximity`.
    attributes : iterable of str, optional
        Columns to compare. Defaults to PROXIMITY_ATTRIBUTES. Names ending in
        '_time' feed the time flags; names containing '_count' feed the count flags.

    Returns
    -------
    DataFrame or GeoDataFrame
        The merged table with the diff and flag columns added.

    Raises
    ------
    KeyError
        If a compared attribute is missing from the baseline or the project.
    """
    if baseline is None:
        baseline = baseline_hexproximity
    attributes_list = list(PROXIMITY_ATTRIBUTES if attributes is None else attributes)

    # Fail early with a readable message; otherwise a column missing on one side
    # would not receive the _x/_y suffix and would surface as an obscure KeyError.
    missing = [attribute for attribute in attributes_list
               if attribute not in baseline.columns or attribute not in comparing_project.columns]
    if missing:
        raise KeyError(f"Attributes missing from baseline or comparing project: {missing}")

    # ---------- Merge baseline and comparing project data
    # Baseline columns get the '_x' suffix (old), project columns get '_y' (new).
    both_gdfs = pd.merge(baseline, comparing_project[['hex_id'] + attributes_list],
                         on='hex_id', suffixes=('_x', '_y'))

    # ---------- Compare baseline (old) and project (new) attributes, saving the difference in a col and
    #            identifying hexs where time or count increased or decreased for any attribute.

    # Flags start at 0 and are switched to 1 as soon as any attribute changes.
    both_gdfs['time_increase'] = 0
    both_gdfs['time_decrease'] = 0
    both_gdfs['count_increase'] = 0
    both_gdfs['count_decrease'] = 0

    for attribute in attributes_list:
        diff_column = f"{attribute}_diff"
        both_gdfs[diff_column] = both_gdfs[f"{attribute}_y"] - both_gdfs[f"{attribute}_x"]

        # Classify by suffix so an amenity name that happens to contain "time"
        # cannot send a count column to the time flags.
        if attribute.endswith('_time'):
            flag_prefix = 'time'
        elif '_count' in attribute:
            flag_prefix = 'count'
        else:
            continue

        both_gdfs.loc[both_gdfs[diff_column] > 0, f"{flag_prefix}_increase"] = 1
        both_gdfs.loc[both_gdfs[diff_column] < 0, f"{flag_prefix}_decrease"] = 1

    # Save result
    if save:
        both_gdfs.to_file(script27_output_dir + f"project_{project_id}_proximity_changes.gpkg", driver='GPKG')

    return both_gdfs

## Finding problem

Project_02 (Plaza Italia) presents no problem, while Project_03 (Norte Sur) and Project_04 (Parque Bueras) have problems.

### Finding problem - Test 1: Is the network/code the problem? Or is the problem located in project_03 and project_04 specifically?

* __Approach:__ Re-do network process in QGIS for Project_00(baseline) and project_01(plaza_italia) and re-run __without changing code.__
* __Result:__ Re-doing the network and re-running Plaza Italia resulted in Plaza Italia having problems. However, the cause could still be the code rather than the network.

### Finding problem - Test 2: Was the algorithm changed between projects?

* __Approach:__ Run Project_00(baseline) and project_01(plaza_italia) again __without changing anything (same old existing network)__ and re-test to see if problems emerge.
* __Result:__ Re-using the old network results in the correct result for Plaza Italia. __The problem is located in how the network is created.__

In [45]:
# Re-read the baseline and Plaza Italia proximity layers from disk, then compare them again.
baseline_hexproximity = gpd.read_file(f"{script27_output_dir}project_01/santiago_hexproximity_project_01.gpkg")
plazaitalia_hexproximity = gpd.read_file(f"{script27_output_dir}project_02/santiago_hexproximity_project_02.gpkg")

compare_proximity(comparing_project=plazaitalia_hexproximity, project_id="02test02", save=True)

  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)


### Finding problem - Test 3: Invert QGIS process

#### Instead of:
1. __Split lines with lines__
2. __Vector > Geometry Tools > Multipart to singleparts__
3. Extract specific vertices [0,-1]
4. MMQGIS Modify > Drop dups
#### Do:
1. __Vector > Geometry Tools > Multipart to singleparts__
2. __Split lines with lines__
3. Extract specific vertices [0,-1]
4. MMQGIS Modify > Drop dups

__Result:__ "NotImplementedError: Sub-geometries may have coordinate sequences, but multi-part geometries do not"