# 01-create-allcity-prox-summary

This notebook analyses requested data from a proximity table in order to compare results between cities.

The notebook has a function which takes in as arguments:
- schema - (str) - Database schema where the data of interest is located
- table - (str) - Table (inside the schema) where the data of interest is located
- city_list - (list) - List of cities to query and analyse separately
- prox_column - (str) - Column used to create proximity categories (0a15, 15a30, 30a45, 45a60, +60)
- data_toagg - (str) - Data of interest to analyse by group and by proximity category
    
And creates a summary of data_toagg data by prox_column categories city by city.

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

from shapely import wkt

import matplotlib.pyplot as plt
import seaborn as sns

from pandas.api.types import CategoricalDtype

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the directory three levels above the working directory importable so
# that the project-local `aup` package can be found.
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import unconditionally: when the path was already on sys.path (e.g. set via
# PYTHONPATH) a nested import would be skipped and every later `aup.` call
# would fail with NameError.
import aup



In [7]:
def create_allcity_prox_summary(schema, table, city_list, prox_column, data_toagg):
    """Summarise ``data_toagg`` by proximity category, one row per city.

    For every city in ``city_list`` the rows of ``schema.table`` whose "city"
    column matches the city are loaded through ``aup.gdf_from_query``. Each
    row is assigned a proximity category from ``prox_column`` and
    ``data_toagg`` is summed inside each category.

    Parameters
    ----------
    schema : str
        Database schema where the table of interest is located.
    table : str
        Table of interest inside ``schema``.
    city_list : list
        Cities to query and analyse separately. May be empty, in which case
        an empty frame with the expected columns is returned.
    prox_column : str
        Numeric column binned into the proximity categories:
        ``<=15 -> '0a15'``, ``(15, 30] -> '15a30'``, ``(30, 45] -> '30a45'``,
        ``(45, 60] -> '45a60'`` and ``>60 -> '+60'``. Rows with a missing
        value get no category and are therefore excluded from the category
        sums and from ``city_total``.
    data_toagg : str
        Column summed by proximity category.

    Returns
    -------
    pandas.DataFrame
        One row per city (indexed by city name) with one column per category,
        a ``city_total`` column (sum of the five category columns) and one
        ``<category>_share`` column per category (category / city_total).
    """
    # Ordered category labels and the bin edges that define them.
    categories = ['0a15', '15a30', '30a45', '45a60', '+60']
    bin_edges = [-np.inf, 15, 30, 45, 60, np.inf]
    summary_columns = categories + ['city_total']

    # Per-city results are collected here and turned into a frame once at the
    # end, instead of re-concatenating a growing DataFrame on every iteration.
    city_labels = []
    city_records = []

    for city in city_list:

        print(f'STARTING ANALYSIS FOR {city}')

        #--------------- DOWNLOAD DATA ---------------
        # Double any single quote so a city name containing an apostrophe
        # cannot terminate the SQL string literal early.
        safe_city = str(city).replace("'", "''")
        query = f"SELECT * FROM {schema}.{table} WHERE \"city\" LIKE '{safe_city}'"
        prox_city = aup.gdf_from_query(query, geometry_col='geometry')

        #--------------- PROCESS DATA ---------------
        # Right-closed bins reproduce the <=15 / (15, 30] / ... / >60 rules;
        # missing proximity values stay uncategorised.
        prox_cat = pd.cut(prox_city[prox_column], bins=bin_edges, labels=categories,
                          right=True, include_lowest=True)

        tot_data = prox_city[data_toagg].sum()
        print(f'Total {data_toagg} in {city}: {tot_data}.')

        # observed=False keeps categories with no rows (sum 0), so every city
        # yields all five values, in category order.
        cat_totals = prox_city[data_toagg].groupby(prox_cat, observed=False).sum()

        record = dict(zip(categories, cat_totals.to_numpy()))
        record['city_total'] = cat_totals.sum()
        city_labels.append(city)
        city_records.append(record)

        print(f'ANALIZED DATA FOR {city}')

    data_output = pd.DataFrame(city_records, index=city_labels, columns=summary_columns)
    data_output.columns.name = 'prox_cat'

    # Finally, add the share of each category over the city total.
    for cat in categories:
        data_output[f'{cat}_share'] = data_output[cat] / data_output['city_total']

    return data_output

### City data

In [3]:
# Load the metro_gdf table and keep the distinct values of its "city" column;
# these are the cities summarised further down.
mun_schema = 'metropolis'
mun_table = 'metro_gdf'
query = "SELECT * FROM {}.{}".format(mun_schema, mun_table)
gdf_mun = aup.gdf_from_query(query, geometry_col='geometry')
city_list = gdf_mun['city'].unique()

### Create data summary by city and by proximity category for "pobtot" in "proximityanalysis_hexres9"

In [8]:
# Summarise "pobtot" by proximity category (derived from "max_time") for
# every city in city_list.
schema = 'prox_analysis'
table = 'proximityanalysis_hexres9'
prox_column = 'max_time'
data_toagg = 'pobtot'

pop_data_output = create_allcity_prox_summary(
    schema=schema,
    table=table,
    city_list=city_list,
    prox_column=prox_column,
    data_toagg=data_toagg,
)

STARTING ANALYSIS FOR Aguascalientes
Total pobtot in Aguascalientes: 1043173.0.
STARTING ANALYSIS FOR Ensenada
Total pobtot in Ensenada: 396423.0.
STARTING ANALYSIS FOR Mexicali
Total pobtot in Mexicali: 956287.0.
STARTING ANALYSIS FOR Tijuana
Total pobtot in Tijuana: 2092757.0.
STARTING ANALYSIS FOR La Paz
Total pobtot in La Paz: 268798.0.
STARTING ANALYSIS FOR Campeche
Total pobtot in Campeche: 266692.0.
STARTING ANALYSIS FOR Laguna
Total pobtot in Laguna: 1253652.0.
STARTING ANALYSIS FOR Monclova
Total pobtot in Monclova: 366324.0.
STARTING ANALYSIS FOR Piedras Negras
Total pobtot in Piedras Negras: 205169.0.
STARTING ANALYSIS FOR Saltillo
Total pobtot in Saltillo: 995097.0.
STARTING ANALYSIS FOR Colima
Total pobtot in Colima: 348569.0.
STARTING ANALYSIS FOR Tecoman
Total pobtot in Tecoman: 129714.0.
STARTING ANALYSIS FOR Tapachula
Total pobtot in Tapachula: 251222.0.
STARTING ANALYSIS FOR Tuxtla
Total pobtot in Tuxtla: 728601.0.
STARTING ANALYSIS FOR Chihuahua
Total pobtot in Chihu

In [9]:
# Show the per-city summary: category sums, city_total and category shares
pop_data_output

prox_cat,0a15,15a30,30a45,45a60,+60,city_total,0a15_share,15a30_share,30a45_share,45a60_share,+60_share
Aguascalientes,36458.0,192168.0,182111.0,155767.0,476669.0,1043173.0,0.034949,0.184215,0.174574,0.149320,0.456941
Ensenada,2158.0,35086.0,61877.0,58841.0,238461.0,396423.0,0.005444,0.088506,0.156088,0.148430,0.601532
Mexicali,10184.0,199975.0,177178.0,110849.0,458101.0,956287.0,0.010650,0.209116,0.185277,0.115916,0.479041
Tijuana,4123.0,229374.0,425852.0,418870.0,1014538.0,2092757.0,0.001970,0.109604,0.203489,0.200152,0.484785
La Paz,5516.0,74032.0,52734.0,32835.0,103681.0,268798.0,0.020521,0.275419,0.196184,0.122155,0.385721
...,...,...,...,...,...,...,...,...,...,...,...
Poza Rica,17790.0,89646.0,57100.0,43586.0,132162.0,340284.0,0.052280,0.263445,0.167801,0.128087,0.388387
Veracruz,43828.0,210241.0,161128.0,95158.0,315339.0,825694.0,0.053080,0.254623,0.195143,0.115246,0.381908
Xalapa,65232.0,205661.0,139924.0,117661.0,149659.0,678137.0,0.096193,0.303274,0.206336,0.173506,0.220691
Merida,49143.0,284660.0,237178.0,184244.0,479817.0,1235042.0,0.039791,0.230486,0.192040,0.149180,0.388503


In [10]:
# Persist the "pobtot" summary as CSV (path is relative to the notebook's working directory).
pop_data_output.to_csv('../../../data/external/temporal_fromjupyter/poptot_proxhexres9.csv')

### Create data summary by city and by proximity category for "pob_0a5" in "cdcuidadoras_popageb_2020_hex"

In [None]:
# Summarise "pob_0a5" by proximity category (derived from "max_idx_15_min")
# for every city in city_list.
schema = 'prox_analysis'
table = 'cdcuidadoras_popageb_2020_hex'
prox_column = 'max_idx_15_min'
data_toagg = 'pob_0a5'

data_output = create_allcity_prox_summary(
    schema=schema,
    table=table,
    city_list=city_list,
    prox_column=prox_column,
    data_toagg=data_toagg,
)

In [None]:
# Persist the "pob_0a5" summary as CSV (path is relative to the notebook's working directory).
data_output.to_csv('../../../data/external/temporal_fromjupyter/pob_0a5_cdcuidado.csv')