In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import folium

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

module_path = os.path.abspath(os.path.join('../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
    import aup

2023-02-01 20:40:30 Configured OSMnx 1.1.1
2023-02-01 20:40:30 HTTP response caching is on


## Importing and filtering data

In [2]:
#Importing hexres9 with censo 2010 and 2020
gdf = gpd.read_file("../../data/external/to_db/hex_censo_10_20_mza_res9_museocienciasambientales.gpkg")
gdf.head(2)

Unnamed: 0,hex_id_9,pobtot,pobfem,pobmas,p_0a2,p_0a2_f,p_0a2_m,p_3ymas,p_3ymas_f,p_3ymas_m,...,R_VivHab_20,R_VivDes_20,Z_RVivHab_20,R_VivHab_10,Z_RVivHab_10,Chng_RVivHab_10,DensPob_10_20,city,n_polig,geometry
0,8949ab4b0cfffff,2252.0,1146.0,1106.0,82.0,36.0,38.0,2170.0,1106.0,1064.0,...,0.898928,0.07657,0.588098,0.0,,0.898928,-48.898518,Guadalajara,2,"POLYGON ((-103.41024 20.72734, -103.40902 20.7..."
1,8949ab4b667ffff,2100.0,1078.0,1022.0,58.0,23.0,21.0,2033.0,1042.0,991.0,...,0.921418,0.041602,0.700936,0.0,,0.921418,-42.192622,Guadalajara,3,"POLYGON ((-103.39432 20.72775, -103.39310 20.7..."


In [3]:
fields_tokeep = ['pobtot', 'pobfem','pobmas','p_0a2','p_3a5','p_6a11','p_12a14','p_15a17','p_18a24','p_60ymas','hogjef_f','vph_inter','n_polig']
newgdf = gdf[fields_tokeep]
newgdf.head(2)

Unnamed: 0,pobtot,pobfem,pobmas,p_0a2,p_3a5,p_6a11,p_12a14,p_15a17,p_18a24,p_60ymas,hogjef_f,vph_inter,n_polig
0,2252.0,1146.0,1106.0,82.0,99.0,225.0,86.0,98.0,282.0,408.0,229.0,387.0,2
1,2100.0,1078.0,1022.0,58.0,89.0,183.0,103.0,70.0,221.0,356.0,227.0,337.0,3


## Data treatment

In [4]:
newgdf['p_25a59'] = newgdf['pobtot'] - (newgdf['p_0a2']+newgdf['p_3a5']+newgdf['p_6a11']+newgdf['p_12a14']+newgdf['p_15a17']+newgdf['p_18a24']+newgdf['p_60ymas'])
newgdf.head(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  newgdf['p_25a59'] = newgdf['pobtot'] - (newgdf['p_0a2']+newgdf['p_3a5']+newgdf['p_6a11']+newgdf['p_12a14']+newgdf['p_15a17']+newgdf['p_18a24']+newgdf['p_60ymas'])


Unnamed: 0,pobtot,pobfem,pobmas,p_0a2,p_3a5,p_6a11,p_12a14,p_15a17,p_18a24,p_60ymas,hogjef_f,vph_inter,n_polig,p_25a59
0,2252.0,1146.0,1106.0,82.0,99.0,225.0,86.0,98.0,282.0,408.0,229.0,387.0,2,972.0
1,2100.0,1078.0,1022.0,58.0,89.0,183.0,103.0,70.0,221.0,356.0,227.0,337.0,3,1020.0


### Calculate the totals per polygon

In [18]:
fields = {"pobtot":np.nansum,
         "pobfem":np.nansum,
         "pobmas":np.nansum,
         "p_0a2":np.nansum,
         "p_3a5":np.nansum,
         "p_6a11":np.nansum,
         "p_12a14":np.nansum,
         "p_15a17":np.nansum,
         "p_18a24":np.nansum,
         "p_25a59":np.nansum,
         "p_60ymas":np.nansum,
         "hogjef_f":np.nansum,
         "vph_inter":np.nansum,}
summary = newgdf.groupby("n_polig").agg(fields)
summary

Unnamed: 0_level_0,pobtot,pobfem,pobmas,p_0a2,p_3a5,p_6a11,p_12a14,p_15a17,p_18a24,p_25a59,p_60ymas,hogjef_f,vph_inter
n_polig,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,3678.0,1809.0,1841.0,112.0,112.0,236.0,97.0,96.0,278.0,2666.0,81.0,246.0,186.0
2,7339.0,3765.0,3574.0,246.0,326.0,627.0,316.0,361.0,906.0,3305.0,1252.0,701.0,1313.0
3,5791.0,2991.0,2800.0,198.0,269.0,514.0,266.0,257.0,701.0,2685.0,901.0,602.0,1012.0
4,11950.0,6162.0,5783.0,421.0,560.0,1022.0,507.0,570.0,1393.0,5439.0,2038.0,1396.0,2103.0


### Calculate the percentages per polygon

In [20]:
def calc_pctg(group):
    fields_tocalculate_pctg = ['pobfem','pobmas','p_0a2','p_3a5','p_6a11','p_12a14','p_15a17','p_18a24','p_25a59','p_60ymas']
    
    for field in fields_tocalculate_pctg:
        pctg = (group[field] / group['pobtot'])*100
        group['%'+field] = pctg
    
    return group
    

summary.groupby('n_polig').apply(calc_pctg)

Unnamed: 0_level_0,pobtot,pobfem,pobmas,p_0a2,p_3a5,p_6a11,p_12a14,p_15a17,p_18a24,p_25a59,...,%pobfem,%pobmas,%p_0a2,%p_3a5,%p_6a11,%p_12a14,%p_15a17,%p_18a24,%p_25a59,%p_60ymas
n_polig,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3678.0,1809.0,1841.0,112.0,112.0,236.0,97.0,96.0,278.0,2666.0,...,49.184339,50.054377,3.045133,3.045133,6.416531,2.637303,2.610114,7.558456,72.485046,2.202284
2,7339.0,3765.0,3574.0,246.0,326.0,627.0,316.0,361.0,906.0,3305.0,...,51.301267,48.698733,3.351955,4.442022,8.543398,4.305764,4.918926,12.345006,45.033383,17.059545
3,5791.0,2991.0,2800.0,198.0,269.0,514.0,266.0,257.0,701.0,2685.0,...,51.649111,48.350889,3.419099,4.645139,8.875842,4.593334,4.437921,12.104991,46.365049,15.558625
4,11950.0,6162.0,5783.0,421.0,560.0,1022.0,507.0,570.0,1393.0,5439.0,...,51.564854,48.393305,3.523013,4.686192,8.552301,4.242678,4.769874,11.656904,45.514644,17.054393
