# 27 - Scorecards statistics

This notebook __calculates different statistics for each area of interest__ and saves the resulting csv in: 
'../../../output/data/santiago/scorecards_statistics.csv'

## Import libraries

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import sys
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
    import aup

In [2]:
all_local_dirs = 'alex'

In [3]:
if all_local_dirs == 'alex':
    aois_dir = "../../../data/external/temporal_todocker/santiago/aois/"
    data_dir = "../../../data/external/temporal_todocker/santiago/proximidad/"
    pois_dir = "../../../data/external/temporal_fromjupyter/santiago/pois/"
    ndvi_dir = "../../../data/external/temporal_todocker/santiago/ndvi_santiago/"
#elif all_local_dirs == 'edgar':
    #grl_dir = "../../../data/processed/00_pois_formated/aereal_data/"
    #count_pois_dir = grl_dir
    #ndvi_dir = grl_dir
else:
    print("WARNING: Fix all_local_dirs variable")

## Load areas of interest

### AMS

In [4]:
# Old method: load from database
#query = 'SELECT * FROM projects_research.santiago_aoi'
#santiago_ams = aup.gdf_from_query(query)

# New method: Zonas censales
santiago_ams = gpd.read_file(aois_dir+'zonaurbana_zonascensales.gpkg')

# Set CRS
try:
    santiago_ams = santiago_ams.set_crs("EPSG:4326")
except:
    santiago_ams = santiago_ams.to_crs("EPSG:4326")

# Simplifying format
santiago_ams['name'] = 'ams'
santiago_ams = santiago_ams[['name','geometry']]

# Show
print(santiago_ams.shape)
santiago_ams.head(2)

(1, 2)


Unnamed: 0,name,geometry
0,ams,"MULTIPOLYGON (((-70.80625 -33.58906, -70.80626..."


### Comunas

In [5]:
# Load all
all_comunas = gpd.read_file(aois_dir+'santiago_comunas_zonaurbana.geojson')

# Set CRS
try:
    all_comunas = all_comunas.set_crs("EPSG:4326")
except:
    all_comunas = all_comunas.to_crs("EPSG:4326")

# Filter for comunas of interest
comunas_list = ['La Florida','Las Condes','Providencia','Pedro Aguirre Cerda','Quilicura']
comunas = all_comunas.loc[all_comunas.Comuna.isin(comunas_list)].copy()

# Simplifying format
comunas = comunas[['Comuna','geometry']]
comunas = comunas.rename(columns={'Comuna':'name'})

# Show
print(comunas.shape)
comunas.head(2)

(5, 2)


Unnamed: 0,name,geometry
6,Providencia,"MULTIPOLYGON (((-70.58484 -33.43126, -70.58445..."
20,La Florida,"MULTIPOLYGON (((-70.50766 -33.50052, -70.50770..."


### Unidades vecinales

In [6]:
# Load all
all_unidades_vecinales = gpd.read_file(aois_dir+'santiago_unidadesvecinales_zonaurbana.geojson')

# Set CRS
try:
    all_unidades_vecinales = all_unidades_vecinales.set_crs("EPSG:4326")
except:
    all_unidades_vecinales = all_unidades_vecinales.to_crs("EPSG:4326")

# Filter for uvs of interest (For 13106_11, take _A, ignore _B)
uvs_list = ['13101_3','13101_15','13101_18','13101_49','13123_14','13106_08','13106_11_A','13106_17','13117_19']
unidades_vecinales = all_unidades_vecinales.loc[all_unidades_vecinales.COD_UNICO_.isin(uvs_list)].copy()

# Simplifiying format
unidades_vecinales['name'] = unidades_vecinales['COD_UNICO_']
unidades_vecinales = unidades_vecinales[['name','geometry']]

# Show
print(unidades_vecinales.shape)
unidades_vecinales.head(2)

(9, 2)


Unnamed: 0,name,geometry
29,13101_15,"MULTIPOLYGON (((-70.67395 -33.44337, -70.67395..."
32,13101_18,"MULTIPOLYGON (((-70.66009 -33.44736, -70.66006..."


### Alameda, alameda oriente & alameda poniente

In [7]:
# ALAMEDA
if all_local_dirs == 'alex':
    alameda = gpd.read_file(aois_dir+"buffer_800m_nueva_alameda_32719.gpkg")
elif all_local_dirs == 'edgar':
    alameda = gpd.read_file(aois_dir + "buffer 800m nueva alameda.shp")
alameda['name'] = 'alameda'
alameda = alameda[['name','geometry']]
alameda = alameda.to_crs("EPSG:4326")

# ALAMEDA PONIENTE
alameda_poniente = gpd.read_file(aois_dir + 'alameda_poniente.gpkg')
alameda_poniente['name'] = 'alameda_poniente'
alameda_poniente = alameda_poniente[['name','geometry']]
alameda_poniente = alameda_poniente.to_crs("EPSG:4326")

# ALAMEDA ORIENTE
alameda_oriente = gpd.read_file(aois_dir + 'alameda_oriente.gpkg')
alameda_oriente['name'] = 'alameda_oriente'
alameda_oriente = alameda_oriente[['name','geometry']]
alameda_oriente = alameda_oriente.to_crs("EPSG:4326")

### Join all aois

In [8]:
all_aoi_gdfs = [santiago_ams,comunas,alameda,alameda_poniente,alameda_oriente,unidades_vecinales]
aoi_all = pd.concat(all_aoi_gdfs,
                    ignore_index = True, axis = 0)
aoi_all

Unnamed: 0,name,geometry
0,ams,"MULTIPOLYGON (((-70.80625 -33.58906, -70.80626..."
1,Providencia,"MULTIPOLYGON (((-70.58484 -33.43126, -70.58445..."
2,La Florida,"MULTIPOLYGON (((-70.50766 -33.50052, -70.50770..."
3,Las Condes,"MULTIPOLYGON (((-70.47897 -33.36728, -70.47897..."
4,Pedro Aguirre Cerda,"MULTIPOLYGON (((-70.67123 -33.51069, -70.67179..."
5,Quilicura,"MULTIPOLYGON (((-70.75679 -33.38348, -70.75679..."
6,alameda,"MULTIPOLYGON (((-70.70775 -33.45031, -70.69297..."
7,alameda_poniente,"MULTIPOLYGON (((-70.67975 -33.44347, -70.67894..."
8,alameda_oriente,"MULTIPOLYGON (((-70.67894 -33.45865, -70.67975..."
9,13101_15,"MULTIPOLYGON (((-70.67395 -33.44337, -70.67395..."


## __Statistics for each area of interest's scorecard__

In [9]:
# [DONE] Población total 
# --------------------------------------------------------------------------> unidades_vecinales.shp ['PERSONAS'] (DIRECTAMENTE para las UVs)
# --------------------------------------------------------------------------> zonas_censales_hogares_RM.shp ['persons'] (REPARTIDO para el resto)
# [DONE] Área (ha)
# --------------------------------------------------------------------------> Geometría
# [DONE] Densidad de población (pob/ha)
# -------------------------------------------------------------------------->[Población total / Área (ha)]


# [DONE] Área total de construcción (m2)
# --------------------------------------------------------------------------> usos_de_suelo.gpkg(Voronoi) [Suma de todas las áreas]
# [DONE] Uso Habitacional(%)
# --------------------------------------------------------------------------> usos_de_suelo.gpkg(Voronoi) Columna ['HABIT’]
# [DONE] Uso Equipamiento (%)
# --------------------------------------------------------------------------> usos_de_suelo.gpkg(Voronoi) Columnas [‘CULT’, ’DEPOR’, ’EDUC’, ’SALUD’]
# [DONE] Uso Comercio (%)
# --------------------------------------------------------------------------> usos_de_suelo.gpkg(Voronoi) Columna [‘COM’]
# [DONE] Uso Servicios (%)
# --------------------------------------------------------------------------> usos_de_suelo.gpkg(Voronoi) Columnas [‘ADMIN’, ‘HOTEL’, ‘MIN’, ‘OFIC’]
# [DONE] Uso Industria (%)
# --------------------------------------------------------------------------> usos_de_suelo.gpkg(Voronoi) Columna [‘INDUS’]
# [DONE] Uso Otros (%)
# --------------------------------------------------------------------------> usos_de_suelo.gpkg(Voronoi) Columnas [‘ARG’, ‘BBCC’, ‘BODEG’, ‘ERIAZO’, ‘ESTAC’, ‘OTROS’, ‘TPTE’]


# [DONE] Total de viviendas
# --------------------------------------------------------------------------> casas_deptos_mzn.gpkg(Voronoi) Columna ['total_viv']
# [DONE] Casas(%)
# --------------------------------------------------------------------------> casas_deptos_mzn.gpkg(Voronoi) Columna ['casas']
# [DONE] Departamentos (%)
# --------------------------------------------------------------------------> casas_deptos_mzn.gpkg(Voronoi) Columna ['deptos']
# [DONE] Personas por vivienda --> [Población total / Total de viviendas]
# --------------------------------------------------------------------------> [Población total / total de viviendas ocupadas]


# [DONE] Viviendas sociales
# --------------------------------------------------------------------------> 'viv_social.gpkg'(Voronoi) Columna ['Housing'], originalmente ['Total_de_v']
# [DONE] Vivienda social (%)
# --------------------------------------------------------------------------> [Viviendas sociales / Total de viviendas]


# [DONE] Total de oficinas
# --------------------------------------------------------------------------> 'oficinas.gpkg'(Voronoi) Columna ['Oficinas']

# [DONE] NDVI
# --------------------------------------------------------------------------> avg de ndvi_mean x 100 (Mejora readability)

# [DONE] Nivel socioeconómico
# --------------------------------------------------------------------------> 'bas_ismt_urbano_2022_32719.gpkg' Columnas de cada nivel socioeconómico.

In [10]:
# Create data_gdf (Will store data for each area of interest)
data_gdf = aoi_all.copy()
data_gdf.set_index('name',inplace=True)

# Area of each aoi
data_gdf = data_gdf.to_crs("EPSG:32719")
data_gdf['area(ha)'] = data_gdf.area/10000
data_gdf = data_gdf.to_crs("EPSG:4326")

# Show
print(data_gdf.shape)
data_gdf.head(1)

(18, 2)


Unnamed: 0_level_0,geometry,area(ha)
name,Unnamed: 1_level_1,Unnamed: 2_level_1
ams,"MULTIPOLYGON (((-70.80625 -33.58906, -70.80626...",81390.798192


### __Población total y densidad de población (pob/ha)__

In [11]:
# Load data
sociodemografico_zonas_censales = gpd.read_file(data_dir + 'sociodemografico_zonas_censales_32719.gpkg')

# Set CRS
try:
    sociodemografico_zonas_censales = sociodemografico_zonas_censales.set_crs("EPSG:4326")
except:
    sociodemografico_zonas_censales = sociodemografico_zonas_censales.to_crs("EPSG:4326")

# Show
print(sociodemografico_zonas_censales.shape)
sociodemografico_zonas_censales.head(1)

(2376, 36)


Unnamed: 0,GEOCODI,REGION,NOM_REG,PROVINC,NOM_PRO,COMUNA,NOM_COM,DISTRIT,LOC_ZON,NOM_LOC,...,ACTIV_N_A,ACTIV_N_B,area_ha,Dns_prs,prc_hg_,prc_cn_,prc_dmy,pr_JHES,prc_OcA,geometry
0,13101011001,13,REGIÓN METROPOLITANA DE SANTIAGO,131,SANTIAGO,13101,SANTIAGO,1,1,,...,978.0,407.0,20.91,103.969393,0.76376,0.107536,0.133785,0.799323,0.828112,"MULTIPOLYGON (((-70.63925 -33.43663, -70.63932..."


In [12]:
# Set current data of interest
data_of_interest = sociodemografico_zonas_censales.copy()

# Find area of existing polygons (Used for representative data extraction)
data_of_interest = data_of_interest.to_crs("EPSG:32719")
data_of_interest['area'] = data_of_interest.area
data_of_interest = data_of_interest.to_crs("EPSG:4326")

# Data extraction by area of interest
for aoi_name in list(aoi_all.name.unique()):

    # Extract area of interest geometry
    aoi = aoi_all.loc[aoi_all.name==aoi_name].copy()

    # Clip data for area of interest
    data_of_interest_aoi = gpd.clip(data_of_interest, aoi)

    # Find percentage of geometry that fell inside clipped area
    data_of_interest_aoi = data_of_interest_aoi.to_crs("EPSG:32719")
    data_of_interest_aoi['clipped_area'] = data_of_interest_aoi.area
    data_of_interest_aoi = data_of_interest_aoi.to_crs("EPSG:4326")

    data_of_interest_aoi['pct_inside'] = data_of_interest_aoi['clipped_area']/data_of_interest_aoi['area']

    # -----------------------------------------------------------------------------------------
    # Register data
    # Población total
    data_of_interest_aoi['persons_inside'] = data_of_interest_aoi['persons'] * data_of_interest_aoi['pct_inside']
    data_gdf.loc[aoi_name,'Población total'] = data_of_interest_aoi['persons_inside'].sum()

# Densidad de población (hab/ha)
data_gdf['Densidad de población (hab/ha)'] = data_gdf['Población total'] / data_gdf['area(ha)']

data_gdf.head(1)

Unnamed: 0_level_0,geometry,area(ha),Población total,Densidad de población (hab/ha)
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ams,"MULTIPOLYGON (((-70.80625 -33.58906, -70.80626...",81390.798192,6139397.0,75.431094


### Overwrite Población total and Densidad de población (hab/ha) for Unidades vecinales with Unidades vecinales layer.

In [13]:
# Load data
sociodemografico_unidades_vecinales = gpd.read_file(data_dir + 'sociodemografico_unidades_vecinales_32719.gpkg')

# Set CRS
try:
    sociodemografico_unidades_vecinales = sociodemografico_unidades_vecinales.set_crs("EPSG:4326")
except:
    sociodemografico_unidades_vecinales = sociodemografico_unidades_vecinales.to_crs("EPSG:4326")

# Show
print(sociodemografico_unidades_vecinales.shape)
sociodemografico_unidades_vecinales.head(1)

(3675, 77)


Unnamed: 0,NOMBRE_REG,CODIGO_REG,NOMBRE_COM,CUT,NOMBRE_UV,CODIGO_UV,PORC_VIV,COD_UNICO_,PERSONAS,HOMBRE,...,FARMACIAS,SALUD_ALTA,SALUD_MEDI,SALUD_BAJA,SALUD_SIN_,SALUD_PRIV,SALUD_PUBL,SALUD_OTRO,TOTAL_SALU,geometry
0,AYSÉN DEL GENERAL CARLOS IBÁÑEZ DEL CAMPO,11,COYHAIQUE,11101,CERRO LA VIRGEN,15,16.3265,11101_15,72,45,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,"MULTIPOLYGON (((-71.92013 -45.80636, -71.92012..."


In [14]:
uvs_list = ['13101_3','13101_15','13101_18','13101_49','13123_14','13106_08','13106_11_A','13106_17','13117_19']

for uv in uvs_list:
    # Remove _A from '13106_11_A'
    if uv == '13106_11_A':
        uv_name = '13106_11'
    else:
        uv_name = uv

    # Find population in Unidad Vecinal
    uv_persons = sociodemografico_unidades_vecinales.loc[sociodemografico_unidades_vecinales['COD_UNICO_'] == uv_name].PERSONAS.unique()[0]
    uv_persons = int(uv_persons)

    # Register data
    data_gdf.loc[uv,'Población total'] = uv_persons
    
# Densidad de población (hab/ha)
data_gdf['Densidad de población (hab/ha)'] = data_gdf['Población total'] / data_gdf['area(ha)']

data_gdf.head(1)

Unnamed: 0_level_0,geometry,area(ha),Población total,Densidad de población (hab/ha)
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ams,"MULTIPOLYGON (((-70.80625 -33.58906, -70.80626...",81390.798192,6139397.0,75.431094


### __Usos, espacio construido__

In [15]:
# Load data
usos_construidos = gpd.read_file(pois_dir + 'usos_de_suelo.gpkg')

# Set CRS
try:
    usos_construidos = usos_construidos.set_crs("EPSG:4326")
except:
    usos_construidos = usos_construidos.to_crs("EPSG:4326")

# Show
print(usos_construidos.shape)
usos_construidos.head(1)

(229965, 20)


Unnamed: 0,osmid,ADMIN,AGR,BBCC,BODEG,COM,CULT,DEPOR,EDUC,ERIAZO,ESTAC,HABIT,HOTEL,INDUS,MIN,OFIC,OTROS,SALUD,TPTE,geometry
0,386139,0.0,0.0,0.520425,163.909199,769.964416,27.715437,0.0,13.781373,0.0,0.0,203.073595,14.213205,0.107459,0.0,887.353986,30.780062,0.199923,0.0,POINT (-70.64725 -33.44313)


In [16]:
# Set current data of interest
data_of_interest = usos_construidos.copy()

# Data extraction by area of interest
for aoi_name in list(aoi_all.name.unique()):
    
    # Extract area of interest geometry
    aoi = aoi_all.loc[aoi_all.name==aoi_name].copy()
    
    # Clip data for area of interest
    data_of_interest_aoi = gpd.clip(data_of_interest, aoi)
    
    # -----------------------------------------------------------------------------------------
    # ALL BUILT AREA
    
    # Find sum of all built area by node
    all_uses = ['ADMIN','AGR','BBCC','BODEG','COM','CULT','DEPOR','EDUC','ERIAZO','ESTAC','HABIT','HOTEL','INDUS','MIN','OFIC','OTROS','SALUD','TPTE']
    data_of_interest_aoi['tot_area'] = data_of_interest_aoi[all_uses].sum(axis=1)
    
    # All built area
    area_total = data_of_interest_aoi['tot_area'].sum()
    
    # -----------------------------------------------------------------------------------------
    # REGISTER DATA
    
    # Register data - Uso Habitacional (%)
    area_habitacional = data_of_interest_aoi['HABIT'].sum()
    data_gdf.loc[aoi_name,'Uso Habitacional (%)'] = area_habitacional / area_total
    
    # Register data - Uso Equipamiento (%)
    area_equipamiento = (data_of_interest_aoi['CULT'].sum() +
                         data_of_interest_aoi['DEPOR'].sum() + 
                         data_of_interest_aoi['EDUC'].sum() + 
                         data_of_interest_aoi['SALUD'].sum())
    data_gdf.loc[aoi_name,'Uso Equipamiento (%)'] = area_equipamiento / area_total
    
    # Register data - Uso Comercio (%)
    area_comercio = data_of_interest_aoi['COM'].sum()
    data_gdf.loc[aoi_name,'Uso Comercio (%)'] = area_comercio / area_total
    
    # Register data - Uso Servicios (%)
    area_servicios = (data_of_interest_aoi['ADMIN'].sum() +
                      data_of_interest_aoi['HOTEL'].sum() +
                      data_of_interest_aoi['MIN'].sum() +
                      data_of_interest_aoi['OFIC'].sum())
    data_gdf.loc[aoi_name,'Uso Servicios (%)'] = area_servicios / area_total
    
    # Register data - Uso Industria (%)
    area_industria = (data_of_interest_aoi['INDUS'].sum())
    data_gdf.loc[aoi_name,'Uso Industria (%)'] = area_industria / area_total
    
    # Register data - Uso Otros (%)
    area_otros = (data_of_interest_aoi['AGR'].sum() +
                  data_of_interest_aoi['BBCC'].sum() +
                  data_of_interest_aoi['BODEG'].sum() +
                  data_of_interest_aoi['ERIAZO'].sum() +
                  data_of_interest_aoi['ESTAC'].sum() +
                  data_of_interest_aoi['OTROS'].sum() +
                  data_of_interest_aoi['TPTE'].sum())
    data_gdf.loc[aoi_name,'Uso Otros (%)'] =  area_otros / area_total
    
    # Register data - Área total de construcción (m2)
    data_gdf.loc[aoi_name,'Área total de construcción (m2)'] = area_total

# Show
data_gdf.head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

Unnamed: 0_level_0,geometry,area(ha),Población total,Densidad de población (hab/ha),Uso Habitacional (%),Uso Equipamiento (%),Uso Comercio (%),Uso Servicios (%),Uso Industria (%),Uso Otros (%),Área total de construcción (m2)
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
ams,"MULTIPOLYGON (((-70.80625 -33.58906, -70.80626...",81390.798192,6139397.0,75.431094,0.606881,0.053303,0.065598,0.065289,0.045099,0.16383,228907900.0


### __Total de viviendas, casas(%), departamentos (%) y Personas por vivienda__

In [17]:
# Load data
casas_deptos_mzn = gpd.read_file(pois_dir + 'casas_deptos_mzn.gpkg')

# Set CRS
try:
    casas_deptos_mzn = casas_deptos_mzn.set_crs("EPSG:4326")
except:
    casas_deptos_mzn = casas_deptos_mzn.to_crs("EPSG:4326")

# Show
print(casas_deptos_mzn.shape)
casas_deptos_mzn.head(1)

(214430, 7)


Unnamed: 0,osmid,total_viv,viv_ocupa,casas,deptos,otros,geometry
0,14609420,20.126666,18.528881,10.392544,9.664592,0.069529,POINT (-70.75478 -33.51598)


In [18]:
# Set current data of interest
data_of_interest = casas_deptos_mzn.copy()

# Data extraction by area of interest
for aoi_name in list(aoi_all.name.unique()):
    
    # Extract area of interest geometry
    aoi = aoi_all.loc[aoi_all.name==aoi_name].copy()
    
    # Clip data for area of interest
    data_of_interest_aoi = gpd.clip(data_of_interest, aoi)
    
    # -----------------------------------------------------------------------------------------
    # REGISTER DATA

    # Total de viviendas
    total_viv = data_of_interest_aoi['total_viv'].sum()
    data_gdf.loc[aoi_name,'Total de viviendas'] = total_viv

    # % Casas
    data_gdf.loc[aoi_name,'Casas(%)'] = data_of_interest_aoi['casas'].sum() / total_viv

    # % Casas
    data_gdf.loc[aoi_name,'Departamentos(%)'] = data_of_interest_aoi['deptos'].sum() / total_viv

    # Viviendas ocupadas (Para personas por vivienda)
    data_gdf.loc[aoi_name,'viv_ocupa'] = data_of_interest_aoi['viv_ocupa'].sum()
    
# Personas por vivienda
data_gdf['Personas por vivienda'] = data_gdf['Población total']/data_gdf['viv_ocupa']
data_gdf.drop(columns=['viv_ocupa'],inplace=True)

# Show
data_gdf.head(1)

Unnamed: 0_level_0,geometry,area(ha),Población total,Densidad de población (hab/ha),Uso Habitacional (%),Uso Equipamiento (%),Uso Comercio (%),Uso Servicios (%),Uso Industria (%),Uso Otros (%),Área total de construcción (m2),Total de viviendas,Casas(%),Departamentos(%),Personas por vivienda
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
ams,"MULTIPOLYGON (((-70.80625 -33.58906, -70.80626...",81390.798192,6139397.0,75.431094,0.606881,0.053303,0.065598,0.065289,0.045099,0.16383,228907900.0,2058372.0,0.634475,0.341428,3.26263


### __Viviendas sociales y Vivienda social (%)__

In [19]:
# Load data
viviendas_sociales = gpd.read_file(pois_dir + 'viv_social.gpkg')

# Set CRS
try:
    viviendas_sociales = viviendas_sociales.set_crs("EPSG:4326")
except:
    viviendas_sociales = viviendas_sociales.to_crs("EPSG:4326")

# Show
print(viviendas_sociales.shape)
viviendas_sociales.head(1)

(34639, 3)


Unnamed: 0,osmid,housing,geometry
0,14528355,0.662426,POINT (-70.69014 -33.45685)


In [20]:
# Set current data of interest
data_of_interest = viviendas_sociales.copy()

# Data extraction by area of interest
for aoi_name in list(aoi_all.name.unique()):
    
    # Extract area of interest geometry
    aoi = aoi_all.loc[aoi_all.name==aoi_name].copy()
    
    # Clip data for area of interest
    data_of_interest_aoi = gpd.clip(data_of_interest, aoi)
    
    # -----------------------------------------------------------------------------------------
    # REGISTER DATA

    # Total de viviendas sociales
    total_viv_soc = data_of_interest_aoi['housing'].sum()
    data_gdf.loc[aoi_name,'Viviendas sociales'] = total_viv_soc

# Vivienda social (%)
data_gdf['Vivienda social (%)'] = data_gdf['Viviendas sociales'] / data_gdf['Total de viviendas']

data_gdf.head(1)

Unnamed: 0_level_0,geometry,area(ha),Población total,Densidad de población (hab/ha),Uso Habitacional (%),Uso Equipamiento (%),Uso Comercio (%),Uso Servicios (%),Uso Industria (%),Uso Otros (%),Área total de construcción (m2),Total de viviendas,Casas(%),Departamentos(%),Personas por vivienda,Viviendas sociales,Vivienda social (%)
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
ams,"MULTIPOLYGON (((-70.80625 -33.58906, -70.80626...",81390.798192,6139397.0,75.431094,0.606881,0.053303,0.065598,0.065289,0.045099,0.16383,228907900.0,2058372.0,0.634475,0.341428,3.26263,260254.974832,0.126437


### __Oficinas__

In [21]:
# Load data
oficinas = gpd.read_file(pois_dir + 'oficinas.gpkg')

# Set CRS
try:
    oficinas = oficinas.set_crs("EPSG:4326")
except:
    oficinas = oficinas.to_crs("EPSG:4326")

# Show
print(oficinas.shape)
oficinas.head(1)

(229965, 3)


Unnamed: 0,osmid,Oficinas,geometry
0,386139,12.820194,POINT (-70.64725 -33.44313)


In [22]:
# Set current data of interest
data_of_interest = oficinas.copy()

# Data extraction by area of interest
for aoi_name in list(aoi_all.name.unique()):
    
    # Extract area of interest geometry
    aoi = aoi_all.loc[aoi_all.name==aoi_name].copy()
    
    # Clip data for area of interest
    data_of_interest_aoi = gpd.clip(data_of_interest, aoi)
    
    # -----------------------------------------------------------------------------------------
    # REGISTER DATA

    # Total de oficinas
    total_oficinas = data_of_interest_aoi['Oficinas'].sum()
    data_gdf.loc[aoi_name,'Total de oficinas'] = total_oficinas

data_gdf.head(1)

Unnamed: 0_level_0,geometry,area(ha),Población total,Densidad de población (hab/ha),Uso Habitacional (%),Uso Equipamiento (%),Uso Comercio (%),Uso Servicios (%),Uso Industria (%),Uso Otros (%),Área total de construcción (m2),Total de viviendas,Casas(%),Departamentos(%),Personas por vivienda,Viviendas sociales,Vivienda social (%),Total de oficinas
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
ams,"MULTIPOLYGON (((-70.80625 -33.58906, -70.80626...",81390.798192,6139397.0,75.431094,0.606881,0.053303,0.065598,0.065289,0.045099,0.16383,228907900.0,2058372.0,0.634475,0.341428,3.26263,260254.974832,0.126437,79030.122855


### __NDVI__

In [23]:
# Load data
ndvi_gdf = gpd.read_file(ndvi_dir + f'Santiago_ndvi_HexRes10_v0.geojson')
ndvi_gdf = ndvi_gdf[['ndvi_mean','geometry']]

# Show
print(ndvi_gdf.shape)
ndvi_gdf.head(1)

(64190, 2)


Unnamed: 0,ndvi_mean,geometry
0,0.135445,"POLYGON ((-70.67723 -33.43729, -70.67786 -33.4..."


In [24]:
# Set current data of interest
data_of_interest = ndvi_gdf.copy()

# Data extraction by area of interest
for aoi_name in list(aoi_all.name.unique()):

    # Extract area of interest geometry
    aoi = aoi_all.loc[aoi_all.name==aoi_name].copy()

    # Clip data for area of interest
    data_of_interest_aoi = gpd.clip(data_of_interest, aoi)

    # -----------------------------------------------------------------------------------------
    # Register data
    # NDVI (Multiplied by 100 to improve readability)
    data_gdf.loc[aoi_name,'NDVI'] = (data_of_interest_aoi['ndvi_mean'].mean())*100

data_gdf.head(1)

Unnamed: 0_level_0,geometry,area(ha),Población total,Densidad de población (hab/ha),Uso Habitacional (%),Uso Equipamiento (%),Uso Comercio (%),Uso Servicios (%),Uso Industria (%),Uso Otros (%),Área total de construcción (m2),Total de viviendas,Casas(%),Departamentos(%),Personas por vivienda,Viviendas sociales,Vivienda social (%),Total de oficinas,NDVI
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
ams,"MULTIPOLYGON (((-70.80625 -33.58906, -70.80626...",81390.798192,6139397.0,75.431094,0.606881,0.053303,0.065598,0.065289,0.045099,0.16383,228907900.0,2058372.0,0.634475,0.341428,3.26263,260254.974832,0.126437,79030.122855,18.142911


### __Nivel socioeconómico__

In [25]:
# Load data
socioeconomico_urbano = gpd.read_file(data_dir + 'bas_ismt_urbano_2022_32719.gpkg')

# Set CRS
try:
    socioeconomico_urbano = socioeconomico_urbano.set_crs("EPSG:4326")
except:
    socioeconomico_urbano = socioeconomico_urbano.to_crs("EPSG:4326")
    
# Show
print(socioeconomico_urbano.shape)
socioeconomico_urbano.head(1)

(1863, 46)


Unnamed: 0,objectid,cod_region,nom_region,cod_provin,nom_provin,cod_comuna,nom_comuna,geocodigo,urbano,distrito,...,alleg,escolar,mat_acept,mat_recup,mat_irrec,ind_mat,poblacion,st_area_sh,st_length_,geometry
0,1,13,Metropolitana de Santiago,131,Santiago,13101,Santiago,13101011001,SANTIAGO,1,...,196.0,15.93956,1091.0,1.0,0.0,8.999084,2174,209145.642793,2200.895816,"MULTIPOLYGON (((-70.64101 -33.43757, -70.64103..."


In [26]:
# ---------------------------------------------------------------------------------------
# COMUNAS DATA ALREADY IN socioeconomico_urbano GDF.

# Group by data adding each nivel socioeconomico
by_comunas = socioeconomico_urbano.groupby('nom_comuna').agg({'ab':np.sum,
                                                              'c1':np.sum,
                                                              'c2':np.sum,
                                                              'c3':np.sum,
                                                              'd':np.sum,
                                                              'e':np.sum})

# For each comuna, for each nivel socioeconómico, register data.
comunas_list = ['La Florida','Las Condes','Providencia','Pedro Aguirre Cerda','Quilicura']
niveles_socioeconomicos = ['ab','c1','c2','c3','d','e']
for comuna in comunas_list:
    for n_socio in niveles_socioeconomicos:
        data_gdf.loc[comuna,n_socio] = by_comunas.loc[comuna,n_socio]

# ---------------------------------------------------------------------------------------
# FOR BUFFER ALAMEDA, ALAMEDA PONIENTE AND ALAMEDA ORIENTE, consider inside if more than 50% of area inside.
areas_of_interest = ['alameda','alameda_poniente','alameda_oriente']

# Set current data of interest
data_of_interest = socioeconomico_urbano.copy()

# Find area of existing polygons (Used for deciding if inside or not)
data_of_interest = data_of_interest.to_crs("EPSG:32719")
data_of_interest['area'] = data_of_interest.area
data_of_interest = data_of_interest.to_crs("EPSG:4326")

# Data extraction by area of interest
for aoi_name in areas_of_interest:

    # Extract area of interest geometry
    aoi = aoi_all.loc[aoi_all.name==aoi_name].copy()

    # Clip data for area of interest
    data_of_interest_aoi = gpd.clip(data_of_interest, aoi)

    # Find percentage of geometry that fell inside clipped area
    data_of_interest_aoi = data_of_interest_aoi.to_crs("EPSG:32719")
    data_of_interest_aoi['clipped_area'] = data_of_interest_aoi.area
    data_of_interest_aoi = data_of_interest_aoi.to_crs("EPSG:4326")

    data_of_interest_aoi['pct_inside'] = data_of_interest_aoi['clipped_area']/data_of_interest_aoi['area']

    # Keep only if pct_inside is greater than 50%.
    clipped_data_of_interest_aoi = data_of_interest_aoi.loc[data_of_interest_aoi.pct_inside >= 0.5]

    # Register data for each n_socio in current area of interest
    for n_socio in niveles_socioeconomicos:
        data_gdf.loc[aoi_name,n_socio] = clipped_data_of_interest_aoi[n_socio].sum()

data_gdf.head(1)

Unnamed: 0_level_0,geometry,area(ha),Población total,Densidad de población (hab/ha),Uso Habitacional (%),Uso Equipamiento (%),Uso Comercio (%),Uso Servicios (%),Uso Industria (%),Uso Otros (%),...,Viviendas sociales,Vivienda social (%),Total de oficinas,NDVI,ab,c1,c2,c3,d,e
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ams,"MULTIPOLYGON (((-70.80625 -33.58906, -70.80626...",81390.798192,6139397.0,75.431094,0.606881,0.053303,0.065598,0.065289,0.045099,0.16383,...,260254.974832,0.126437,79030.122855,18.142911,,,,,,


In [27]:
data_gdf.to_csv('../../../output/data/santiago/scorecards_statistics.csv')

# OLD CODE

In [28]:
old = """
# Set current data of interest
data_of_interest = usos_construidos.copy()

# Data extraction by area of interest
for aoi_name in list(aoi_all.name.unique()):

    # Extract area of interest geometry
    aoi = aoi_all.loc[aoi_all.name==aoi_name].copy()

    # Clip data for area of interest
    data_of_interest_aoi = gpd.clip(data_of_interest, aoi)

    # Find percentage of geometry that fell inside clipped area
    data_of_interest_aoi = data_of_interest_aoi.to_crs("EPSG:32719")
    data_of_interest_aoi['clipped_area'] = data_of_interest_aoi.area
    data_of_interest_aoi = data_of_interest_aoi.to_crs("EPSG:4326")

    data_of_interest_aoi['pct_inside'] = data_of_interest_aoi['clipped_area']/data_of_interest_aoi['area']

    # -----------------------------------------------------------------------------------------
    # Prepare data - All built uses
    all_uses = ['ADMIN','AGR','BBCC','BODEG','COM','CULT','DEPOR','EDUC','ERIAZO',
                'ESTAC','HABIT','HOTEL','INDUS','MIN','OFIC','OTROS','SALUD','TPTE']

    # Prepare data - clip built construction areas according to pct of block that falls inside aoi
    clipped_uses = []
    for use in all_uses:
        data_of_interest_aoi[f'{use}_inside'] = data_of_interest_aoi[use] * data_of_interest_aoi['pct_inside']
        clipped_uses.append(f'{use}_inside')

    # Find sum of all built area by block
    data_of_interest_aoi['tot_area'] = data_of_interest_aoi[clipped_uses].sum(axis=1)

    # -----------------------------------------------------------------------------------------
    # Register data - Uso Habitacional (%)
    area_habitacional = data_of_interest_aoi['HABIT_inside'].sum()
    data_gdf.loc[aoi_name,'Uso Habitacional (%)'] = area_habitacional / data_of_interest_aoi['tot_area'].sum()
    
    # Register data - Uso Equipamiento (%)
    area_equipamiento = (data_of_interest_aoi['CULT_inside'].sum() +
                         data_of_interest_aoi['DEPOR_inside'].sum() + 
                         data_of_interest_aoi['EDUC_inside'].sum() + 
                         data_of_interest_aoi['SALUD_inside'].sum())
    data_gdf.loc[aoi_name,'Uso Equipamiento (%)'] = area_equipamiento / data_of_interest_aoi['tot_area'].sum()
    
    # Register data - Uso Comercio (%)
    area_comercio = data_of_interest_aoi['COM_inside'].sum()
    data_gdf.loc[aoi_name,'Uso Comercio (%)'] = area_comercio / data_of_interest_aoi['tot_area'].sum()
    
    # Register data - Uso Servicios (%)
    area_servicios = (data_of_interest_aoi['ADMIN_inside'].sum() +
                      data_of_interest_aoi['HOTEL_inside'].sum() +
                      data_of_interest_aoi['MIN_inside'].sum() +
                      data_of_interest_aoi['OFIC_inside'].sum())
    data_gdf.loc[aoi_name,'Uso Servicios (%)'] = area_servicios / data_of_interest_aoi['tot_area'].sum()
    
    # Register data - Uso Industria (%)
    area_industria = (data_of_interest_aoi['INDUS_inside'].sum())
    data_gdf.loc[aoi_name,'Uso Industria (%)'] = area_industria / data_of_interest_aoi['tot_area'].sum()
    
    # Register data - Uso Otros (%)
    area_otros = (data_of_interest_aoi['AGR_inside'].sum() +
                  data_of_interest_aoi['BBCC_inside'].sum() +
                  data_of_interest_aoi['BODEG_inside'].sum() +
                  data_of_interest_aoi['ERIAZO_inside'].sum() +
                  data_of_interest_aoi['ESTAC_inside'].sum() +
                  data_of_interest_aoi['OTROS_inside'].sum() +
                  data_of_interest_aoi['TPTE_inside'].sum())
    data_gdf.loc[aoi_name,'Uso Otros (%)'] =  area_otros / data_of_interest_aoi['tot_area'].sum()

    # Register data - Área total de construcción (m2)
    data_gdf.loc[aoi_name,'Área total de construcción (m2)'] = area_habitacional + area_equipamiento + area_comercio + area_servicios + area_industria + area_otros

# Checking if all uses sum 100%
data_gdf['Uso_sum'] = (data_gdf['Uso Habitacional (%)']+data_gdf['Uso Equipamiento (%)']+data_gdf['Uso Comercio (%)']+data_gdf['Uso Servicios (%)']
+data_gdf['Uso Industria (%)']+data_gdf['Uso Otros (%)'])

# Show
data_gdf """

In [29]:
old = """
# Set current data of interest
data_of_interest = casas_deptos_mzn.copy()

# Find area of existing polygons (Used for representative data extraction)
data_of_interest = data_of_interest.to_crs("EPSG:32719")
data_of_interest['area'] = data_of_interest.area
data_of_interest = data_of_interest.to_crs("EPSG:4326")

# Data extraction by area of interest
for aoi_name in list(aoi_all.name.unique()):

    # Extract area of interest geometry
    aoi = aoi_all.loc[aoi_all.name==aoi_name].copy()

    # Clip data for area of interest
    data_of_interest_aoi = gpd.clip(data_of_interest, aoi)

    # Find percentage of geometry that fell inside clipped area
    data_of_interest_aoi = data_of_interest_aoi.to_crs("EPSG:32719")
    data_of_interest_aoi['clipped_area'] = data_of_interest_aoi.area
    data_of_interest_aoi = data_of_interest_aoi.to_crs("EPSG:4326")

    data_of_interest_aoi['pct_inside'] = data_of_interest_aoi['clipped_area']/data_of_interest_aoi['area']

    # -----------------------------------------------------------------------------------------
    # Register data
    # Total de viviendas
    data_of_interest_aoi['total_viv_inside'] = data_of_interest_aoi['total_viv'] * data_of_interest_aoi['pct_inside']
    total_viv_inside = data_of_interest_aoi['total_viv_inside'].sum()
    data_gdf.loc[aoi_name,'Total de viviendas'] = total_viv_inside

    # % Casas
    data_of_interest_aoi['casas_inside'] = data_of_interest_aoi['casas'] * data_of_interest_aoi['pct_inside']
    data_gdf.loc[aoi_name,'Casas(%)'] = data_of_interest_aoi['casas_inside'].sum() / total_viv_inside

    # % Departamentos
    data_of_interest_aoi['deptos_inside'] = data_of_interest_aoi['deptos'] * data_of_interest_aoi['pct_inside']
    data_gdf.loc[aoi_name,'Departamentos(%)'] = data_of_interest_aoi['deptos_inside'].sum() / total_viv_inside

# Personas por vivienda
data_gdf['Personas por vivienda'] = data_gdf['Población total']/data_gdf['Total de viviendas']

data_gdf"""

In [30]:
old = """
# Set current data of interest
data_of_interest = viviendas_sociales.copy()

# Find area of existing polygons (Used for representative data extraction)
data_of_interest = data_of_interest.to_crs("EPSG:32719")
data_of_interest['area'] = data_of_interest.area
data_of_interest = data_of_interest.to_crs("EPSG:4326")

# Data extraction by area of interest
for aoi_name in list(aoi_all.name.unique()):

    # Extract area of interest geometry
    aoi = aoi_all.loc[aoi_all.name==aoi_name].copy()

    # Clip data for area of interest
    data_of_interest_aoi = gpd.clip(data_of_interest, aoi)

    # Find percentage of geometry that fell inside clipped area
    data_of_interest_aoi = data_of_interest_aoi.to_crs("EPSG:32719")
    data_of_interest_aoi['clipped_area'] = data_of_interest_aoi.area
    data_of_interest_aoi = data_of_interest_aoi.to_crs("EPSG:4326")

    data_of_interest_aoi['pct_inside'] = data_of_interest_aoi['clipped_area']/data_of_interest_aoi['area']

    # -----------------------------------------------------------------------------------------
    # Register data
    # Viviendas sociales
    data_of_interest_aoi['Total_de_v_inside'] = data_of_interest_aoi['Total_de_v'] * data_of_interest_aoi['pct_inside']
    data_gdf.loc[aoi_name,'Viviendas sociales'] = data_of_interest_aoi['Total_de_v_inside'].sum()


# Vivienda social (%)
data_gdf['Vivienda social (%)'] = data_gdf['Viviendas sociales']/data_gdf['Total de viviendas']

data_gdf"""

In [31]:
old = """
# Set current data of interest
data_of_interest = oficinas.copy()

# Find area of existing polygons (Used for representative data extraction)
data_of_interest = data_of_interest.to_crs("EPSG:32719")
data_of_interest['area'] = data_of_interest.area
data_of_interest = data_of_interest.to_crs("EPSG:4326")

# Data extraction by area of interest
for aoi_name in list(aoi_all.name.unique()):

    # Extract area of interest geometry
    aoi = aoi_all.loc[aoi_all.name==aoi_name].copy()

    # Clip data for area of interest
    data_of_interest_aoi = gpd.clip(data_of_interest, aoi)

    # Find percentage of geometry that fell inside clipped area
    data_of_interest_aoi = data_of_interest_aoi.to_crs("EPSG:32719")
    data_of_interest_aoi['clipped_area'] = data_of_interest_aoi.area
    data_of_interest_aoi = data_of_interest_aoi.to_crs("EPSG:4326")

    data_of_interest_aoi['pct_inside'] = data_of_interest_aoi['clipped_area']/data_of_interest_aoi['area']

    # -----------------------------------------------------------------------------------------
    # Register data
    # Total de oficinas
    data_of_interest_aoi['Oficinas_inside'] = data_of_interest_aoi['Oficinas'] * data_of_interest_aoi['pct_inside']
    data_gdf.loc[aoi_name,'Total de oficinas'] = data_of_interest_aoi['Oficinas_inside'].sum()

data_gdf"""