# 00-variables_socioeconomic_level

This notebook calculates the AMAI socioeconomic level (NSE) index for a given city, using census data aggregated to hexagons at resolution 8 or 9.

## Import libraries

In [1]:
from pathlib import Path

# Locate the repository root by walking up from the working directory.
current_path = Path().resolve()

# Ancestors are checked closest-first; the working directory itself is the last
# candidate so the notebook also works when launched from the repository root.
project_root = next(
    (
        folder
        for folder in (*current_path.parents, current_path)
        if folder.name == "accesibilidad-urbana"
    ),
    None,
)

# Fail with an explicit message instead of an undefined-name error further down
if project_root is None:
    raise FileNotFoundError(
        f"Could not find an 'accesibilidad-urbana' directory at or above {current_path}"
    )

print(project_root)

/home/observatorio/Documents/repos/accesibilidad-urbana


In [2]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import colors
import seaborn as sns

from sklearn.preprocessing import StandardScaler

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Put the repository root on the import path (once) so the project package
# `aup` can be imported from the notebook.
module_path = os.path.abspath(project_root)
if module_path not in sys.path:
    sys.path.append(module_path)

import aup

## Notebook config

In [None]:
# City to process and hexagon resolution used to filter the census table
city = 'Guadalajara'
res = 9
# Despite its name, output_dir holds the full path of the GeoPackage file to write
output_dir = f"{project_root}/data/external/amai_gdl/amai_gdf_res{res}.gpkg"

## Load data

In [None]:
censo_schema = 'sociodemografico'
censo_table = 'censo_inegi_20_ageb_hex'

# Fetch only the census rows for the configured city and resolution
query = (
    f'SELECT * FROM {censo_schema}.{censo_table} '
    f'WHERE "city" = \'{city}\' AND "res" = \'{res}\''
)
censo_gdf = aup.gdf_from_query(query, geometry_col='geometry')

# Show
print(censo_gdf.shape)
censo_gdf.head(2)

In [None]:
def pca(df, columns):
    """Run a Principal Component Analysis and return Eigenvalues and Eigenvectors

    The covariance matrix of the selected columns is eigen-decomposed. Results are
    ordered by decreasing Eigenvalue, so index 0 is always the principal component
    (direction of largest variance). Each Eigenvector is oriented so that its
    largest-magnitude entry is positive, which makes the output deterministic.

    Args:
        df {pandas.DataFrame} -- DataFrame containing numeric columns for PCA
        columns {list} -- list with column names for PCA

    Returns:
        numpy.array, numpy.array -- array of Eigenvalues (descending), array of
        Eigenvectors where column i belongs to Eigenvalue i
    """

    # np.cov expects one variable per row, hence the transpose
    features = np.asarray(df[columns], dtype=float).T
    # atleast_2d keeps the single-column case (scalar covariance) decomposable
    cov_matrix = np.atleast_2d(np.cov(features))

    # A covariance matrix is symmetric: eigh guarantees real output and returns
    # the Eigenvalues in ascending order (np.linalg.eig guarantees no order)
    values, vectors = np.linalg.eigh(cov_matrix)

    # Reorder so the largest Eigenvalue and its vector come first
    order = np.argsort(values)[::-1]
    values = values[order]
    vectors = vectors[:, order]

    # An Eigenvector is only defined up to its sign; fix the sign so that the
    # entry with the largest absolute value in every column is positive
    pivots = vectors[np.abs(vectors).argmax(axis=0), np.arange(vectors.shape[1])]
    vectors = vectors * np.where(pivots < 0, -1.0, 1.0)

    return values, vectors

def amai_nse(df, group_column_name):
    """Calculate a Socioeconomic Level (NSE) index according to AMAI

    Three indicators are derived for every row:
        Idx_PC     = vph_pc / row-wise max of (vph_pc, vph_refri, vph_lavad, vph_tv)
        Idx_School = graproes / max of graproes over all rows
        Idx_PosBas = p18ym_pb / p_18ymas
    They are combined using the principal component returned by pca() and the
    result is standardized (zero mean, unit variance). The input DataFrame is
    not modified.

    Args:
        df {pandas.DataFrame} -- DataFrame containing the columns 'vph_pc', 'vph_refri',
            'vph_lavad', 'vph_tv', 'graproes', 'p18ym_pb', 'p_18ymas', 'pobtot' and
            the column named by group_column_name
        group_column_name {string} -- name of the identifier column kept in the result

    Returns:
        pandas.DataFrame -- rows sorted by ascending NSE index, with lower-cased columns:
            group column, 'pobtot', 'idx_pc', 'idx_school', 'idx_posbas', 'nse_idx'
    """
    idx_cols = ['Idx_PC', 'Idx_School', 'Idx_PosBas']

    #calculate indexes (assign returns a new frame, so the caller's data is untouched)
    df = df.assign(
        Idx_PC=df['vph_pc'] / df[['vph_pc','vph_refri','vph_lavad','vph_tv']].max(axis=1),
        Idx_School=df['graproes'] / df['graproes'].max(),
        Idx_PosBas=df['p18ym_pb'] / df['p_18ymas'],
    )

    #a non-zero value divided by zero yields inf; treat it as missing so it
    #cannot turn the covariance matrix into NaN
    df[idx_cols] = df[idx_cols].replace([np.inf, -np.inf], np.nan)

    #remove rows where all three indexes are null
    df = df.dropna(subset=idx_cols, how='all').copy()

    #fill missing data with the mean of the indexes available in the same row
    #(assigning the filled column back avoids chained in-place fillna, which
    #does not update the frame under pandas copy-on-write)
    row_mean = df[idx_cols].mean(axis=1)
    for col in idx_cols:
        df[col] = df[col].fillna(row_mean)

    #pca
    values, vectors = pca(df, columns=idx_cols)

    #pick the component with the largest eigenvalue instead of assuming it is
    #stored first
    top = int(np.argmax(values))

    #calculate nse index: eigenvalue-weighted projection onto the principal component
    #NOTE(review): an eigenvector's sign is arbitrary, so whether a higher index
    #means a higher level depends on what pca() returns -- confirm
    scores = values[top] * (df[idx_cols].to_numpy(dtype=float) @ vectors[:, top])

    #standardize nse index values
    df['NSE_Idx'] = StandardScaler().fit_transform(scores.reshape(-1, 1)).ravel()

    #sort by nse index
    df = df.sort_values('NSE_Idx')

    #filter DataFrame
    df = df[[group_column_name, 'pobtot'] + idx_cols + ['NSE_Idx']].copy()

    df.columns = df.columns.str.lower()

    return df

In [None]:
# Compute the NSE index per hexagon (the result carries no geometry)
amai_df = amai_nse(censo_gdf, 'hex_id')

# Re-attach each hexagon's geometry by hex_id and build the GeoDataFrame
hex_geometry = censo_gdf[['hex_id', 'geometry']]
amai_gdf = gpd.GeoDataFrame(
    amai_df.merge(hex_geometry, on='hex_id'),
    geometry='geometry',
    crs='EPSG:4326',
)

# Show
print(amai_gdf.crs)
print(amai_gdf.shape)
amai_gdf.head(2)

In [None]:
# Save result
# Create the destination folder first so the write does not fail when it is missing
Path(output_dir).parent.mkdir(parents=True, exist_ok=True)
amai_gdf.to_file(output_dir)