## Import libraries

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import colors
import seaborn as sns

from pandas.api.types import CategoricalDtype

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
    import aup

  ox.config(


In [2]:
city = 'CDMX'

## Load data

### Load data - Load hex_gdf (Contiene alcaldías)

In [13]:
# Download hex_gdf
hex_schema = 'hexgrid'
hex_table = 'hexgrid_10_city_2020'
query = f"SELECT * FROM {hex_schema}.{hex_table} WHERE \"city\" LIKE \'{city}\'"
hex_gdf = aup.gdf_from_query(query, geometry_col='geometry')

# Show
print(hex_gdf.shape)
hex_gdf.head(2)

(49335, 6)


Unnamed: 0,hex_id_10,geometry,CVEGEO,NOMGEO,city,type
0,8a4995b8cb57fff,"POLYGON ((-99.19358 19.48193, -99.19293 19.482...",9002,Azcapotzalco,CDMX,urban
1,8a4995b88d67fff,"POLYGON ((-99.17778 19.49754, -99.17713 19.497...",9002,Azcapotzalco,CDMX,urban


In [30]:
# Filter hex_gdf for municipalities where Cutzamala system provides water
alcaldias_cutzamala = ['Álvaro Obregón','Azcapotzalco', 'Cuajimalpa de Morelos','Iztacalco', 
                       'Iztapalapa','La Magdalena Contreras','Tlalpan', 'Benito Juárez', 
                       'Cuauhtémoc','Miguel Hidalgo', 'Venustiano Carranza']
hex_gdf_f = hex_gdf.loc[hex_gdf.NOMGEO.isin(alcaldias_cutzamala)]

# Shorten data
hex_gdf_f = hex_gdf_f[['hex_id_10','NOMGEO','geometry']]

# Show
print(hex_gdf_f.shape)
hex_gdf_f.head(2)

(49335, 3)


Unnamed: 0,hex_id_10,NOMGEO,geometry
0,8a4995b8cb57fff,Azcapotzalco,"POLYGON ((-99.19358 19.48193, -99.19293 19.482..."
1,8a4995b88d67fff,Azcapotzalco,"POLYGON ((-99.17778 19.49754, -99.17713 19.497..."


### Load data - Load NDVI (To know where there's vegetation)

In [None]:
# Downloading NDVI res 10 data ########################## When available
ndvi_schema = 'raster_analysis'
ndvi_table = 'ndvi_analysis_hex'
res = 10

query = f"SELECT * FROM {ndvi_schema}.{ndvi_table} WHERE \"city\" LIKE \'{city}\' AND \"res\" = {res}"
ndvi_res10 = aup.gdf_from_query(query, geometry_col='geometry')

# Shorten data
ndvi_res10 = ndvi_res10[['hex_id','ndvi_median','geometry']]

print(ndvi_res10.shape)
ndvi_res10.head(2)

### Load data - Load NDMI

In [21]:
# Downloading complete dataset NDMI res data (aprox. 700 s)
complete_ndmi_schema = 'raster_analysis'
complete_ndmi_table = 'ndmi_complete_dataset_hex'
res = 10
query = f"SELECT * FROM {complete_ndmi_schema}.{complete_ndmi_table} WHERE \"city\" LIKE \'{city}\' AND \"res\"={res}"
complete_ndmi_res10 = aup.df_from_query(query)

# Filter for dates of interest
months_ofinterest = [1,2,3,4,5]
years_ofinterest = [2018,2019,2020,2021,2022,2023]
complete_ndmi_res10_f = complete_ndmi_res10.loc[(complete_ndmi_res10.month.isin(months_ofinterest)) & (complete_ndmi_res10.year.isin(years_ofinterest))]
# Shorten data
complete_ndmi_res10_f = complete_ndmi_res10_f[['hex_id','NOMGEO','ndmi','month','year']]

# Show
print(complete_ndmi_res10_f.shape)
complete_ndmi_res10_f.head(2)

(3873900, 6)


Unnamed: 0,hex_id,res,ndmi,month,year,city
0,8a4995862907fff,10,0.003396,1,2018,CDMX
1,8a4995860517fff,10,0.07049,1,2018,CDMX


## Data treatment

### Data treatment - Create vegetation categories

In [None]:
# Create vegetation categories
ndvi_res10['ndvi_median_rango'] = np.nan
ndvi_res10.loc[ndvi_res10.ndvi_median>=0.6 , 'ndvi_median_rango'] = 'Alta densidad vegetal'
ndvi_res10.loc[(ndvi_res10.ndvi_median>=0.4 )&
                (ndvi_res10.ndvi_median<0.6), 'ndvi_median_rango'] = 'Moderada densidad vegetal'
ndvi_res10.loc[(ndvi_res10.ndvi_median>=0.2)&
                (ndvi_res10.ndvi_median<0.4), 'ndvi_median_rango'] = 'Mínima densidad vegetal'
ndvi_res10.loc[(ndvi_res10.ndvi_median>=0.1)&
                (ndvi_res10.ndvi_median<0.2), 'ndvi_median_rango'] = 'Suelo'
ndvi_res10.loc[(ndvi_res10.ndvi_median<0.1), 'ndvi_median_rango'] = 'Suelo artificial/Agua/Piedra'

# Ordering data
categories = ['Suelo artificial/Agua/Piedra', 'Suelo', 'Mínima densidad vegetal', 'Moderada densidad vegetal', 'Alta densidad vegetal']
ndvi_res10['ndvi_median_rango'] = pd.Categorical(ndvi_res10['ndvi_median_rango'], categories=categories, ordered=True)

# Reviewing process
print(ndvi_res10.shape)
ndvi_res10.head(2)

Show ndvi categorical distribution

In [None]:
ndviproportions = ndvi_res10.groupby('ndvi_median_rango').count()

columns_tokeep = ['hex_id']
ndviproportions = ndviproportions[columns_tokeep]

ndviproportions = ndviproportions.rename(columns={'hex_id':'count'})
ndviproportions['%'] = ndviproportions['count'].apply(lambda x: (x/(ndviproportions['count'].sum()))*100)

ndviproportions

### Data treatment - Filter for places with vegetation

In [None]:
#Keeping High-moderate-minimal vegetation density
data_withveg = ndvi_res10.loc[(ndvi_res10.ndvi_median_rango == 'Alta densidad vegetal') |
               (ndvi_res10.ndvi_median_rango == 'Moderada densidad vegetal')|
               (ndvi_res10.ndvi_median_rango == 'Mínima densidad vegetal')].copy()

#Reviewing process
print(data_withveg.shape)
data_withveg.head(2)

### Data treatment - Merge ndmi for places with vegetation

In [None]:
ndvi_ndmi_gdf = data_withveg.merge(complete_ndmi_res10_f, on='hex_id_10',how='inner')

# Show
print(ndvi_ndmi_gdf.shape)
ndvi_ndmi_gdf.head(2)

### Data treatment - Filter for alcaldías of interest

In [None]:
# Inner join to keep hexs that are in both hex_gdf_f and ndvi_res10
data_withveg_alcaldias = ndvi_ndmi_gdf.merge(hex_gdf_f,left_on='hex_id', right_on='hex_id_10',how='inner')
data_withveg_alcaldias.drop(columns=['hex_id'],inplace=True)

## Data analysis

In [None]:
mean_ndmi_bydate = hex_gdf_f.copy()

i = 0
for year in data_withveg_alcaldias.year.unique()
    for month in data_withveg_alcaldias.month.unique()
        for alcaldia in data_withveg_alcaldias.NOMGEO.unique()

            print(f"Calculating mean ndmi for {alcaldia} date {month}/{year}.")
                
            data_gdf = data_withveg_alcaldias.loc[(data_withveg_alcaldias.year == year) &
                                                  (data_withveg_alcaldias.month == month) &
                                                  (data_withveg_alcaldias.NOMGEO == alcaldia)].copy()

            for hex_id in data_gdf['hex_id_10'].unique():
                hex_data = data_gdf.loc[data_gdf['hex_id_10'] == hex_id].copy()
                mean_ndmi = hex_data['ndmi'].mean()

                idx = mean_ndmi_bydate['hex_id_10'] == hex_id
                mean_ndmi_bydate.loc[idx,f"{year}_{month}"] = mean_ndmi