# 03_ndmi_cdmx

This notebook loads the hexagons with vegetation (according to NDVI) located in the alcaldías supplied by the Cutzamala system, so that their January–May NDMI values can be plotted for each year.

## Import libraries

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import colors
import seaborn as sns

from pandas.api.types import CategoricalDtype

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the repository root importable so the project-local `aup` package resolves.
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import unconditionally: when the path is already on sys.path (re-run, PYTHONPATH),
# an import nested inside the `if` above would be skipped and `aup` left undefined.
import aup

# NOTE(review): the original cell ended with a dangling, unterminated `ox.config(`
# call. osmnx is never imported in this notebook and the fragment is a syntax
# error, so it is dropped here. Restore a complete call if osmnx settings are needed.


In [2]:
# City key interpolated into the WHERE clause of the database queries in this notebook.
city = 'CDMX'

## Load hexs with vegetation in alcaldías of interest

### NDVI Load data - for city and res

The NDVI hexagons were processed in Script 21 using the correct urban + rural hexagon configuration.

In [3]:
# Download NDVI data at resolution 10 for the selected city (approx. 150-200 s).
ndvi_schema = 'raster_analysis'
ndvi_table = 'ndvi_analysis_hex'
res = 10
query = f"SELECT * FROM {ndvi_schema}.{ndvi_table} WHERE \"city\" LIKE \'{city}\' AND \"res\" = {res}"
ndvi_res10 = aup.gdf_from_query(query, geometry_col='geometry')

# Keep only the columns needed downstream. The explicit copy makes the result an
# independent frame, so adding columns to it in later cells does not raise
# SettingWithCopyWarning.
ndvi_res10 = ndvi_res10[['hex_id','ndvi_median']].copy()

# Show
print(ndvi_res10.shape)
ndvi_res10.head(2)

(59557, 2)


Unnamed: 0,hex_id,ndvi_median
0,8a4995b8cb57fff,0.10846
1,8a4995b8156ffff,0.194487


### NDVI Data treatment - Create vegetation categories

In [4]:
# Classify each hexagon by its median NDVI.
# Labels are listed from lowest to highest NDVI; this order defines the ordered categorical.
categories = ['Suelo artificial/Agua/Piedra', 'Suelo', 'Mínima densidad vegetal', 'Moderada densidad vegetal', 'Alta densidad vegetal']

# Bin edges matching the original thresholds. With right=False every bin is
# [lower, upper): <0.1, [0.1, 0.2), [0.2, 0.4), [0.4, 0.6), >=0.6.
ndvi_bins = [-np.inf, 0.1, 0.2, 0.4, 0.6, np.inf]

# pd.cut builds the ordered categorical in one step. The previous approach created
# a float NaN column and then wrote strings into it, which pandas deprecates
# (incompatible-dtype assignment). Missing NDVI values stay missing.
# NOTE(review): a literal +inf NDVI would be left unclassified here; NDVI is
# expected to be finite, confirm if the source can contain inf.
ndvi_res10['ndvi_median_rango'] = pd.cut(
    ndvi_res10['ndvi_median'],
    bins=ndvi_bins,
    labels=categories,
    right=False,
    ordered=True,
)

# Show
print(ndvi_res10.shape)
ndvi_res10.head(2)

(59557, 3)


Unnamed: 0,hex_id,ndvi_median,ndvi_median_rango
0,8a4995b8cb57fff,0.10846,Suelo
1,8a4995b8156ffff,0.194487,Suelo


### NDVI Data treatment - Filter for places with vegetation

In [5]:
# Retain only hexagons classified with minimal, moderate or high vegetation density
vegetation_classes = ['Alta densidad vegetal',
                      'Moderada densidad vegetal',
                      'Mínima densidad vegetal']
has_vegetation = ndvi_res10['ndvi_median_rango'].isin(vegetation_classes)
data_withveg = ndvi_res10[has_vegetation].copy()

# Show
print(data_withveg.shape)
data_withveg.head(2)

(35317, 3)


Unnamed: 0,hex_id,ndvi_median,ndvi_median_rango
6,8a4995b8d277fff,0.28332,Mínima densidad vegetal
9,8a4995b81107fff,0.215763,Mínima densidad vegetal


### hex_gdf Load data - (Contiene alcaldías)

In [6]:
# Fetch the resolution-10 hexagon grid of the city (approx. 150-200 s).
# Each row carries the alcaldía name (NOMGEO) alongside the hexagon geometry.
hex_schema = 'hexgrid'
hex_table = 'hexgrid_10_city_2020'
query = (
    f'SELECT * FROM {hex_schema}.{hex_table} '
    f"WHERE \"city\" LIKE '{city}'"
)
hex_gdf = aup.gdf_from_query(query, geometry_col='geometry')

# Show
print(hex_gdf.shape)
hex_gdf.head(2)

(86349, 6)


Unnamed: 0,hex_id_10,geometry,CVEGEO,NOMGEO,city,type
0,8a4995b8cb57fff,"POLYGON ((-99.19358 19.48193, -99.19293 19.482...",9002,Azcapotzalco,CDMX,urban
1,8a4995b88d67fff,"POLYGON ((-99.17778 19.49754, -99.17713 19.497...",9002,Azcapotzalco,CDMX,urban


### hex_gdf Data treatment - Filter for alcaldías of interest

In [32]:
# Alcaldías (municipalities) where the Cutzamala system provides water
alcaldias_cutzamala = [
    'Álvaro Obregón', 'Azcapotzalco', 'Cuajimalpa de Morelos', 'Iztacalco',
    'Iztapalapa', 'La Magdalena Contreras', 'Tlalpan', 'Benito Juárez',
    'Cuauhtémoc', 'Miguel Hidalgo', 'Venustiano Carranza', 'Coyoacán',
]

# Select the rows of those alcaldías and the columns of interest in one step,
# then expose the hexagon identifier under the common name 'hex_id'
in_cutzamala = hex_gdf['NOMGEO'].isin(alcaldias_cutzamala)
hex_gdf_f = (
    hex_gdf
    .loc[in_cutzamala, ['hex_id_10', 'NOMGEO', 'geometry']]
    .rename(columns={'hex_id_10': 'hex_id'})
)

# Show
print(hex_gdf_f.shape)
hex_gdf_f.head(2)

(52452, 3)


Unnamed: 0,hex_id,NOMGEO,geometry
0,8a4995b8cb57fff,Azcapotzalco,"POLYGON ((-99.19358 19.48193, -99.19293 19.482..."
1,8a4995b88d67fff,Azcapotzalco,"POLYGON ((-99.17778 19.49754, -99.17713 19.497..."


### Data with veg (NDVI) + alcaldías (hex_gdf)

In [34]:
# Keep vegetated hexagons that fall inside the alcaldías of interest:
# an inner join on hex_id discards everything present on only one side
data_withveg_alcaldias = pd.merge(
    data_withveg,
    hex_gdf_f,
    how='inner',
    on='hex_id',
)

# Show
print(data_withveg_alcaldias.shape)
data_withveg_alcaldias.head(2)

(20701, 5)


Unnamed: 0,hex_id,ndvi_median,ndvi_median_rango,NOMGEO,geometry
0,8a4995b8d277fff,0.28332,Mínima densidad vegetal,Azcapotzalco,"POLYGON ((-99.20948 19.50623, -99.20883 19.506..."
1,8a4995b81107fff,0.215763,Mínima densidad vegetal,Azcapotzalco,"POLYGON ((-99.16277 19.46651, -99.16212 19.466..."


## NDMI Load data - Load using city, res and tuple of months of analysis

In [9]:
# Complete NDMI dataset at resolution 10
complete_ndmi_schema = 'raster_analysis'
complete_ndmi_table = 'ndmi_complete_dataset_hex'
res = 10

# OPTION A - Download the complete dataset, then filter for the months of interest (approx. 700-1000s)
#query = f"SELECT * FROM {complete_ndmi_schema}.{complete_ndmi_table} WHERE \"city\" LIKE \'{city}\' AND \"res\"={res}"
#complete_ndmi_res10 = aup.df_from_query(query)
#months_ofinterest = [1,2,3,4,5]
#complete_ndmi_res10_f = complete_ndmi_res10.loc[complete_ndmi_res10.month.isin(months_ofinterest)]

# OPTION B - Query with a tuple of hex ids (first run 3494s, second run 3409s)
#hexs_lst = list(data_withveg_alcaldias.hex_id.unique())
#query = f"SELECT * FROM {complete_ndmi_schema}.{complete_ndmi_table} WHERE \"city\" LIKE \'{city}\' AND \"res\"={res} AND \"hex_id\" IN {str(tuple(hexs_lst))}"
#complete_ndmi_res10 = aup.df_from_query(query)

# OPTION C (in use) - Filter for the months of interest directly in the query (806s)
months_ofinterest = [1,2,3,4,5]
# Build the IN list explicitly. str(tuple(...)) renders a one-element list as
# "(1,)", which is invalid SQL; joining the values works for any non-empty list.
# int() also guarantees only numeric literals reach the query text.
months_sql = ", ".join(str(int(m)) for m in months_ofinterest)
query = f"SELECT * FROM {complete_ndmi_schema}.{complete_ndmi_table} WHERE \"city\" LIKE \'{city}\' AND \"res\"={res} AND \"month\" IN ({months_sql})"
complete_ndmi_res10 = aup.df_from_query(query)

# Keep only the columns needed downstream
complete_ndmi_res10_f = complete_ndmi_res10[['hex_id','ndmi','month','year']]

# Show
print(complete_ndmi_res10_f.shape)
complete_ndmi_res10_f.head(2)

(1911910, 4)


Unnamed: 0,hex_id,ndmi,month,year
0,8a4995862147fff,0.018535,1,2018
1,8a4995862907fff,0.003396,1,2018


## Data treatment

### Data treatment - Merge ndmi for places with Data with veg (NDVI) + alcaldías (hex_gdf)

In [37]:
# Attach the monthly NDMI records to the vegetated hexagons; the inner join
# drops NDMI rows of hexagons that are not in data_withveg_alcaldias
ndvi_ndmi_gdf = pd.merge(
    data_withveg_alcaldias,
    complete_ndmi_res10_f,
    how='inner',
    on='hex_id',
)

# Show
print(ndvi_ndmi_gdf.shape)
ndvi_ndmi_gdf.head(2)

(623205, 8)


Unnamed: 0,hex_id,ndvi_median,ndvi_median_rango,NOMGEO,geometry,ndmi,month,year
0,8a4995b8d277fff,0.28332,Mínima densidad vegetal,Azcapotzalco,"POLYGON ((-99.20948 19.50623, -99.20883 19.506...",0.056854,1,2018
1,8a4995b8d277fff,0.28332,Mínima densidad vegetal,Azcapotzalco,"POLYGON ((-99.20948 19.50623, -99.20883 19.506...",0.051589,2,2018


## Data analysis

### Método elegido - Group by + transform data

In [38]:
# Work on the identifier, the year and the NDMI value only
data_analysis = ndvi_ndmi_gdf.loc[:, ['hex_id', 'year', 'ndmi']].copy()

# Mean NDMI per hexagon and year, with the grouping keys kept as regular columns
grouped_by = data_analysis.groupby(['hex_id', 'year'], as_index=False).mean()

# Reshape from long to wide: start from one row per hexagon geometry and
# append one column per year
mean_ndmi_bydate = hex_gdf_f.copy()

for year in grouped_by['year'].unique():

    # Rows of the current year, reduced to the key and the value
    yearly_mean = grouped_by[grouped_by['year'] == year][['hex_id', 'ndmi']]

    # The value column takes the name of the period it represents
    yearly_mean = yearly_mean.rename(columns={'ndmi': f"ene-may_{year}"})

    # Inner merge on hex_id: hexagons without data for this year are dropped
    mean_ndmi_bydate = mean_ndmi_bydate.merge(yearly_mean, how='inner', on='hex_id')

mean_ndmi_bydate

Unnamed: 0,hex_id,NOMGEO,geometry,ene-may_2018,ene-may_2019,ene-may_2020,ene-may_2021,ene-may_2022,ene-may_2023
0,8a4995b88d67fff,Azcapotzalco,"POLYGON ((-99.17778 19.49754, -99.17713 19.497...",0.105919,0.147756,0.087584,0.035289,0.086562,0.028519
1,8a4995b81af7fff,Azcapotzalco,"POLYGON ((-99.17055 19.47147, -99.16990 19.471...",0.037642,0.031787,0.044080,0.040128,0.033321,0.029482
2,8a4995a36c9ffff,Azcapotzalco,"POLYGON ((-99.21599 19.48356, -99.21535 19.483...",0.005989,-0.005167,-0.011358,-0.020786,0.002482,-0.007598
3,8a4995b8d1a7fff,Azcapotzalco,"POLYGON ((-99.19989 19.49476, -99.19924 19.495...",0.104418,0.105524,0.125967,0.104106,0.086681,0.073300
4,8a4995b818a7fff,Azcapotzalco,"POLYGON ((-99.16380 19.46200, -99.16316 19.462...",0.017313,0.015597,0.027663,0.025225,0.015952,0.011709
...,...,...,...,...,...,...,...,...,...
20696,8a4995b961affff,Venustiano Carranza,"POLYGON ((-99.05523 19.44467, -99.05458 19.445...",-0.145018,-0.164940,-0.195191,-0.156170,-0.113837,-0.147798
20697,8a4995b8640ffff,Venustiano Carranza,"POLYGON ((-99.11389 19.44359, -99.11324 19.444...",0.028383,0.024993,0.037698,0.039310,0.035020,0.019361
20698,8a4995bb3d57fff,Venustiano Carranza,"POLYGON ((-99.09936 19.41918, -99.09872 19.419...",0.054442,0.058805,0.079736,0.076332,0.059195,0.041614
20699,8a4995bb1597fff,Venustiano Carranza,"POLYGON ((-99.11205 19.41712, -99.11140 19.417...",0.021455,0.009891,0.021921,0.022106,0.021361,0.007051


### Método 1 - 1425s

In [12]:
# Switch for the nested-loop method in the next cell; leave False to skip it.
metodo_1 = False

In [13]:
if metodo_1:
    # Start from one row per hexagon of the alcaldías of interest
    mean_ndmi_bydate = hex_gdf_f.copy()

    # ndvi_ndmi_gdf is the merged frame that carries 'year', 'month' and 'ndmi';
    # data_withveg_alcaldias (used here originally) has none of those columns.
    for year in ndvi_ndmi_gdf.year.unique():
        for month in ndvi_ndmi_gdf.month.unique():
            for alcaldia in ndvi_ndmi_gdf.NOMGEO.unique():

                print(f"Calculating mean ndmi in {alcaldia} date {month}/{year}.")

                data_df = ndvi_ndmi_gdf.loc[(ndvi_ndmi_gdf.year == year) &
                                            (ndvi_ndmi_gdf.month == month) &
                                            (ndvi_ndmi_gdf.NOMGEO == alcaldia)].copy()

                for hex_id in data_df['hex_id'].unique():
                    # Mean NDMI of this hexagon for the current year, month and alcaldía
                    hex_data = data_df.loc[data_df['hex_id'] == hex_id].copy()
                    mean_ndmi = hex_data['ndmi'].mean()
                    # Register the value in the column of the current date
                    idx = mean_ndmi_bydate['hex_id'] == hex_id
                    mean_ndmi_bydate.loc[idx,f"{year}_{month}"] = mean_ndmi

    # Show (a bare expression inside `if` is not rendered, hence display())
    print(mean_ndmi_bydate.shape)
    display(mean_ndmi_bydate.head(2))

### Método 2 - Transform data to columns and calculate mean on row axis (Canceled)

In [14]:
# Switch for the two "Método 2" cells that follow; leave False to skip them.
metodo_2 = False

In [15]:
if metodo_2:
    # ndvi_ndmi_gdf is the merged frame that carries 'year', 'month' and 'ndmi';
    # data_withveg_alcaldias (used here originally) has none of those columns.
    data_analysis = ndvi_ndmi_gdf.copy()
    # Create a "<year>_<month>" date column
    data_analysis['year'] = data_analysis['year'].astype('str')
    data_analysis['month'] = data_analysis['month'].astype('str')
    data_analysis['date'] =  data_analysis['year'] + "_"+ data_analysis['month']
    # Drop columns no longer in use
    data_analysis.drop(columns=['ndvi_median','ndvi_median_rango','month','year'],inplace=True)

    # Show (a bare expression inside `if` is not rendered, hence display())
    print(data_analysis.shape)
    display(data_analysis.head(2))

In [16]:
if metodo_2:
    # TRANSFORM DATA FROM MULTIPLE ROWS TO COLUMNS
    ######################################################################### BASE CODE FOR REPOSITIONING DATA (Script 15-15min-cities)
    # delete duplicates and keep only one point for each node
    #nodes_geom = nodes.drop_duplicates(subset='osmid', keep="last")[['osmid','geometry','metropolis']].copy()
    #nodes_analysis = nodes_geom.copy()

    # relate time data to each point
    #for amenidad in list(nodes.amenity.unique()):

    #    nodes_tmp = nodes.loc[nodes.amenity == amenidad,['osmid','time']]
    #    nodes_tmp = nodes_tmp.rename(columns={'time':amenidad})

    #    if nodes_tmp[amenidad].mean() == 0:
    #        nodes_tmp[amenidad] = np.nan

    #    nodes_analysis = nodes_analysis.merge(nodes_tmp, on='osmid')

    ######################################################################### MAPPING TO THIS CODE
    #nodes_analysis (gdf with 1 row per geometry) --> derived from data_analysis
    #amenidad --> date (what becomes the columns of the new gdf)
    #osmid --> hex_id (ID column of the geometries)
    #time --> ndmi (column holding the data to reposition)

    ######################################################################### APPLICATION
    # One row per hexagon: keep the last occurrence of every hex_id
    unique_hexs = data_analysis.drop_duplicates(subset='hex_id', keep='last')
    mean_ndmi_bydate = unique_hexs[['hex_id', 'geometry', 'NOMGEO']].copy()

    # Append one column per date
    for date in data_analysis['date'].unique():

        # Rows of the current date, reduced to the key and the value
        date_values = data_analysis[data_analysis['date'] == date][['hex_id', 'ndmi']]

        # The value column takes the date as its name
        date_values = date_values.rename(columns={'ndmi': date})

        # Inner merge on hex_id: hexagons without data for this date are dropped
        mean_ndmi_bydate = mean_ndmi_bydate.merge(date_values, how='inner', on='hex_id')

    mean_ndmi_bydate

## Final processing (Canceled, categorizing NDMI from NDVI values is too complex)

NDMI Interpretation
* -1 – -0.8 Bare soil,
* -0.8 – -0.6 Almost absent canopy cover,
* -0.6 – -0.4 Very low canopy cover,
* -0.4 – -0.2 Low canopy cover, dry or very low canopy cover, wet,
* -0.2 – 0 Mid-low canopy cover, high water stress or low canopy cover, low water stress,
* 0 – 0.2 Average canopy cover, high water stress or mid-low canopy cover, low water stress,
* 0.2 – 0.4 Mid-high canopy cover, high water stress or average canopy cover, low water stress,
* 0.4 – 0.6 High canopy cover, no water stress,
* 0.6 – 0.8 Very high canopy cover, no water stress,
* 0.8 – 1 Total canopy cover, no water stress/waterlogging

Current output_gdf canopy cover will be interpreted the following way:
* 'Mínima densidad vegetal' --> Almost absent canopy cover, very low canopy cover, low canopy cover
* 'Moderada densidad vegetal' --> Mid-low canopy cover, Average canopy cover, Mid-high canopy cover
* 'Alta densidad vegetal' --> High canopy cover, very high canopy cover, Total canopy cover

### [Canceled] Create NDMI Categories - Merge NDVI categories data

In [41]:
#output_gdf = mean_ndmi_bydate.merge(data_withveg_alcaldias[['hex_id','ndvi_median_rango']],on='hex_id',how='inner')

# Show
#print(output_gdf.shape)
#output_gdf.head(2)

In [42]:
#analysis_cols = list(mean_ndmi_bydate.columns)
#analysis_cols.remove('hex_id')
#analysis_cols.remove('NOMGEO')
#analysis_cols.remove('geometry')
#analysis_cols

In [43]:
# Test apply function
#test_gdf = output_gdf.copy()

### Create NDMI Categories b) apply function according to each value.
#def splitname(row):
#    row['First']=row['ndvi_median_rango'].split(" ")[0]
#    row['Second']=row['ndvi_median_rango'].split(" ")[1]
#    row['Last']=row['ndvi_median_rango'].split(" ")[2]
#    return row
    
#Aplicar la función
#test_gdf = test_gdf.apply(splitname, axis='columns')
#test_gdf

In [44]:
# Apply function that categorizes NDMI
#def categorize_ndmi(row):
#    if row['ndvi_median_rango'] == 'Mínima densidad vegetal':
#        if row[ndmi_col] < -0.4:
#            row[f"{ndmi_col}_cat"] = 'Almost absent or very low canopy cover'

## Save

In [40]:
# Output folder, given relative to the directory the notebook runs from
save_dir = '../../../data/external/temporal_fromjupyter/'
output_path = os.path.join(save_dir, "cutzamala", "cdmx_enemay_ndmi.gpkg")

# Create the target folder when it is missing so the write does not fail on a
# fresh checkout
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Write the wide table (one row per hexagon, one column per year) as a GeoPackage
mean_ndmi_bydate.to_file(output_path)