## Import libraries

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import colors
import seaborn as sns

from pandas.api.types import CategoricalDtype

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the directory three levels up importable so the project package `aup`
# can be loaded from it.
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import unconditionally: when the path is already on sys.path (e.g. set via
# PYTHONPATH, or after clearing the namespace without restarting the kernel)
# the import must still happen, otherwise `aup` is undefined in later cells.
import aup

  ox.config(


In [2]:
# City code interpolated into the "city" condition of the database queries in this notebook
city = 'CDMX'

## Load NDVI, hex_gdf and create hexs of interest list (Hexs with vegetation in alcaldías of interest)

### Load data - Load NDVI

In [3]:
# Download NDVI data for the selected city at res 10 (approx. 150-200 s)
ndvi_schema = 'raster_analysis'
ndvi_table = 'ndvi_analysis_hex'
res = 10

# The query is written in two pieces for readability; the SQL text is unchanged
query = (
    f"SELECT * FROM {ndvi_schema}.{ndvi_table} "
    f"WHERE \"city\" LIKE \'{city}\' AND \"res\" = {res}"
)
ndvi_res10 = aup.gdf_from_query(query, geometry_col='geometry')

# Keep only the hexagon ID and its median NDVI
ndvi_columns = ['hex_id', 'ndvi_median']
ndvi_res10 = ndvi_res10[ndvi_columns]

# Show
print(ndvi_res10.shape)
ndvi_res10.head(2)

(59557, 2)


Unnamed: 0,hex_id,ndvi_median
0,8a4995b8cb57fff,0.10846
1,8a4995bab38ffff,0.149826


### NDVI Data treatment - Create vegetation categories

In [4]:
# Classify each hexagon's median NDVI into an ordered vegetation category.
# Categories are listed from lowest to highest NDVI; this order defines the
# ordering of the resulting categorical column.
categories = ['Suelo artificial/Agua/Piedra', 'Suelo', 'Mínima densidad vegetal', 'Moderada densidad vegetal', 'Alta densidad vegetal']

# Bin edges are left-closed / right-open (right=False), i.e.
#   < 0.1 | [0.1, 0.2) | [0.2, 0.4) | [0.4, 0.6) | >= 0.6
ndvi_bins = [-np.inf, 0.1, 0.2, 0.4, 0.6, np.inf]

# A single pd.cut call replaces seeding the column with float NaN and then
# writing strings into it through masks (a dtype-incompatible assignment that
# newer pandas versions deprecate). Missing NDVI values stay missing.
ndvi_res10['ndvi_median_rango'] = pd.cut(
    ndvi_res10['ndvi_median'],
    bins=ndvi_bins,
    labels=categories,
    right=False,
)

# Reviewing process
print(ndvi_res10.shape)
ndvi_res10.head(2)

(59557, 3)


Unnamed: 0,hex_id,ndvi_median,ndvi_median_rango
0,8a4995b8cb57fff,0.10846,Suelo
1,8a4995bab38ffff,0.149826,Suelo


### NDVI Data treatment - Filter for places with vegetation

In [5]:
# Keep only hexagons classified as high, moderate or minimal vegetation density
vegetation_classes = [
    'Alta densidad vegetal',
    'Moderada densidad vegetal',
    'Mínima densidad vegetal',
]
has_vegetation = ndvi_res10['ndvi_median_rango'].isin(vegetation_classes)
data_withveg = ndvi_res10.loc[has_vegetation].copy()

# Show
print(data_withveg.shape)
data_withveg.head(2)

(35317, 3)


Unnamed: 0,hex_id,ndvi_median,ndvi_median_rango
7,8a4995b8d277fff,0.28332,Mínima densidad vegetal
10,8a4995b81107fff,0.215763,Mínima densidad vegetal


### Load data - Load hex_gdf (contains alcaldías)

In [6]:
# Download the hexagon grid for the selected city (approx. 150-200 s)
hex_schema = 'hexgrid'
hex_table = 'hexgrid_10_city_2020'

# The query is written in two pieces for readability; the SQL text is unchanged
query = (
    f"SELECT * FROM {hex_schema}.{hex_table} "
    f"WHERE \"city\" LIKE \'{city}\'"
)
hex_gdf = aup.gdf_from_query(query, geometry_col='geometry')

# Show
print(hex_gdf.shape)
hex_gdf.head(2)

(86349, 6)


Unnamed: 0,hex_id_10,geometry,CVEGEO,NOMGEO,city,type
0,8a4995b8cb57fff,"POLYGON ((-99.19358 19.48193, -99.19293 19.482...",9002,Azcapotzalco,CDMX,urban
1,8a4995b88d67fff,"POLYGON ((-99.17778 19.49754, -99.17713 19.497...",9002,Azcapotzalco,CDMX,urban


### Filter for alcaldías of interest

In [7]:
# Alcaldías (municipalities) where the Cutzamala system provides water
alcaldias_cutzamala = [
    'Álvaro Obregón', 'Azcapotzalco', 'Cuajimalpa de Morelos', 'Iztacalco',
    'Iztapalapa', 'La Magdalena Contreras', 'Tlalpan', 'Benito Juárez',
    'Cuauhtémoc', 'Miguel Hidalgo', 'Venustiano Carranza',
]

# Select those alcaldías, keep the columns needed downstream and use the
# same hexagon ID column name ('hex_id') as the NDVI table
in_cutzamala = hex_gdf['NOMGEO'].isin(alcaldias_cutzamala)
kept_columns = ['hex_id_10', 'NOMGEO', 'geometry']
hex_gdf_f = (
    hex_gdf.loc[in_cutzamala][kept_columns]
    .rename(columns={'hex_id_10': 'hex_id'})
)

# Show
print(hex_gdf_f.shape)
hex_gdf_f.head(2)

(49335, 3)


Unnamed: 0,hex_id,NOMGEO,geometry
0,8a4995b8cb57fff,Azcapotzalco,"POLYGON ((-99.19358 19.48193, -99.19293 19.482..."
1,8a4995b88d67fff,Azcapotzalco,"POLYGON ((-99.17778 19.49754, -99.17713 19.497..."


In [8]:
# Inner join on hex_id: keeps only the vegetated hexagons that fall inside the
# alcaldías of interest and attaches their alcaldía name and geometry
data_withveg_alcaldias = pd.merge(
    data_withveg,
    hex_gdf_f,
    on='hex_id',
    how='inner',
)

# Show
print(data_withveg_alcaldias.shape)
data_withveg_alcaldias.head(2)

(18981, 5)


Unnamed: 0,hex_id,ndvi_median,ndvi_median_rango,NOMGEO,geometry
0,8a4995b8d277fff,0.28332,Mínima densidad vegetal,Azcapotzalco,"POLYGON ((-99.20948 19.50623, -99.20883 19.506..."
1,8a4995b81107fff,0.215763,Mínima densidad vegetal,Azcapotzalco,"POLYGON ((-99.16277 19.46651, -99.16212 19.466..."


## Load data - Load NDMI using city, res and tuple of previously filtered hexes

In [None]:
# Hexagons of interest (with vegetation and inside the alcaldías of interest)
hexs_lst = list(data_withveg_alcaldias.hex_id.unique())
if not hexs_lst:
    # An empty IN () list is invalid SQL; fail early with a clear message.
    raise ValueError("No hexagons of interest found; cannot build the NDMI query.")

# Render the ID list explicitly instead of relying on str(tuple(...)), which
# produces a trailing comma (invalid SQL) when there is a single element.
hex_ids_sql = ", ".join(f"'{hex_id}'" for hex_id in hexs_lst)

# Download the complete NDMI dataset for those hexagons (approx. 700-1000 s)
complete_ndmi_schema = 'raster_analysis'
complete_ndmi_table = 'ndmi_complete_dataset_hex'
res = 10
# The previous query had a stray quote after the "res" value, which left the
# SQL string literals unbalanced.
query = (
    f"SELECT * FROM {complete_ndmi_schema}.{complete_ndmi_table} "
    f"WHERE \"city\" LIKE '{city}' AND \"res\" = {res} "
    f"AND \"hex_id\" IN ({hex_ids_sql})"
)
complete_ndmi_res10 = aup.df_from_query(query)

# Optional filter for dates of interest (currently disabled)
#months_ofinterest = [1,2,3,4,5]
#years_ofinterest = [2018,2019,2020,2021,2022,2023]
#complete_ndmi_res10 = complete_ndmi_res10.loc[(complete_ndmi_res10.month.isin(months_ofinterest)) & (complete_ndmi_res10.year.isin(years_ofinterest))]

# Shorten data. This assignment is what defines `complete_ndmi_res10_f`;
# previously its only definition was inside the commented-out filter, so the
# cell failed with a NameError.
complete_ndmi_res10_f = complete_ndmi_res10[['hex_id','ndmi','month','year']].copy()

# Show
print(complete_ndmi_res10_f.shape)
complete_ndmi_res10_f.head(2)

## Data treatment

### Data treatment - Merge ndmi for places with vegetation

In [10]:
# Inner merge to keep NDMI data for hexagons with vegetation only
ndvi_ndmi_gdf = data_withveg.merge(complete_ndmi_res10_f, on='hex_id', how='inner')

# Attach alcaldía name and geometry to the NDVI + NDMI records.
# The analysis cells further down read 'year', 'month' and 'ndmi' from
# `data_withveg_alcaldias`, but no visible cell adds those columns to it.
# It is rebuilt here from `ndvi_ndmi_gdf` and `hex_gdf_f` (never from itself),
# so re-running this cell gives the same result.
# NOTE(review): reconstructed from the columns shown in the later outputs
# (hex_id, ndmi, NOMGEO, geometry, date) - confirm this matches the original step.
data_withveg_alcaldias = ndvi_ndmi_gdf.merge(hex_gdf_f, on='hex_id', how='inner')

# Show
print(ndvi_ndmi_gdf.shape)
print(data_withveg_alcaldias.shape)
ndvi_ndmi_gdf.head(2)

(1063010, 6)


Unnamed: 0,hex_id,ndvi_median,ndvi_median_rango,ndmi,month,year
0,8a4995b8d277fff,0.28332,Mínima densidad vegetal,0.025837,1,2023
1,8a4995b8d277fff,0.28332,Mínima densidad vegetal,0.006358,2,2023


## Data analysis

### Método 1 - 1425s

In [12]:
# Switch for the "Método 1" computation guarded by `if metodo_1:`; currently turned off
metodo_1 = False

In [13]:
if metodo_1:
    # Mean NDMI of every hexagon for every (year, month), computed in a single
    # grouped pass instead of nested Python loops over year, month, alcaldía
    # and hexagon with one .loc write per hexagon.
    # NOTE(review): assumes each hex_id belongs to a single alcaldía, so that
    # grouping by alcaldía is not needed - confirm.
    ndmi_means = (
        data_withveg_alcaldias
        .groupby(['hex_id', 'year', 'month'])['ndmi']
        .mean()
        .unstack(['year', 'month'])
    )

    # Keep the column order of the loop-based version: years and months in
    # order of first appearance, skipping combinations without data.
    years = data_withveg_alcaldias['year'].unique()
    months = data_withveg_alcaldias['month'].unique()
    date_order = [(year, month)
                  for year in years
                  for month in months
                  if (year, month) in ndmi_means.columns]
    ndmi_means = ndmi_means[date_order]
    ndmi_means.columns = [f"{year}_{month}" for year, month in date_order]

    # One row per hexagon of interest; hexagons without NDMI data get NaN
    mean_ndmi_bydate = hex_gdf_f.join(ndmi_means, on='hex_id')

    # Show (a bare expression inside an `if` block is not rendered by Jupyter,
    # so display() is called explicitly)
    print(mean_ndmi_bydate.shape)
    display(mean_ndmi_bydate.head(2))

### Método 2 - Transform data to columns and calculate mean on row axis (Work in progress)

In [14]:
# Build the "<year>_<month>" label from the string forms of year and month
year_str = data_withveg_alcaldias['year'].astype('str')
month_str = data_withveg_alcaldias['month'].astype('str')

# Drop the columns no longer in use and append the date label as the last column
unused_columns = ['ndvi_median', 'ndvi_median_rango', 'month', 'year']
data_analysis = (
    data_withveg_alcaldias
    .drop(columns=unused_columns)
    .assign(date=year_str + "_" + month_str)
)

# Show
print(data_analysis.shape)
data_analysis.head(2)

(571605, 5)


Unnamed: 0,hex_id,ndmi,NOMGEO,geometry,date
0,8a4995b8d277fff,0.025837,Azcapotzalco,"POLYGON ((-99.20948 19.50623, -99.20883 19.506...",2023_1
1,8a4995b8d277fff,0.006358,Azcapotzalco,"POLYGON ((-99.20948 19.50623, -99.20883 19.506...",2023_2


In [18]:
# Distinct date labels present in the analysis table, in order of first appearance
data_analysis['date'].unique()

array(['2023_1', '2023_2', '2023_3', '2023_4', '2023_5', '2018_1',
       '2018_2', '2018_3', '2018_4', '2018_5', '2019_1', '2019_2',
       '2019_3', '2019_4', '2019_5', '2020_1', '2020_2', '2020_3',
       '2020_4', '2020_5', '2021_1', '2021_2', '2021_3', '2021_4',
       '2021_5', '2022_1', '2022_2', '2022_3', '2022_4', '2022_5'],
      dtype=object)

In [None]:
# TRANSFORM DATA FROM MULTIPLE ROWS TO COLUMNS
# Reshape NDMI from long format (one row per hexagon and date) to wide format
# (one row per hexagon, one column per date).
# Adapted from the base code for repositioning data in "Script 15-15min-cities",
# with this mapping: osmid -> hex_id (geometry ID column), amenity -> date
# (values that become the new columns), time -> ndmi (values being repositioned).

# One row per hexagon, carrying its geometry and alcaldía
mean_ndmi_bydate = data_analysis.drop_duplicates(subset='hex_id', keep='last')[['hex_id','geometry','NOMGEO']].copy()

# Date columns are kept in order of first appearance
date_order = list(data_analysis['date'].unique())

# Mean NDMI per hexagon and date, spread into one column per date.
# Averaging collapses repeated (hex_id, date) rows into a single value; the
# previous chain of inner merges multiplied rows for every repeated pair and
# dropped any hexagon that lacked a single date. Here a missing date is NaN.
ndmi_wide = (
    data_analysis
    .groupby(['hex_id', 'date'])['ndmi']
    .mean()
    .unstack('date')
    .reindex(columns=date_order)
)

# Attach the date columns to the one-row-per-hexagon table
mean_ndmi_bydate = mean_ndmi_bydate.join(ndmi_wide, on='hex_id', how='inner')

# Show
print(mean_ndmi_bydate.shape)
mean_ndmi_bydate.head(2)

In [16]:
# Disabled: wrap the wide table in a GeoDataFrame (using its 'geometry' column) and plot it
#mean_ndmi_bydate = gpd.GeoDataFrame(mean_ndmi_bydate, geometry='geometry')
#mean_ndmi_bydate.plot()