# Summer analysis 2023: ndvi+ndmi+marginalization res 10

This notebook joins NDVI (res 10), NDMI (res 10) and marginalization data (res 8, brought to res 10 through hexagon centroids) and creates categories for NDVI and marginalization in order to run the following analyses:

Analysis 1: Average min, mean, max data by marginalization group
Min, mean and max NDVI and NDMI are calculated by marginalization group.

Analysis 2: Zones that may receive irrigation within urban spaces and their relation with marginalization
Places without vegetation are discarded (NDVI categories that correspond to water, marshy surfaces, artificial structures, rocks and clouds are excluded).
The hexagons with the 10% highest and 10% lowest ndmi_diff values are located, plotted and exported to csv.

## Import libraries

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from pandas.api.types import CategoricalDtype

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the repository root importable so the project-local `aup` package can be
# found when the notebook is run from its own folder.
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Imported outside the `if`: when the path is already registered (for example
# through PYTHONPATH), a guarded import would be skipped and every later
# `aup.` call would fail with NameError.
import aup



In [3]:
# City to analyse; used below in the download queries, the plot titles and the
# figure output path.
city = 'Monterrey'

In [56]:
# Output switches read further down the notebook:
# csv_dataset -> when True, the aggregated table of analysis 1 is written to csv.
# save_plots  -> when True, the figures are saved to disk with plt.savefig.
csv_dataset = True
save_plots = False

## Data download

Data download - NDVI

In [5]:
# Downloading NDVI res 10 data
# Schema and table that are queried for the per-hexagon NDVI statistics.
ndvi_schema = 'raster_analysis'
ndvi_folder = 'ndvi_analysis_hex'

# Hexagon resolution requested from the table (also reused later in a plot title).
res = 10

# LIKE is used without wildcards, so for 'Monterrey' it behaves as an exact match.
# NOTE(review): the statement is built by string interpolation; that is only safe
# while `city` and `res` remain constants defined in this notebook.
query = f"SELECT * FROM {ndvi_schema}.{ndvi_folder} WHERE \"city\" LIKE \'{city}\' AND \"res\"={res}"
# presumably aup.gdf_from_query runs the query and returns a GeoDataFrame whose
# geometry comes from the named column — TODO confirm against the aup module.
ndvi_res10 = aup.gdf_from_query(query, geometry_col='geometry')

# Quick check of size and first rows.
print(ndvi_res10.shape)
ndvi_res10.head(2)

(75705, 11)


Unnamed: 0,hex_id,geometry,res,ndvi_mean,ndvi_std,ndvi_median,ndvi_max,ndvi_min,ndvi_diff,ndvi_tend,city
0,8a48a20e121ffff,"POLYGON ((-100.25434 25.75376, -100.25366 25.7...",10,0.097768,0.048954,0.093641,0.160109,0.021698,0.138411,-0.001268,Monterrey
1,8a48a20e596ffff,"POLYGON ((-100.26159 25.71180, -100.26091 25.7...",10,0.054997,0.031887,0.054372,0.088824,-0.003849,0.092673,-0.000919,Monterrey


Data download - NDMI

In [6]:
# Downloading NDMI res 10 data
# Schema and table that are queried for the per-hexagon NDMI statistics.
ndmi_schema = 'raster_analysis'
ndmi_folder = 'ndmi_analysis_hex'

# Same resolution as the NDVI download so both tables can be joined on hex_id.
res = 10

# LIKE is used without wildcards, so for 'Monterrey' it behaves as an exact match.
# NOTE(review): the statement is built by string interpolation; that is only safe
# while `city` and `res` remain constants defined in this notebook.
query = f"SELECT * FROM {ndmi_schema}.{ndmi_folder} WHERE \"city\" LIKE \'{city}\' AND \"res\"={res}"
# presumably aup.gdf_from_query runs the query and returns a GeoDataFrame — TODO
# confirm against the aup module.
ndmi_res10 = aup.gdf_from_query(query, geometry_col='geometry')

# Quick check of size and first rows.
print(ndmi_res10.shape)
ndmi_res10.head(2)

(75705, 11)


Unnamed: 0,hex_id,geometry,res,ndmi_mean,ndmi_std,ndmi_median,ndmi_max,ndmi_min,ndmi_diff,ndmi_tend,city
0,8a48a20e5857fff,"POLYGON ((-100.25832 25.71538, -100.25764 25.7...",10,0.027308,0.078137,0.012566,0.176812,-0.013988,0.190801,-0.000372,Monterrey
1,8a48a20e136ffff,"POLYGON ((-100.25983 25.75116, -100.25915 25.7...",10,-0.00114,0.080293,-0.019805,0.160764,-0.043485,0.204249,1e-06,Monterrey


Data download - Municipalities (in order to download marginalization)

In [7]:
# Schema and table queried for the municipalities that belong to each city.
mun_schema = 'metropolis'
mun_table = 'metro_gdf'

# Only the rows of the selected city are requested; their CVEGEO codes are used
# afterwards to download the marginalization hexagons.
# NOTE(review): the statement is built by string interpolation; that is only safe
# while `city` remains a constant defined in this notebook.
query = f"SELECT * FROM {mun_schema}.{mun_table} WHERE \"city\" LIKE \'{city}\'"
mun_gdf = aup.gdf_from_query(query, geometry_col='geometry')

# Quick check of size and first rows.
print(mun_gdf.shape)
mun_gdf.head(2)

(18, 6)


Unnamed: 0,CVEGEO,CVE_ENT,CVE_MUN,NOMGEO,geometry,city
0,19001,19,1,Abasolo,"POLYGON ((-100.39267 25.98715, -100.39224 25.9...",Monterrey
1,19006,19,6,Apodaca,"POLYGON ((-100.24794 25.86462, -100.24672 25.8...",Monterrey


Data download - Marginalization

In [8]:
# Distinct municipality codes (CVEGEO) of the selected city, as a plain list.
cvegeo_list = mun_gdf.loc[mun_gdf["city"] == city, "CVEGEO"].unique().tolist()
cvegeo_list

['19001',
 '19006',
 '19009',
 '19010',
 '19012',
 '19018',
 '19019',
 '19021',
 '19025',
 '19026',
 '19031',
 '19039',
 '19041',
 '19045',
 '19046',
 '19047',
 '19048',
 '19049']

In [9]:
# Download hexagons with marginalization data
marg_table = 'hex_bins_marg_2020'
marg_schema = 'censo'

# One query per municipality code. The partial results are collected in a list
# and concatenated once, instead of growing a GeoDataFrame inside the loop
# (which re-copies all previous rows on every iteration and relies on
# concatenating onto an empty frame).
hex_frames = []
for cvegeo in cvegeo_list:
    # Hexagons whose CVEGEO starts with the municipality code.
    # NOTE(review): "%%" is kept as written; presumably it is an escaped "%"
    # for the database driver — confirm against aup.gdf_from_query.
    query = f"SELECT * FROM {marg_schema}.{marg_table} WHERE \"CVEGEO\" LIKE \'{cvegeo}%%\'"
    hex_tmp = aup.gdf_from_query(query, geometry_col='geometry')
    hex_frames.append(hex_tmp)

if hex_frames:
    hex_marg = pd.concat(hex_frames, ignore_index=True, axis=0)
else:
    # No municipality codes: keep the empty result the loop used to produce.
    hex_marg = gpd.GeoDataFrame()

print(hex_marg.shape)
hex_marg.head(2)

(1463, 17)


Unnamed: 0,geometry,hex_id_8,CVEGEO,pobtot,p6a14nae,sbasc,psdss,ovsde,ovsee,ovsae,ovpt,ovsref,ovsint,ovscel,ovhac,im_2020,imn_2020
0,"POLYGON ((-100.38026 25.93877, -100.38069 25.9...",8848a23881fffff,19001,250.11478,1.507139,6.775771,0.155156,0.077578,0.077578,0.077578,0.0,0.659412,15.438009,1.51277,9.580874,123.461212,0.964433
1,"POLYGON ((-100.38588 25.94625, -100.38631 25.9...",8848a23883fffff,19001,487.4215,3.485196,33.948074,3.441633,0.081661,0.081661,0.081661,0.0,1.01645,87.38989,8.739015,55.745712,123.066629,0.96135


## Data treatment

### Data treatment - Bring marginalization data (res8) to ndvi (res10)

Find ndvi centroids in order to intersect res10 info with a single res8 hex

In [10]:
# Find centroids
# Centroids are computed in EPSG:6372 (a projected CRS) rather than on lat/lon
# degrees, then brought back to EPSG:4326 to match the other layers.
ndvi_res10_calc = ndvi_res10.to_crs("EPSG:6372")
centroids = ndvi_res10_calc.centroid
centroids = centroids.to_crs("EPSG:4326")

# Turn into gdf
# Passing the GeoSeries as `geometry=` creates the 'geometry' column directly
# and registers it as the active geometry, instead of depending on the implicit
# column name 0 and a later rename.
centroids_gdf = gpd.GeoDataFrame(geometry=centroids)

# Review result
print(centroids_gdf.shape)
centroids_gdf.head(2)

(75705, 1)


Unnamed: 0,geometry
0,POINT (-100.25440 25.75451)
1,POINT (-100.26165 25.71254)


Attach the hex_id of the original ndvi data to each centroid

In [11]:
# The centroids were derived row by row from ndvi_res10, so both objects share
# the same index. hex_id is attached by index alignment instead of a spatial
# overlay: the overlay is far more expensive and could mis-assign a centroid
# that lands on a hexagon edge after reprojection.
centroids_hexid = centroids_gdf.assign(hex_id=ndvi_res10['hex_id'])

columns_tokeep = ['hex_id','geometry']
centroids_hexid = centroids_hexid[columns_tokeep]

# Review result
print(centroids_hexid.shape)
centroids_hexid.head(2)

(75705, 2)


Unnamed: 0,hex_id,geometry
0,8a48a20e121ffff,POINT (-100.25440 25.75451)
1,8a48a20e596ffff,POINT (-100.26165 25.71254)


Intersect centroids with marginalization data

In [13]:
# Overlay of the res-10 centroids with the res-8 marginalization hexagons: each
# centroid receives the attributes of the hexagon it falls in. Centroids that
# match no marginalization hexagon are not returned (the recorded output drops
# from 75705 to 71687 rows).
centroids_marg = gpd.overlay(centroids_hexid,hex_marg,keep_geom_type=True)

# Keep the res-10 hex id plus the pobtot / im_2020 / imn_2020 columns.
# Note: these values belong to the res-8 hexagon and repeat for every res-10
# hexagon inside it (see the identical pobtot values in the output).
columns_tokeep = ['hex_id','pobtot','im_2020','imn_2020']
centroids_marg = centroids_marg[columns_tokeep]

#Review result
print(centroids_marg.shape)
centroids_marg.head(2)

(71687, 4)


Unnamed: 0,hex_id,pobtot,im_2020,imn_2020
0,8a48a20e121ffff,13333.322,122.700612,0.958491
1,8a48a20e122ffff,13333.322,122.700612,0.958491


Merge centroids_marg back with res10 geometry

In [14]:
# Join on hex_id (pd.merge default, inner): puts the marginalization columns
# back onto the res-10 polygons. Hexagons absent from centroids_marg drop out
# (71687 rows in the recorded output).
ndvi_marg = pd.merge(ndvi_res10,centroids_marg, on='hex_id')

#Review result
print(ndvi_marg.shape)
ndvi_marg.head(2)

(71687, 14)


Unnamed: 0,hex_id,geometry,res,ndvi_mean,ndvi_std,ndvi_median,ndvi_max,ndvi_min,ndvi_diff,ndvi_tend,city,pobtot,im_2020,imn_2020
0,8a48a20e121ffff,"POLYGON ((-100.25434 25.75376, -100.25366 25.7...",10,0.097768,0.048954,0.093641,0.160109,0.021698,0.138411,-0.001268,Monterrey,13333.322,122.700612,0.958491
1,8a48a20e596ffff,"POLYGON ((-100.26159 25.71180, -100.26091 25.7...",10,0.054997,0.031887,0.054372,0.088824,-0.003849,0.092673,-0.000919,Monterrey,10180.852,123.090448,0.961536


### Data treatment - Merge NDVI_MARG with NDMI

In [15]:
# Remove the columns NDMI shares with the NDVI table so the join only adds the
# ndmi_* statistics.
ndmi_res10_clean = ndmi_res10.drop(columns=['geometry', 'res', 'city'])

# Join both tables on the shared hexagon id (default inner join).
ndvi_ndmi_marg = ndvi_marg.merge(ndmi_res10_clean, on='hex_id')

# Reviewing process
print(ndvi_ndmi_marg.shape)
ndvi_ndmi_marg.head(2)

(71687, 21)


Unnamed: 0,hex_id,geometry,res,ndvi_mean,ndvi_std,ndvi_median,ndvi_max,ndvi_min,ndvi_diff,ndvi_tend,...,pobtot,im_2020,imn_2020,ndmi_mean,ndmi_std,ndmi_median,ndmi_max,ndmi_min,ndmi_diff,ndmi_tend
0,8a48a20e121ffff,"POLYGON ((-100.25434 25.75376, -100.25366 25.7...",10,0.097768,0.048954,0.093641,0.160109,0.021698,0.138411,-0.001268,...,13333.322,122.700612,0.958491,0.023483,0.077874,0.009612,0.185504,-0.027486,0.21299,-0.000315
1,8a48a20e596ffff,"POLYGON ((-100.26159 25.71180, -100.26091 25.7...",10,0.054997,0.031887,0.054372,0.088824,-0.003849,0.092673,-0.000919,...,10180.852,123.090448,0.961536,0.006685,0.082761,-0.009015,0.16692,-0.040795,0.207715,-0.000125


### Data treatment - Create data categories

Data treatment - Creating vegetation categories

In [16]:
# Creating vegetation categories
# Labels ordered from bare/artificial surfaces to dense vegetation.
categories = ['Suelo artificial/Agua/Piedra', 'Suelo', 'Mínima densidad vegetal', 'Moderada densidad vegetal', 'Alta densidad vegetal']

# Left-closed bins over ndvi_median, same thresholds as before:
#   < 0.1 | [0.1, 0.2) | [0.2, 0.4) | [0.4, 0.6) | >= 0.6
ndvi_edges = [-np.inf, 0.1, 0.2, 0.4, 0.6, np.inf]

# pd.cut builds the ordered categorical in one step. The previous approach
# created a float NaN column and then wrote strings into it, which relies on
# silent dtype upcasting that pandas has deprecated. Missing medians stay NaN.
ndvi_ndmi_marg['ndvi_median_rango'] = pd.cut(
    ndvi_ndmi_marg['ndvi_median'],
    bins=ndvi_edges,
    labels=categories,
    right=False,
)

# Reviewing process
print(ndvi_ndmi_marg.shape)
ndvi_ndmi_marg.head(2)

(71687, 22)


Unnamed: 0,hex_id,geometry,res,ndvi_mean,ndvi_std,ndvi_median,ndvi_max,ndvi_min,ndvi_diff,ndvi_tend,...,im_2020,imn_2020,ndmi_mean,ndmi_std,ndmi_median,ndmi_max,ndmi_min,ndmi_diff,ndmi_tend,ndvi_median_rango
0,8a48a20e121ffff,"POLYGON ((-100.25434 25.75376, -100.25366 25.7...",10,0.097768,0.048954,0.093641,0.160109,0.021698,0.138411,-0.001268,...,122.700612,0.958491,0.023483,0.077874,0.009612,0.185504,-0.027486,0.21299,-0.000315,Suelo artificial/Agua/Piedra
1,8a48a20e596ffff,"POLYGON ((-100.26159 25.71180, -100.26091 25.7...",10,0.054997,0.031887,0.054372,0.088824,-0.003849,0.092673,-0.000919,...,123.090448,0.961536,0.006685,0.082761,-0.009015,0.16692,-0.040795,0.207715,-0.000125,Suelo artificial/Agua/Piedra


Data treatment - Creating marginalization categories

In [18]:
# Creating marginalization categories from the imn_2020 index.
# Labels follow ascending imn_2020: the lowest values are labelled 'Muy Alto'
# and the highest 'Muy bajo'.
categories = ['Muy Alto', 'Alto', 'Medio', 'Bajo', 'Muy bajo']

# Left-closed bins over imn_2020, same thresholds as before:
#   < 0.8999 | [0.8999, 0.926536) | [0.926536, 0.946436) | [0.946436, 0.966338) | >= 0.966338
marg_edges = [-np.inf, 0.8999, 0.926536, 0.946436, 0.966338, np.inf]

# pd.cut builds the ordered categorical in one step. The previous approach
# created a float NaN column and then wrote strings into it, which relies on
# silent dtype upcasting that pandas has deprecated. Missing values stay NaN.
ndvi_ndmi_marg['Grado_Marg'] = pd.cut(
    ndvi_ndmi_marg['imn_2020'],
    bins=marg_edges,
    labels=categories,
    right=False,
)

# Reviewing process
print(ndvi_ndmi_marg.shape)
ndvi_ndmi_marg.head(2)

(71687, 23)


Unnamed: 0,hex_id,geometry,res,ndvi_mean,ndvi_std,ndvi_median,ndvi_max,ndvi_min,ndvi_diff,ndvi_tend,...,imn_2020,ndmi_mean,ndmi_std,ndmi_median,ndmi_max,ndmi_min,ndmi_diff,ndmi_tend,ndvi_median_rango,Grado_Marg
0,8a48a20e121ffff,"POLYGON ((-100.25434 25.75376, -100.25366 25.7...",10,0.097768,0.048954,0.093641,0.160109,0.021698,0.138411,-0.001268,...,0.958491,0.023483,0.077874,0.009612,0.185504,-0.027486,0.21299,-0.000315,Suelo artificial/Agua/Piedra,Bajo
1,8a48a20e596ffff,"POLYGON ((-100.26159 25.71180, -100.26091 25.7...",10,0.054997,0.031887,0.054372,0.088824,-0.003849,0.092673,-0.000919,...,0.961536,0.006685,0.082761,-0.009015,0.16692,-0.040795,0.207715,-0.000125,Suelo artificial/Agua/Piedra,Bajo


## Data analysis 1 - Average min, mean, max data by marginalization group

In [20]:
# Average of the per-hexagon min / mean / max NDVI and NDMI within each
# marginalization group.
stat_columns = ['ndvi_min', 'ndvi_mean', 'ndvi_max',
                'ndmi_min', 'ndmi_mean', 'ndmi_max']

# 'mean' is passed by name: handing np.mean to agg is deprecated in recent
# pandas. observed=False is explicit so every Grado_Marg category keeps a row,
# whatever the pandas default is.
marginalization_behaviour = (
    ndvi_ndmi_marg
    .groupby('Grado_Marg', observed=False)
    .agg({column: 'mean' for column in stat_columns})
)
marginalization_behaviour

Unnamed: 0_level_0,ndvi_min,ndvi_mean,ndvi_max,ndmi_min,ndmi_mean,ndmi_max
Grado_Marg,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Muy Alto,0.039999,0.209018,0.376208,-0.10866,-0.009527,0.165715
Alto,0.037491,0.279495,0.470924,-0.104079,0.021025,0.211836
Medio,0.045605,0.228938,0.377378,-0.08453,0.014559,0.193175
Bajo,0.038042,0.207576,0.342798,-0.084312,0.010229,0.190418
Muy bajo,0.039758,0.212049,0.341964,-0.061363,0.028665,0.198022


In [57]:
# Export the aggregated table of analysis 1 to csv (only when csv_dataset is True).
if csv_dataset:
    csv_raster_dataset = marginalization_behaviour.copy()
    # Show dataframe. It is printed explicitly: a bare expression inside an
    # `if` block is not rendered by the notebook, so the former `.head(2)`
    # showed nothing.
    print(csv_raster_dataset.shape)
    print(csv_raster_dataset.head(2))
    # Write the table to disk.
    csv_raster_dataset.to_csv("../../../data/external/averagerasterdata_bymarg.csv")

(5, 6)


## Data analysis 2 - Zones that may receive irrigation within urban spaces and their relation with marginalization

NDVI filtering for places with vegetation

In [2]:
# Keep only the hexagons classified as high, moderate or minimal vegetation
# density. Requires the data-treatment cells above to have run in this kernel.
vegetated_ranges = [
    'Alta densidad vegetal',
    'Moderada densidad vegetal',
    'Mínima densidad vegetal',
]
has_vegetation = ndvi_ndmi_marg['ndvi_median_rango'].isin(vegetated_ranges)
data_withveg = ndvi_ndmi_marg.loc[has_vegetation]

# Reviewing process
print(data_withveg.shape)
data_withveg.head(2)

NameError: name 'ndvi_ndmi_marg' is not defined

Visual correlation analysis

In [3]:
# Side-by-side maps: NDVI category for every hexagon (left) and ndmi_diff for
# the vegetated hexagons only (right).
fig, ax = plt.subplots(1,2,figsize=(12,6))

# column/cmap are passed by keyword so the call does not depend on the
# positional order of the plot parameters.
ndvi_ndmi_marg.plot(column='ndvi_median_rango', cmap='YlGn', ax=ax[0], legend=True)
data_withveg.plot(column='ndmi_diff', cmap='viridis', ax=ax[1], legend=True)

# Both titles take the resolution from `res`; the second one used to hard-code 10.
ax[0].set_title('Índice de vegetación en {} (res {}).'.format(city,res))
ax[1].set_title('ndmi_diff en {} (res {}).'.format(city,res))

# ---------- Save Plot ----------
if save_plots:
    plt.savefig("../../../output/figures/ndvi_ndmi_marg_bycity/{}/ndmidiff_{}.svg".format(city.lower(),city.lower()))

NameError: name 'plt' is not defined