# Summer analysis 2023 ndvi+marg

This notebook joins categorized NDVI with categorized marginalization in order to export it to a csv.

### NDVI res8 + marg res8 to csv

1. Download NDVI res8
2. Create vegetation categories for NDVI
3. Download marginalization res8 (from hexs in NDVI)
4. Create marginalization categories
5. Merge databases
6. Save to csv

## Notes:

NDVI --> hex_id, res, ndvi_mean, ndvi_std, ndvi_median, ndvi_max, ndvi_min, ndvi_diff, ndvi_tend, city
* res: 8,9,10,11
* city: Aguascalientes, Cancun, Chihuahua, Chilpancingo, Coatzacoalcos, Colima, Cuernavaca, Culiacan, Durango, Ensenada, Guadalajara, Guanajuato, Juarez, La Paz, Leon, Matamoros, Mazatlan, Merida, Minatitlan, Monclova, Monterrey, Moroleon, Nogales, Nuevo Laredo, Oaxaca, Ocotlan, Pachuca, Parral, Piedad, Piedras Negras, Queretaro, Tijuana, Tuxtla, ZMVM.

NDMI --> hex_id, res, ndmi_mean, ndmi_std, ndmi_median, ndmi_max, ndmi_min, ndmi_diff, ndmi_tend, city
* res: 8,9,10,11
* city: Chihuahua, Guadalajara, Leon, Merida, Monterrey, Puebla (¡Puebla not in NDVI!), Queretaro, Tijuana, Tuxtla, ZMVM.

## Import libraries

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from pandas.api.types import CategoricalDtype

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
    import aup



## Config notebook

In [2]:
city = 'Guadalajara'

In [3]:
#Export to "../../../data/external/__"
csv_dataset = True

# First analysis: NDVI - NDMI - Marginalization correlation (Res 8)

## Data download

Data download - NDVI

In [4]:
# Downloading NDVI res 8 data
ndvi_schema = 'raster_analysis'
ndvi_folder = 'ndvi_analysis_hex'

res = 8

query = f"SELECT * FROM {ndvi_schema}.{ndvi_folder} WHERE \"city\" LIKE \'{city}\' AND \"res\"={res}"
ndvi_res8 = aup.gdf_from_query(query, geometry_col='geometry')

print(ndvi_res8.shape)
ndvi_res8.head(2)

(1187, 11)


Unnamed: 0,hex_id,geometry,res,ndvi_mean,ndvi_std,ndvi_median,ndvi_max,ndvi_min,ndvi_diff,ndvi_tend,city
0,88498c8649fffff,"POLYGON ((-103.32357 20.76827, -103.32865 20.7...",8,0.24476,0.100867,0.24744,0.368982,0.115646,0.253336,-0.002247,Guadalajara
1,88498c864bfffff,"POLYGON ((-103.32919 20.77561, -103.33427 20.7...",8,0.269992,0.122279,0.255744,0.431332,0.113069,0.318263,-0.001899,Guadalajara


## Data treatment

Data treatment - Creating vegetation categories

In [5]:
# NDVI res 8 processing
# Creating vegetation categories
ndvi_res8['ndvi_median_rango'] = np.nan
ndvi_res8.loc[ndvi_res8.ndvi_median>=0.6 , 'ndvi_median_rango'] = 'Alta densidad vegetal'
ndvi_res8.loc[(ndvi_res8.ndvi_median>=0.4 )&
             (ndvi_res8.ndvi_median<0.6), 'ndvi_median_rango'] = 'Moderada densidad vegetal'
ndvi_res8.loc[(ndvi_res8.ndvi_median>=0.2)&
             (ndvi_res8.ndvi_median<0.4), 'ndvi_median_rango'] = 'Mínima densidad vegetal'
ndvi_res8.loc[(ndvi_res8.ndvi_median>=0.1)&
             (ndvi_res8.ndvi_median<0.2), 'ndvi_median_rango'] = 'Suelo'
ndvi_res8.loc[(ndvi_res8.ndvi_median<0.1), 'ndvi_median_rango'] = 'Suelo artificial/Agua/Piedra'

# Ordering data
categories = ['Suelo artificial/Agua/Piedra', 'Suelo', 'Mínima densidad vegetal', 'Moderada densidad vegetal', 'Alta densidad vegetal']
ndvi_res8['ndvi_median_rango'] = pd.Categorical(ndvi_res8['ndvi_median_rango'], categories=categories, ordered=True)

# Reviewing process
print(ndvi_res8.shape)
ndvi_res8.head(2)

(1187, 12)


Unnamed: 0,hex_id,geometry,res,ndvi_mean,ndvi_std,ndvi_median,ndvi_max,ndvi_min,ndvi_diff,ndvi_tend,city,ndvi_median_rango
0,88498c8649fffff,"POLYGON ((-103.32357 20.76827, -103.32865 20.7...",8,0.24476,0.100867,0.24744,0.368982,0.115646,0.253336,-0.002247,Guadalajara,Mínima densidad vegetal
1,88498c864bfffff,"POLYGON ((-103.32919 20.77561, -103.33427 20.7...",8,0.269992,0.122279,0.255744,0.431332,0.113069,0.318263,-0.001899,Guadalajara,Mínima densidad vegetal


### Download marginalization data from ndvi geometries and merge

Marginalization data download

In [6]:
# Finds hex_id codes from ndvi data
hex_ids = str(tuple(list(ndvi_res8["hex_id"].unique())))

# Location of hexagons with marginalization data
marg_schema = 'censo'
marg_folder = 'hex_bins_marg_2020'

query = f"SELECT * FROM {marg_schema}.{marg_folder} WHERE \"hex_id_8\" IN {hex_ids}"
marg_res8 = aup.gdf_from_query(query, geometry_col='geometry')

print(marg_res8.shape)
marg_res8.head(2)

(1162, 17)


Unnamed: 0,geometry,hex_id_8,CVEGEO,pobtot,p6a14nae,sbasc,psdss,ovsde,ovsee,ovsae,ovpt,ovsref,ovsint,ovscel,ovhac,im_2020,imn_2020
0,"POLYGON ((-103.61319 20.42802, -103.61772 20.4...",8849abc9ebfffff,14002,273.16855,0.474944,2.828432,1.787101,0.1098,0.008903,0.115735,0.029676,0.697379,4.282201,0.593514,2.756871,120.556725,0.941744
1,"POLYGON ((-103.61374 20.43321, -103.61319 20.4...",8849abc9e3fffff,14002,89.333336,1.282051,7.092198,7.960199,0.0,0.0,0.621891,0.0,1.492537,15.049751,0.373134,8.333333,122.339516,0.95567


Marginalization data treatment

In [7]:
#Creating marginalization categories
marg_res8['Grado_Marg'] = np.nan
marg_res8.loc[marg_res8.imn_2020>=0.966338 , 'Grado_Marg'] = 'Muy bajo'
marg_res8.loc[(marg_res8.imn_2020>=0.946436 )&
             (marg_res8.imn_2020<0.966338), 'Grado_Marg'] = 'Bajo'
marg_res8.loc[(marg_res8.imn_2020>=0.926536)&
             (marg_res8.imn_2020<0.946436), 'Grado_Marg'] = 'Medio'
marg_res8.loc[(marg_res8.imn_2020>=0.8999)&
             (marg_res8.imn_2020<0.926536), 'Grado_Marg'] = 'Alto'
marg_res8.loc[(marg_res8.imn_2020<0.8999), 'Grado_Marg'] = 'Muy Alto'

#Ordening data
categories = ['Muy Alto', 'Alto', 'Medio', 'Bajo', 'Muy bajo']
marg_res8['Grado_Marg'] = pd.Categorical(marg_res8['Grado_Marg'], categories=categories, ordered=True)

#Reviewing process
print(marg_res8.shape)
marg_res8.head(2)

(1162, 18)


Unnamed: 0,geometry,hex_id_8,CVEGEO,pobtot,p6a14nae,sbasc,psdss,ovsde,ovsee,ovsae,ovpt,ovsref,ovsint,ovscel,ovhac,im_2020,imn_2020,Grado_Marg
0,"POLYGON ((-103.61319 20.42802, -103.61772 20.4...",8849abc9ebfffff,14002,273.16855,0.474944,2.828432,1.787101,0.1098,0.008903,0.115735,0.029676,0.697379,4.282201,0.593514,2.756871,120.556725,0.941744,Medio
1,"POLYGON ((-103.61374 20.43321, -103.61319 20.4...",8849abc9e3fffff,14002,89.333336,1.282051,7.092198,7.960199,0.0,0.0,0.621891,0.0,1.492537,15.049751,0.373134,8.333333,122.339516,0.95567,Bajo


NDVI + marg merge

In [8]:
columns_tokeep = ['hex_id_8','pobtot','im_2020','imn_2020','Grado_Marg']
marg_res8_clean = marg_res8[columns_tokeep]
ndvi_marg = ndvi_res8.merge(marg_res8_clean, left_on='hex_id', right_on='hex_id_8')
ndvi_marg = ndvi_marg.drop(columns=['hex_id_8'])

#Reviewing process
print(ndvi_marg.shape)
ndvi_marg.head(2)

(1162, 16)


Unnamed: 0,hex_id,geometry,res,ndvi_mean,ndvi_std,ndvi_median,ndvi_max,ndvi_min,ndvi_diff,ndvi_tend,city,ndvi_median_rango,pobtot,im_2020,imn_2020,Grado_Marg
0,88498c8649fffff,"POLYGON ((-103.32357 20.76827, -103.32865 20.7...",8,0.24476,0.100867,0.24744,0.368982,0.115646,0.253336,-0.002247,Guadalajara,Mínima densidad vegetal,4877.139,118.322241,0.924289,Alto
1,88498c864bfffff,"POLYGON ((-103.32919 20.77561, -103.33427 20.7...",8,0.269992,0.122279,0.255744,0.431332,0.113069,0.318263,-0.001899,Guadalajara,Mínima densidad vegetal,877.6089,116.99472,0.913919,Alto


In [9]:
#Export to csv
if csv_dataset:
    csv_raster_dataset = ndvi_marg.copy()
    #Show dataframe
    print(csv_raster_dataset.shape)
    csv_raster_dataset.head(2)
    #Download dataframe
    csv_raster_dataset.to_csv("../../../data/external/ndvi_marg_{}".format(city.lower()))

(1162, 16)


In [10]:
ndvi_ndmi_marg = ndvi_ndmi_marg.sort_values('Grado_Marg')
ndvi_ndmi_marg.loc[ndvi_ndmi_marg.Grado_Marg.isna()]

NameError: name 'ndvi_ndmi_marg' is not defined

In [None]:
fig, ax = plt.subplots(figsize=(10,10))

sns.boxplot(x=np.array(ndvi_ndmi_marg.Grado_Marg) , y=np.array(ndvi_ndmi_marg.ndmi_diff), ax=ax)

plt.title('Relación entre grado de marginación y ndmi_diff en {}.'.format(city))
plt.xlabel('Grado de marginación')
plt.ylabel('ndmi_diff')

# ------------------------------------------- Add number of hex per marginalization group -------------------------------------------

#Calculate median of each Marginalization group to position number of observations
maxs = ndvi_ndmi_marg.groupby(['Grado_Marg'])['ndmi_diff'].max()
num_maxs = maxs.values
#Drop NaNs
num_maxs = num_maxs[~np.isnan(num_maxs)]

# Calculate number of observations per group
obs = ndvi_ndmi_marg['Grado_Marg'].value_counts()
obs = obs.reset_index()
obs = obs.sort_values('index')
print(obs)
#Drop Ceros by turning them to nans
obs = obs['Grado_Marg'].apply(lambda x: np.nan if x==0 else x)
obs = obs[~np.isnan(obs)]

num_obs = obs.values
num_obs = [str(x) for x in num_obs.tolist()]
num_obs = ["n: " + i for i in num_obs]

# Add it to the plot
pos = range(len(num_obs))
for tick,label in zip(pos,ax.get_xticklabels()):
    #ax.text(x,y,text)
    ax.text(pos[tick],
            num_maxs[tick] + 0.005,
            num_obs[tick],
            horizontalalignment='center',
            size='x-small',
            color='black')
# ------------------------------------------- ------------------------------------------- -------------------------------------------

#Para plotear figuras quitar # 
#plt.savefig("../../../output/figures/ndvi_ndmi_marg_bycity/{}/ndmidiff_marg_boxplot_{}.svg".format(city.lower(),city.lower()))

Data download - NDVI

In [None]:
# Downloading NDVI res 10 data
ndvi_schema = 'raster_analysis'
ndvi_folder = 'ndvi_analysis_hex'

res = 10

query = f"SELECT * FROM {ndvi_schema}.{ndvi_folder} WHERE \"city\" LIKE \'{city}\' AND \"res\"={res}"
ndvi_res10 = aup.gdf_from_query(query, geometry_col='geometry')

print(ndvi_res10.shape)
ndvi_res10.head(2)

Data download - NDMI

In [None]:
# Downloading NDMI res 10 data
ndmi_schema = 'raster_analysis'
ndmi_folder = 'ndmi_analysis_hex'

res = 10

query = f"SELECT * FROM {ndmi_schema}.{ndmi_folder} WHERE \"city\" LIKE \'{city}\' AND \"res\"={res}"
ndmi_res10 = aup.gdf_from_query(query, geometry_col='geometry')

print(ndmi_res10.shape)
ndmi_res10.head(2)

Data treatment - Creating vegetation categories

In [None]:
# NDVI res 10 processing
# Creating vegetation categories
ndvi_res10['ndvi_median_rango'] = np.nan
ndvi_res10.loc[ndvi_res10.ndvi_median>=0.6 , 'ndvi_median_rango'] = 'Alta densidad vegetal'
ndvi_res10.loc[(ndvi_res10.ndvi_median>=0.4 )&
             (ndvi_res10.ndvi_median<0.6), 'ndvi_median_rango'] = 'Moderada densidad vegetal'
ndvi_res10.loc[(ndvi_res10.ndvi_median>=0.2)&
             (ndvi_res10.ndvi_median<0.4), 'ndvi_median_rango'] = 'Mínima densidad vegetal'
ndvi_res10.loc[(ndvi_res10.ndvi_median>=0.1)&
             (ndvi_res10.ndvi_median<0.2), 'ndvi_median_rango'] = 'Suelo'
ndvi_res10.loc[(ndvi_res10.ndvi_median<0.1), 'ndvi_median_rango'] = 'Suelo artificial/Agua/Piedra'

# Ordering data
categories = ['Suelo artificial/Agua/Piedra', 'Suelo', 'Mínima densidad vegetal', 'Moderada densidad vegetal', 'Alta densidad vegetal']
ndvi_res10['ndvi_median_rango'] = pd.Categorical(ndvi_res10['ndvi_median_rango'], categories=categories, ordered=True)

# Reviewing process
print(ndvi_res10.shape)
ndvi_res10.head(2)

Data treatment - NDVI filtering for places with vegetation

In [None]:
#Keeping High-moderate-minimal vegetation density
ndvi_withveg_res10 = ndvi_res10.loc[(ndvi_res10.ndvi_median_rango == 'Alta densidad vegetal') |
                   (ndvi_res10.ndvi_median_rango == 'Moderada densidad vegetal')|
                   (ndvi_res10.ndvi_median_rango == 'Mínima densidad vegetal')]

#Reviewing process
print(ndvi_withveg_res10.shape)
ndvi_withveg_res10.head(2)

Data treatment - Merging NDVI and NDMI

In [None]:
ndmi_res10_clean = ndmi_res10.drop(columns=['geometry','res','city'])
ndvi_ndmi_res10 = ndvi_withveg_res10.merge(ndmi_res10_clean, left_on='hex_id', right_on='hex_id')

#Reviewing process
print(ndvi_ndmi_res10.shape)
ndvi_ndmi_res10.head(2)

In [None]:
fig, ax = plt.subplots(1,2,figsize=(12,6))

ndvi_ndmi_res10.plot('ndmi_diff', 'viridis', ax=ax[0], legend=True)
ndvi_res10.plot('ndvi_median_rango', 'YlGn', ax=ax[1], legend=True)

ax[0].set_title('ndmi_diff en {} (res 10).'.format(city))
ax[1].set_title('Índice de vegetación en {} (res {}).'.format(city,res))

# Guardar gráficos
plt.savefig("../../../output/figures/ndvi_ndmi_marg_bycity/{}/ndmidiff_{}.svg".format(city.lower(),city.lower()))