# Environmental data to area of prediction

This notebook integrates environmental data from the raster analysis (NDVI, NDMI and temperature) into the area of prediction.

## Import libraries

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import aup

## Data download/read and preprocessing

### Area of prediction with variables

In [2]:
# Read the area-of-prediction polygons together with their existing variables.
tess_path = '../../data/processed/prediccion_uso_suelo/tess_kde.geojson'
gdf = gpd.read_file(tess_path)
print(gdf.shape)
gdf.head(2)

(26735, 28)


Unnamed: 0,fid,UNUSO,area_m2,full_plus_code,CVEGEO,bld_area_m2,block_area_m2,pred_area_pct,pobtot,cultural_recreativo,...,uso_tot,pct_habitacional,pct_cultural_recreativo,pct_servicios,pct_comercio,pct_salud,pct_educacion,pct_gobierno,pct_industria,geometry
0,263,Baldio,1492.077966,75GRMHVV+R4XJ,141200001116A008,82.3788,190471.939851,0.007834,277,0.0,...,2.151825,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"POLYGON ((665861.85 2289152.881, 665861.743 22..."
1,264,Habitacional,192.276653,75GRMHVR+PR5P,141200001116A008,45.6562,190471.939851,0.001009,277,0.0,...,0.277295,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"POLYGON ((665814.876 2289122.473, 665814.781 2..."


### Environmental data

#### NDVI

In [3]:
# Query parameters for the NDVI table.
# NOTE(review): `res` is presumably the H3 hexagon resolution - confirm.
schema = 'raster_analysis'
table = 'ndvi_analysis_hex'
city = 'Guadalajara'
res = 11

# Only the hexagon id and the NDVI mean are needed; geometry comes from the
# temperature table later on.
query = (
    f"SELECT hex_id,ndvi_mean FROM {schema}.{table} "
    f"WHERE \"city\" = '{city}' and \"res\"={res}"
)

ndvi_gdf = aup.df_from_query(query)
print(ndvi_gdf.shape)
ndvi_gdf.head(2)

(480618, 2)


Unnamed: 0,hex_id,ndvi_mean
0,8b49ab4b4858fff,0.131252
1,8b49ab593a93fff,0.199742


#### NDMI

In [4]:
# Query parameters for the NDMI table.
# NOTE(review): `res` is presumably the H3 hexagon resolution - confirm.
schema = 'raster_analysis'
table = 'ndmi_analysis_hex'
city = 'Guadalajara'
res = 11

# Only the hexagon id and the NDMI difference column are retrieved here.
query = (
    f"SELECT hex_id,ndmi_diff FROM {schema}.{table} "
    f"WHERE \"city\" = '{city}' and \"res\"={res}"
)

ndmi_gdf = aup.df_from_query(query)
print(ndmi_gdf.shape)
ndmi_gdf.head(2)

(480618, 2)


Unnamed: 0,hex_id,ndmi_diff
0,8b498c9442d5fff,0.037043
1,8b498c960081fff,0.036202


#### Temperature

In [5]:
# Query parameters for the temperature table.
# NOTE(review): `res` is presumably the H3 hexagon resolution - confirm.
schema = 'raster_analysis'
table = 'temperature_analysis_hex'
city = 'Guadalajara'
res = 11

# The geometry is requested here so this table can act as the spatial base
# for the other environmental indicators.
query = f'SELECT hex_id,temperature_mean,geometry FROM {schema}.{table} WHERE \"city\" = \'{city}\' and \"res\"={res}'

temp_gdf = aup.gdf_from_query(query, geometry_col='geometry')

# Discard hexagons with an infinite temperature of either sign: a single
# +inf or -inf value would turn the mean below into inf and corrupt every
# deviation. NaN rows are intentionally kept (pandas skips them in the mean).
temp_gdf = temp_gdf[~np.isinf(temp_gdf.temperature_mean)].copy()

# Deviation from the mean of all remaining hexagons, computed as
# (mean - value): positive values are below the mean temperature.
temp_gdf['temperature_mean_diff'] = temp_gdf.temperature_mean.mean() - temp_gdf.temperature_mean
temp_gdf = temp_gdf.drop(columns=['temperature_mean'])
print(temp_gdf.shape)
temp_gdf.head(2)

(480608, 3)


Unnamed: 0,hex_id,geometry,temperature_mean_diff
0,8b498c96c82efff,"POLYGON ((-103.38492 20.64897, -103.38474 20.6...",-1.168892
1,8b498c960081fff,"POLYGON ((-103.33538 20.63189, -103.33521 20.6...",-1.392322


#### Merge environmental data

In [6]:
# Combine the three indicators on the hexagon id. The temperature table goes
# first because it carries the geometry. Both merges use the default inner
# join, so only hexagons present in all three tables remain.
env_gdf = (
    temp_gdf
    .merge(ndvi_gdf, on='hex_id')
    .merge(ndmi_gdf, on='hex_id')
)
print(env_gdf.shape)
env_gdf.head(2)

(480608, 5)


Unnamed: 0,hex_id,geometry,temperature_mean_diff,ndvi_mean,ndmi_diff
0,8b498c96c82efff,"POLYGON ((-103.38492 20.64897, -103.38474 20.6...",-1.168892,0.019083,0.032225
1,8b498c960081fff,"POLYGON ((-103.33538 20.63189, -103.33521 20.6...",-1.392322,0.10852,0.036202


Delete the intermediate tables that are no longer needed, now that they are merged into `env_gdf`.

In [7]:
# The per-indicator tables are now contained in env_gdf; release their names.
del ndvi_gdf, ndmi_gdf, temp_gdf

## Environmental data to area of prediction

In [8]:
# Reproject the hexagons to EPSG:32613 before the overlay.
# NOTE(review): this is presumably the CRS of `gdf` - confirm they match.
env_gdf = env_gdf.to_crs(crs="EPSG:32613")

In [9]:
# Intersect the prediction polygons with the environmental hexagons and keep
# only the polygon identifier plus the three environmental variables.
env_columns = ['full_plus_code', 'temperature_mean_diff', 'ndvi_mean', 'ndmi_diff']
gdf_int = gpd.overlay(gdf, env_gdf, how='intersection')[env_columns].copy()
print(gdf_int.shape)
gdf_int.head(2)

(62671, 4)


Unnamed: 0,full_plus_code,temperature_mean_diff,ndvi_mean,ndmi_diff
0,75GRMHVV+R4XJ,4.570843,0.540453,0.098399
1,75GRMHVV+R4XJ,4.244152,0.239239,0.06732


In [10]:
# Collapse the intersection pieces to one row per prediction polygon.
# The mean is unweighted: every intersecting piece counts equally, regardless
# of how much of the polygon it covers.
gdf_int = gdf_int.groupby('full_plus_code', as_index=False).mean()
print(gdf_int.shape)
gdf_int.head(2)

(26616, 4)


Unnamed: 0,full_plus_code,temperature_mean_diff,ndvi_mean,ndmi_diff
0,75GRMG9X+HGMQ,-1.593035,0.298389,0.257904
1,75GRMG9X+JFGQ,-2.460052,0.180289,0.166307


Merge the aggregated environmental variables back into the area-of-prediction polygons.

In [11]:
# Columns contributed by the environmental table (everything but the join key).
env_cols = [col for col in gdf_int.columns if col != 'full_plus_code']

# This cell overwrites `gdf`, so dropping any earlier copy of the
# environmental columns keeps it re-runnable: without the drop, a second run
# would produce `_x`/`_y` suffixed duplicates.
# `validate='many_to_one'` raises if `gdf_int` has repeated keys, which would
# otherwise silently duplicate polygons.
# The join is inner: polygons with no environmental match are dropped.
gdf = (
    gdf
    .drop(columns=env_cols, errors='ignore')
    .merge(gdf_int, on='full_plus_code', validate='many_to_one')
)
print(gdf.shape)
gdf.head(2)

(26735, 31)


Unnamed: 0,fid,UNUSO,area_m2,full_plus_code,CVEGEO,bld_area_m2,block_area_m2,pred_area_pct,pobtot,cultural_recreativo,...,pct_servicios,pct_comercio,pct_salud,pct_educacion,pct_gobierno,pct_industria,geometry,temperature_mean_diff,ndvi_mean,ndmi_diff
0,263,Baldio,1492.077966,75GRMHVV+R4XJ,141200001116A008,82.3788,190471.939851,0.007834,277,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,"POLYGON ((665861.85 2289152.881, 665861.743 22...",4.448344,0.365584,0.07535
1,264,Habitacional,192.276653,75GRMHVR+PR5P,141200001116A008,45.6562,190471.939851,0.001009,277,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,"POLYGON ((665814.876 2289122.473, 665814.781 2...",3.326428,0.292687,0.066635


Save the area of prediction with the environmental variables.

In [12]:
# Write the enriched prediction polygons next to the input file.
output_path = '../../data/processed/prediccion_uso_suelo/tess_kde_env.geojson'
gdf.to_file(output_path)