# Environmental data to area of prediction

This notebook integrates environmental data from the raster analysis (NDVI, NDMI and temperature) into the area of prediction.

## Import libraries

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import aup

## Data download/read and preprocessing

### Area of prediction with variables

In [2]:
# Load the area-of-prediction layer; the environmental variables are appended
# to this GeoDataFrame further down.
aop_path = '../../data/processed/prediccion_uso_suelo/complete_model/area_of_prediction_kde.gpkg'
gdf = gpd.read_file(aop_path)
print(gdf.shape)
gdf.head(2)

(682605, 34)


Unnamed: 0,uso_suelo,area_m2,full_plus_code,CVEGEO,bld_area_m2,block_area_m2,pred_area_m2,pred_area_pct,bld_pred_area_pct,pobtot,...,pct_industria,pct_servicios,pct_alojamiento,pct_comercio,pct_cultural_recreativo,pct_educacion,pct_salud,pct_gobierno,pct_otros,geometry
0,Sin dato,67.279056,75GRMGQW+89PG,1412000011206018,12.438638,278416.454431,67.279056,0.000242,0.184881,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((2349152.724 965335.704, 234914..."
1,Sin dato,45.355545,75GRMGQW+8CFP,1412000011206018,4.482118,278416.454431,45.355545,0.000163,0.098822,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((2349168.91 965335.176, 2349157..."


### Environmental data

#### NDVI

In [3]:
# Query parameters: NDVI hexagon table, restricted to one city and one resolution.
schema, table = 'raster_analysis', 'ndvi_analysis_hex'
city, res = 'Guadalajara', 11

# Only the hexagon id and the NDVI mean are needed; geometry comes from the
# temperature table loaded later.
query = (
    f"SELECT hex_id,ndvi_mean FROM {schema}.{table} "
    f"WHERE \"city\" = '{city}' and \"res\"={res}"
)

ndvi_gdf = aup.df_from_query(query)
print(ndvi_gdf.shape)
ndvi_gdf.head(2)

(480618, 2)


Unnamed: 0,hex_id,ndvi_mean
0,8b49ab49876efff,0.12375
1,8b49ab583da8fff,0.096197


#### NDMI

In [4]:
# Query parameters: NDMI hexagon table, restricted to one city and one resolution.
schema, table = 'raster_analysis', 'ndmi_analysis_hex'
city, res = 'Guadalajara', 11

# Only the hexagon id and the `ndmi_diff` column are needed; geometry comes
# from the temperature table loaded later.
query = (
    f"SELECT hex_id,ndmi_diff FROM {schema}.{table} "
    f"WHERE \"city\" = '{city}' and \"res\"={res}"
)

ndmi_gdf = aup.df_from_query(query)
print(ndmi_gdf.shape)
ndmi_gdf.head(2)

(480618, 2)


Unnamed: 0,hex_id,ndmi_diff
0,8b49ab58ad99fff,0.187656
1,8b49ab586b85fff,0.096924


#### Temperature

In [5]:
# Query parameters: temperature hexagon table, restricted to one city and one resolution.
schema = 'raster_analysis'
table = 'temperature_analysis_hex'
city = 'Guadalajara'
res = 11

# Geometry is requested here (and only here): this table provides the hexagon
# polygons used for the spatial overlay later in the notebook.
query = f'SELECT hex_id,temperature_mean,geometry FROM {schema}.{table} WHERE \"city\" = \'{city}\' and \"res\"={res}'

temp_gdf = aup.gdf_from_query(query, geometry_col='geometry')

# Drop hexagons whose mean temperature is infinite in either direction: a
# single +inf or -inf value would make the overall mean below non-finite and
# therefore corrupt every `temperature_mean_diff` value.
non_finite = [float('inf'), float('-inf')]
temp_gdf = temp_gdf[~temp_gdf.temperature_mean.isin(non_finite)].copy()

# Deviation from the mean over all returned hexagons, computed as
# (mean - value): positive values are hexagons cooler than the mean,
# negative values are hotter.
temp_gdf['temperature_mean_diff'] = temp_gdf.temperature_mean.mean() - temp_gdf.temperature_mean
temp_gdf = temp_gdf.drop(columns=['temperature_mean'])
print(temp_gdf.shape)
temp_gdf.head(2)

(480608, 3)


Unnamed: 0,hex_id,geometry,temperature_mean_diff
0,8b498c944988fff,"POLYGON ((-103.31449 20.65493, -103.31432 20.6...",-1.19599
1,8b498c960081fff,"POLYGON ((-103.33538 20.63189, -103.33521 20.6...",-1.392322


#### Merge environmental data

In [6]:
# The temperature table carries the hexagon geometry, so it is the left side of
# both joins. `merge` defaults to an inner join, so only hexagons present in
# all three tables are kept.
env_gdf = (
    temp_gdf
    .merge(ndvi_gdf, on='hex_id')
    .merge(ndmi_gdf, on='hex_id')
)
print(env_gdf.shape)
env_gdf.head(2)

(480608, 5)


Unnamed: 0,hex_id,geometry,temperature_mean_diff,ndvi_mean,ndmi_diff
0,8b498c944988fff,"POLYGON ((-103.31449 20.65493, -103.31432 20.6...",-1.19599,0.14399,0.036642
1,8b498c960081fff,"POLYGON ((-103.33538 20.63189, -103.33521 20.6...",-1.392322,0.10852,0.036202


Delete the intermediate tables that are no longer needed, to free memory

In [7]:
# Everything needed from these tables now lives in `env_gdf`.
del ndvi_gdf, ndmi_gdf, temp_gdf

## Environmental data to area of prediction

In [8]:
# Reproject the hexagons to the CRS of the area of prediction so that both
# layers share the same coordinates in the overlay. EPSG:6372 is kept as the
# fallback for the case where the area-of-prediction layer has no CRS set.
target_crs = gdf.crs if gdf.crs is not None else "EPSG:6372"
env_gdf = env_gdf.to_crs(target_crs)

In [9]:
# Columns kept after the overlay: the polygon key plus the three environmental
# variables. Geometry is intentionally not kept.
keep_cols = ['full_plus_code', 'temperature_mean_diff', 'ndvi_mean', 'ndmi_diff']

# The intersection yields one row per (prediction polygon, hexagon) piece, so a
# polygon touching several hexagons appears several times.
# NOTE(review): the pieces' areas are discarded here, so the next aggregation
# step cannot weight by overlap area — confirm that is intended.
gdf_int = gdf.overlay(env_gdf, how='intersection')[keep_cols].copy()
print(gdf_int.shape)
gdf_int.head(2)

  return geopandas.overlay(


(1175745, 4)


Unnamed: 0,full_plus_code,temperature_mean_diff,ndvi_mean,ndmi_diff
0,75GRMGQW+89PG,1.787026,0.084339,0.045966
1,75GRMGQW+8CFP,1.787026,0.084339,0.045966


In [10]:
# Collapse the overlay pieces to one row per `full_plus_code`, taking the plain
# (unweighted) mean of every environmental variable.
gdf_int = gdf_int.groupby('full_plus_code', as_index=False).mean()
print(gdf_int.shape)
gdf_int.head(2)

(676931, 4)


Unnamed: 0,full_plus_code,temperature_mean_diff,ndvi_mean,ndmi_diff
0,75GRHGWX+JV75,3.04114,0.291476,0.085328
1,75GRHGWX+JV95,2.774119,0.19724,0.065725


Merge the aggregated environmental variables back into the area of prediction

In [11]:
# Attach the aggregated environmental variables to the area of prediction.
# The join type is spelled out: an inner join drops polygons that did not
# intersect any hexagon, so the number of dropped rows is reported instead of
# disappearing silently.
# `validate='many_to_one'` raises if `gdf_int` has more than one row per
# `full_plus_code`, which would otherwise duplicate polygons.
rows_before = len(gdf)
gdf = gdf.merge(gdf_int, on='full_plus_code', how='inner', validate='many_to_one')
rows_dropped = rows_before - len(gdf)
if rows_dropped:
    print(f'{rows_dropped} of {rows_before} rows had no environmental data and were dropped')
print(gdf.shape)
gdf.head(2)

(682585, 37)


Unnamed: 0,uso_suelo,area_m2,full_plus_code,CVEGEO,bld_area_m2,block_area_m2,pred_area_m2,pred_area_pct,bld_pred_area_pct,pobtot,...,pct_comercio,pct_cultural_recreativo,pct_educacion,pct_salud,pct_gobierno,pct_otros,geometry,temperature_mean_diff,ndvi_mean,ndmi_diff
0,Sin dato,67.279056,75GRMGQW+89PG,1412000011206018,12.438638,278416.454431,67.279056,0.000242,0.184881,0,...,0.0,0.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((2349152.724 965335.704, 234914...",1.787026,0.084339,0.045966
1,Sin dato,45.355545,75GRMGQW+8CFP,1412000011206018,4.482118,278416.454431,45.355545,0.000163,0.098822,0,...,0.0,0.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((2349168.91 965335.176, 2349157...",1.649216,0.109751,0.045719


Save data

In [12]:
# Write the area of prediction, now including the environmental variables,
# to a new file next to the input layer.
out_path = '../../data/processed/prediccion_uso_suelo/complete_model/area_of_prediction_env.gpkg'
gdf.to_file(out_path)