# Environmental data to area of prediction

This notebook integrates environmental data (NDVI, NDMI and temperature) from the raster analysis into the area of prediction.

## Import libraries

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import aup

## Data download/read and preprocessing

### Area of prediction with variables

In [2]:
# Area-of-prediction polygons together with their previously computed variables
aop_path = '../../data/processed/prediccion_uso_suelo/test/area_of_prediction_kde.gpkg'
gdf = gpd.read_file(aop_path)
print(gdf.shape)
gdf.head(2)

(37061, 29)


Unnamed: 0,uso_suelo,area_m2,full_plus_code,CVEGEO,bld_area_m2,block_area_m2,pred_area_m2,pred_area_pct,bld_pred_area_pct,pobtot,...,uso_tot,pct_habitacional,pct_cultural_recreativo,pct_servicios,pct_comercio,pct_salud,pct_educacion,pct_gobierno,pct_industria,geometry
0,Servicio,2257.817773,75GRMHR2+458V,1412000011206018,173.7348,278416.454431,2747.599834,0.009869,0.063231,0,...,4.926511,0.0,0.0,0.740466,0.231342,0.0,0.0,0.0,0.028192,"MULTIPOLYGON (((2349620.239 965525.905, 234961..."
1,Sin dato,711.09856,75GRMGRX+GQP3,1412000011206018,81.7922,278416.454431,900.765482,0.003235,0.090803,0,...,58.144456,0.0,0.0,0.586683,0.354285,0.0,0.0,0.0,0.059033,"MULTIPOLYGON (((2349506.214 965639.352, 234950..."


### Environmental data

#### NDVI

In [3]:
# Where to read from (schema/table) and which rows to keep (city/resolution)
schema, table = 'raster_analysis', 'ndvi_analysis_hex'
city, res = 'Guadalajara', 11

# Only the hexagon id and the mean NDVI are requested
query = (
    f'SELECT hex_id,ndvi_mean FROM {schema}.{table} '
    f'WHERE \"city\" = \'{city}\' and \"res\"={res}'
)

# Plain (non-spatial) table; geometry is fetched later with the temperature data
ndvi_gdf = aup.df_from_query(query)
print(ndvi_gdf.shape)
ndvi_gdf.head(2)

(480618, 2)


Unnamed: 0,hex_id,ndvi_mean
0,8b498c960081fff,0.10852
1,8b498c941c48fff,0.092168


#### NDMI

In [4]:
# Where to read from (schema/table) and which rows to keep (city/resolution)
schema, table = 'raster_analysis', 'ndmi_analysis_hex'
city, res = 'Guadalajara', 11

# Only the hexagon id and the NDMI difference are requested
query = (
    f'SELECT hex_id,ndmi_diff FROM {schema}.{table} '
    f'WHERE \"city\" = \'{city}\' and \"res\"={res}'
)

# Plain (non-spatial) table; geometry is fetched later with the temperature data
ndmi_gdf = aup.df_from_query(query)
print(ndmi_gdf.shape)
ndmi_gdf.head(2)

(480618, 2)


Unnamed: 0,hex_id,ndmi_diff
0,8b49ab4b4642fff,0.063668
1,8b498c94a20efff,0.052847


#### Temperature

In [5]:
# Where to read from (schema/table) and which rows to keep (city/resolution)
schema = 'raster_analysis'
table = 'temperature_analysis_hex'
city = 'Guadalajara'
res = 11

query = f'SELECT hex_id,temperature_mean,geometry FROM {schema}.{table} WHERE \"city\" = \'{city}\' and \"res\"={res}'

# This is the only environmental table read with geometry; it carries the
# hexagon polygons used for the spatial overlay further down
temp_gdf = aup.gdf_from_query(query, geometry_col='geometry')

# Drop hexagons whose mean temperature is infinite. Both signs are removed:
# a single -inf left in the column would turn the overall mean below into -inf
# and corrupt every temperature_mean_diff value.
temp_gdf = temp_gdf[~temp_gdf.temperature_mean.isin([np.inf, -np.inf])].copy()

# Deviation from the mean of the remaining hexagons, computed as mean minus value,
# so positive numbers are hexagons below the mean temperature
temp_gdf['temperature_mean_diff'] = temp_gdf.temperature_mean.mean() - temp_gdf.temperature_mean
temp_gdf = temp_gdf.drop(columns=['temperature_mean'])
print(temp_gdf.shape)
temp_gdf.head(2)

(480608, 3)


Unnamed: 0,hex_id,geometry,temperature_mean_diff
0,8b498c96a196fff,"POLYGON ((-103.34959 20.68947, -103.34941 20.6...",1.02978
1,8b498c960081fff,"POLYGON ((-103.33538 20.63189, -103.33521 20.6...",-1.392322


#### Merge environmental data

In [6]:
# Attach NDVI and NDMI to the temperature hexagons. Both joins use the default
# inner merge on hex_id, so only hexagons present in all three tables remain.
env_gdf = (
    temp_gdf
    .merge(ndvi_gdf, on='hex_id')
    .merge(ndmi_gdf, on='hex_id')
)
print(env_gdf.shape)
env_gdf.head(2)

(480608, 5)


Unnamed: 0,hex_id,geometry,temperature_mean_diff,ndvi_mean,ndmi_diff
0,8b498c96a196fff,"POLYGON ((-103.34959 20.68947, -103.34941 20.6...",1.02978,0.147358,0.068029
1,8b498c960081fff,"POLYGON ((-103.33538 20.63189, -103.33521 20.6...",-1.392322,0.10852,0.036202


Delete the intermediate tables, which are no longer needed after the merge

In [7]:
# Everything needed now lives in env_gdf; drop the three source tables
del ndvi_gdf, ndmi_gdf, temp_gdf

## Environmental data to area of prediction

In [8]:
# Reproject the hexagons before the overlay.
# NOTE(review): presumably EPSG:6372 is also the CRS of gdf — confirm.
target_crs = "EPSG:6372"
env_gdf = env_gdf.to_crs(target_crs)

In [9]:
# Identifier of the prediction polygon plus the three environmental variables
env_cols = ['full_plus_code', 'temperature_mean_diff', 'ndvi_mean', 'ndmi_diff']

# Intersect the prediction polygons with the hexagons, then keep only the
# attribute columns (the intersection geometry is discarded)
gdf_int = gdf.overlay(env_gdf, how='intersection')[env_cols].copy()
print(gdf_int.shape)
gdf_int.head(2)

(88728, 4)


Unnamed: 0,full_plus_code,temperature_mean_diff,ndvi_mean,ndmi_diff
0,75GRMHR2+458V,1.547896,0.174779,0.07401
1,75GRMHR2+458V,1.274798,0.360041,0.069455


In [10]:
# Collapse to one row per full_plus_code using a plain (unweighted) mean of the
# values from every intersected piece
gdf_int = gdf_int.groupby('full_plus_code', as_index=False).mean()
print(gdf_int.shape)
gdf_int.head(2)

(36761, 4)


Unnamed: 0,full_plus_code,temperature_mean_diff,ndvi_mean,ndmi_diff
0,75GRMG8X+7XM5,0.274744,0.156782,0.095076
1,75GRMG8X+8FJR,-0.209318,0.099273,0.081487


Merge the aggregated environmental variables back into the area of prediction

In [11]:
# Inner join on the plus code (the pandas default): prediction polygons with no
# aggregated environmental row are dropped from the result
gdf = gdf.merge(gdf_int, how='inner', on='full_plus_code')
print(gdf.shape)
gdf.head(2)

(37061, 32)


Unnamed: 0,uso_suelo,area_m2,full_plus_code,CVEGEO,bld_area_m2,block_area_m2,pred_area_m2,pred_area_pct,bld_pred_area_pct,pobtot,...,pct_servicios,pct_comercio,pct_salud,pct_educacion,pct_gobierno,pct_industria,geometry,temperature_mean_diff,ndvi_mean,ndmi_diff
0,Servicio,2257.817773,75GRMHR2+458V,1412000011206018,173.7348,278416.454431,2747.599834,0.009869,0.063231,0,...,0.740466,0.231342,0.0,0.0,0.0,0.028192,"MULTIPOLYGON (((2349620.239 965525.905, 234961...",1.425273,0.254363,0.085301
1,Sin dato,711.09856,75GRMGRX+GQP3,1412000011206018,81.7922,278416.454431,900.765482,0.003235,0.090803,0,...,0.586683,0.354285,0.0,0.0,0.0,0.059033,"MULTIPOLYGON (((2349506.214 965639.352, 234950...",2.330655,0.280296,0.070458


## Save data

In [12]:
# Write the area of prediction, now including the environmental variables
env_output_path = '../../data/processed/prediccion_uso_suelo/test/area_of_prediction_env.gpkg'
gdf.to_file(env_output_path)