Carga de librerías

In [1]:
# System management packages
import os
import re
import sys
import warnings
from numba import NumbaDeprecationWarning

sys.path.append('../src')
warnings.filterwarnings(action='ignore', category=NumbaDeprecationWarning)

# Data science packages
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import imageio

# Geospatial packages
import h3
import geopandas as gpd
from shapely import Polygon
from pysal.lib import weights
from pysal.explore import esda
from splot.esda import moran_scatterplot, lisa_cluster

# Personal packages
from settings import Settings

# Notebook settings
settings = Settings()

  from .autonotebook import tqdm as notebook_tqdm


# Selección de fuente de datos

In [2]:
# Data directory and list of files
DATA_BASE_DIR = os.path.join(settings.ROOT, 'data')
RAW_DATA = os.path.join(DATA_BASE_DIR, 'datos-produccion-maiz')
#PROCESSED_DATA = os.path.join(DATA_BASE_DIR, 'maize_production_h3hex_cells')

LIST_OF_FILES = [file for file in os.listdir(RAW_DATA)] #if file.startswith('agg')]
H3_CATALOGUE = pd.read_csv(os.path.join(RAW_DATA, '01_h3_cells_catalogue.csv'))
H3_RESOLUTION_LIST = [col for col in H3_CATALOGUE.columns if col.startswith('hex')]

# Declare file selector instance and mesh resolution
# file_selector = widgets.Dropdown(
#     options=LIST_OF_FILES
#     ,description='Files'
#     ,disabled=False)

# # Display selectors
# display(file_selector)

file_selector = 'hist-maize-panel-rcp2p6.csv'

In [3]:
# Store file_selector output in variable
file_path = os.path.join(RAW_DATA, file_selector)

# Extract AIRCCA base model and h3 mesh resolution
get_aircca_model = re.compile(r'rcp[0-9]p[0-9]')
model_pointer = get_aircca_model.search(file_path).group()

print(f'Model pointer: {model_pointer.upper()}')

Model pointer: RCP2P6


In [4]:
# Load data into dataframe object
data = (
    pd.read_csv(file_path)
    .query("year >= 2005 and year <= 2020")
    # .join(
    #     other=H3_CATALOGUE[['id'] + H3_RESOLUTION_LIST]
    #     ,on='id'
    #     ,how='left'
    #     ,rsuffix='__ignore')
)

# Drop __ignore and format column names
data.drop(
    columns=[col for col in data.columns if col.__contains__('__ignore')]
    ,inplace=True)

data.columns = [
    re.sub(
        pattern=r'[-\. ]'
        ,repl='_'
        ,string=colname.lower().strip())
    for colname in data.columns]

# List of ordinary predictors, non geographical nor temporal data
ordinary_predictors = [var for var in data.columns if var.startswith('mean')]

data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 502528 entries, 0 to 2826645
Data columns (total 9 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   lon            502528 non-null  float64
 1   lat            502528 non-null  float64
 2   year           502528 non-null  int64  
 3   mean_precip    502528 non-null  float64
 4   mean_precip_2  502528 non-null  float64
 5   mean_temp      502528 non-null  float64
 6   mean_temp_2    502528 non-null  float64
 7   mean_yield     502528 non-null  float64
 8   id             502528 non-null  int64  
dtypes: float64(7), int64(2)
memory usage: 38.3 MB


# Autocorrelación espacial global

In [13]:
for variable in ['mean_yield']:
    for year in data.year.unique():
        print(f"{variable}:{year}")
        # Visualization data
        _vis = (
            data
            .query(f"year == {year}")
            .assign(geometry=lambda x: gpd.points_from_xy(x.lon, x.lat)))
        
        # Matriz de pesos bajo criterio de reinas
        w = weights.KNN.from_dataframe(
            df=_vis,
            geom_col="geometry",
            silence_warnings=True)
        
        # Estandarización por renglón
        w.set_transform(value='R')

        # Calcular columnas de rezago
        _vis = (
            _vis
            .assign(
                # Rezago espacial de la variable de interés
                lag = lambda _df: weights.spatial_lag.lag_spatial(w=w, y=_df[variable])
                # Centrar a la media y escalar a 2 std
                ,scaled = lambda _df: (_df[variable] - _df[variable].mean()) / (_df[variable].std() * 2)
                ,lag_scaled = lambda _df: (_df.lag - _df.lag.mean()) / (_df.lag.std() * 2)))

        # Figure config
        fig, ax = plt.subplots(figsize=(16,7))

        # Elementos de visualización
        sns.regplot(
            data=_vis,
            x='scaled',
            y='lag_scaled',
            ci=None,
            line_kws=dict(color='tab:red'),
            ax=ax)
        ax.axvline(0, color='black', linewidth=1, alpha=0.8)
        ax.axhline(0, color='black', linewidth=1, alpha=0.8)


        # Anotaciones y estilo
        ax.set(
            title=f'Dispersión de Moran, {year}'
            ,xlabel='Variable de análisis'
            ,ylabel='Rezago espacial')

        fig.tight_layout()
        fig.savefig(f"../figures/autocor/{variable}/global/glob_autocor_{year}.png")

        plt.close()

mean_yield:2005
mean_yield:2006
mean_yield:2007
mean_yield:2008
mean_yield:2009
mean_yield:2010
mean_yield:2011
mean_yield:2012
mean_yield:2013
mean_yield:2014
mean_yield:2015
mean_yield:2016
mean_yield:2017
mean_yield:2018
mean_yield:2019
mean_yield:2020


In [14]:
variable = "mean_yield"

# List of image file paths
image_files = os.listdir(f'../figures/autocor/{variable}/global/')

# Read images and save as GIF
images = [imageio.imread(f"../figures/autocor/{variable}/global/{image}") for image in image_files]
imageio.mimsave(f'../figures/autocor/global_{variable}.gif', images, format='GIF', duration=500, loop=4)

  images = [imageio.imread(f"../figures/autocor/{variable}/global/{image}") for image in image_files]


# Autocorrelación local

In [26]:
p = 0.05
for variable in ['mean_yield']:
    for year in data.year.unique():
        print(f"{variable}:{year}")
        # Visualization data
        _vis = (
            data
            .query(f"year == {year}")
            .assign(geometry=lambda x: gpd.points_from_xy(x.lon, x.lat)))
        
        # Matriz de pesos bajo criterio de reinas
        w = weights.KNN.from_dataframe(
            df=_vis,
            geom_col="geometry",
            silence_warnings=True)
        
        # Estandarización por renglón
        w.set_transform(value='R')

        # Calcular columnas de rezago
        _vis = (
            _vis
            .assign(
                # Rezago espacial de la variable de interés
                lag = lambda _df: weights.spatial_lag.lag_spatial(w=w, y=_df[variable]),
                # Centrar a la media y escalar a 2 std
                scaled = lambda _df: (_df[variable] - _df[variable].mean()) / (_df[variable].std() * 2),
                lag_scaled = lambda _df: (_df.lag - _df.lag.mean()) / (_df.lag.std() * 2)))

        # Estadístico Moran I local
        _vis_lisa = esda.moran.Moran_Local(
            y=_vis[variable]
            ,w=w
            ,transformation='R'
            ,permutations=1_000
            ,n_jobs=-1)
        
        # Asignar valores de Moran's Local LISAs a datos originales
        _vis = _vis.assign(ML_Is=_vis_lisa.Is)

        # Figure config
        fig, ax = plt.subplots(figsize=(16,7))

        # Elementos de visualización
        lisa_cluster(
            moran_loc=_vis_lisa,
            gdf=gpd.GeoDataFrame(_vis),
            p=p,
            ax=ax,)


        # Anotaciones y estilo
        ax.set(
            title=f'LISA: {p}, {year}',
            # xlabel='Variable de análisis',
            # ylabel='Rezago espacial'
        )

        fig.tight_layout()
        fig.savefig(f"../figures/autocor/{variable}/local/glob_autocor_{year}.png")

        plt.close()

mean_yield:2005
mean_yield:2006
mean_yield:2007
mean_yield:2008
mean_yield:2009
mean_yield:2010
mean_yield:2011
mean_yield:2012
mean_yield:2013
mean_yield:2014
mean_yield:2015
mean_yield:2016
mean_yield:2017
mean_yield:2018
mean_yield:2019
mean_yield:2020


In [27]:
variable = "mean_yield"

# List of image file paths
image_files = os.listdir(f'../figures/autocor/{variable}/local/')

# Read images and save as GIF
images = [imageio.imread(f"../figures/autocor/{variable}/local/{image}") for image in image_files]
imageio.mimsave(f'../figures/autocor/local_{variable}.gif', images, format='GIF', duration=500, loop=4)

  images = [imageio.imread(f"../figures/autocor/{variable}/local/{image}") for image in image_files]
