# Carga de librerías

In [1]:
# System
import os
import re
import sys
sys.path.append('../src')

# Overall
import h3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Partial
from settings import Settings
from ipywidgets import widgets

# Notebook settings
settings = Settings()

# Selección de fuente de datos

In [2]:
# Data directory and list of files
DATA_BASE_DIR = os.path.join(settings.ROOT, 'data')
RAW_DATA = os.path.join(DATA_BASE_DIR, 'datos-produccion-maiz')

# os.listdir returns entries in arbitrary order; sorting keeps the dropdown
# order (and therefore its default selection) stable between runs and machines.
LIST_OF_FILES = sorted(
    file for file in os.listdir(RAW_DATA) if file.startswith('hist'))

# Catalogue of H3 cells; its 'hex*' columns are the selectable mesh resolutions.
H3_CATALOGUE = pd.read_csv(os.path.join(RAW_DATA, '01_h3_cells_catalogue.csv'))
H3_RESOLUTION_LIST = [col for col in H3_CATALOGUE.columns if col.startswith('hex')]

# Declare file selector instance
file_selector = widgets.Dropdown(
    options=LIST_OF_FILES
    ,description='Files'
    ,disabled=False)

# Display selectors
display(file_selector)

Dropdown(description='Files', options=('hist-maize-panel-rcp2p6.csv', 'hist-maize-panel-rcp8p5.csv'), value='h…

In [3]:
# Store file_selector output in variable
file_path = os.path.join(RAW_DATA, file_selector.value)

# Extract the AIRCCA base model tag (e.g. 'rcp2p6'). Only the file name is
# searched, so a directory component that looks like a tag can never match first.
get_aircca_model = re.compile(r'rcp[0-9]p[0-9]')
_model_match = get_aircca_model.search(file_selector.value)
if _model_match is None:
    # Fail with a readable message instead of AttributeError on None.group()
    raise ValueError(
        f'No RCP tag (e.g. "rcp2p6") found in file name: {file_selector.value!r}')
model_pointer = _model_match.group()

print(f'Model pointer: {model_pointer.upper()}')

Model pointer: RCP2P6


In [4]:
# Load data into dataframe object and attach the H3 cell ids of every grid point.
# DataFrame.join(on='id') matches the caller's 'id' column against the *index* of
# `other`, so the catalogue is indexed by 'id' first; joining against its default
# RangeIndex would pair ids with row positions instead of with catalogue ids.
# NOTE(review): assumes 'id' is unique in the catalogue - confirm, otherwise the
# join duplicates rows.
data = (
    pd.read_csv(file_path)
    .join(
        other=H3_CATALOGUE.set_index('id')[H3_RESOLUTION_LIST]
        ,on='id'
        ,how='left'
        ,rsuffix='__ignore')
    # Drop any overlapping catalogue columns that received the __ignore suffix
    .pipe(lambda _df: _df.drop(
        columns=[col for col in _df.columns if '__ignore' in col]))
    # Format column names: lower case, trimmed, with '-', '.' and ' ' as '_'
    .rename(columns=lambda colname: re.sub(
        pattern=r'[-\. ]'
        ,repl='_'
        ,string=colname.lower().strip()))
)

# List of ordinary predictors, non geographical nor temporal data
ordinary_predictors = [var for var in data.columns if var.startswith('mean')]

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2826720 entries, 0 to 2826719
Data columns (total 25 columns):
 #   Column         Dtype  
---  ------         -----  
 0   lon            float64
 1   lat            float64
 2   year           int64  
 3   mean_precip    float64
 4   mean_precip_2  float64
 5   mean_temp      float64
 6   mean_temp_2    float64
 7   mean_yield     float64
 8   id             int64  
 9   hex_0          object 
 10  hex_1          object 
 11  hex_2          object 
 12  hex_3          object 
 13  hex_4          object 
 14  hex_5          object 
 15  hex_6          object 
 16  hex_7          object 
 17  hex_8          object 
 18  hex_9          object 
 19  hex_10         object 
 20  hex_11         object 
 21  hex_12         object 
 22  hex_13         object 
 23  hex_14         object 
 24  hex_15         object 
dtypes: float64(7), int64(2), object(16)
memory usage: 539.2+ MB


# Visualización inicial

In [5]:
def plot_yield_full_data(variable, year):
    """Scatter-plot one variable over every grid point for a single year.

    Reads the module-level `data` frame and `model_pointer` string.

    Args:
        variable: Name of the `data` column used to colour the points.
        year: Value matched against the `year` column to select rows.

    Returns:
        None. The figure is rendered with `plt.show()`.
    """
    # Visualization data (boolean mask instead of a string-built query)
    _vis = data.loc[data['year'] == year]

    # Figure config
    fig, ax = plt.subplots(figsize=(16,7))

    # Visualization elements; named `points` so the builtin `map` is not shadowed
    points = ax.scatter(
        x=_vis.lon
        ,y=_vis.lat
        ,s=5
        ,c=_vis[variable]
        ,cmap='viridis')

    plt.colorbar(points)

    # Annotations and styling
    ax.set_title(
        label=f'{model_pointer.upper()}, {year}: {variable}'
        ,fontsize=16)
    ax.set_yticks([])
    ax.set_xticks([])
    ax.set_frame_on(False)

    plt.show()

In [6]:
# Interactive view of the raw grid. Year bounds are taken straight from the
# column (no intermediate unique() array) and the slider starts at the first
# year. The trailing ';' suppresses the stray function repr in the cell output.
widgets.interact(
    plot_yield_full_data
    ,variable=widgets.Dropdown(
        options=ordinary_predictors
        ,value=ordinary_predictors[-1]
        ,description='Variable'
        ,disabled=False)
    ,year=widgets.IntSlider(
        value=int(data['year'].min())
        ,min=int(data['year'].min())
        ,max=int(data['year'].max())
        ,step=1));

interactive(children=(Dropdown(description='Variable', index=4, options=('mean_precip', 'mean_precip_2', 'mean…

<function __main__.plot_yield_full_data(variable, year)>

# Visualización con h3-py

Revisar la siguiente [liga](https://github.com/uber/h3-py-notebooks/blob/master/notebooks/unified_data_layers.ipynb) para mayor detalle de las funcionalidades del framework de Uber.

Se empieza definiendo resoluciones de mallado para las celdas hexagonales

**IMPORTANTE**:

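En la función `h3.latlng_to_cell(lat, lng, res)` el parámetro `res` solo acepta valores enteros entre `0` y `15`; no admite valores negativos. Este rango lo fija la especificación de H3 (16 niveles de resolución) y no depende de los recursos computacionales de la máquina; por eso el catálogo contiene las columnas `hex_0` a `hex_15`.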

In [7]:
def plot_yield_h3(variable, resolution, year, point_size):
    """Scatter-plot per-cell means of one variable at a given H3 resolution.

    Reads the module-level `data`, `ordinary_predictors` and `model_pointer`.
    Rows of the selected year are grouped by the H3 cell id stored in the
    `resolution` column, every predictor is averaged per cell, and each cell is
    drawn at the centre returned by `h3.cell_to_latlng`.

    Args:
        variable: Name of the predictor column used to colour the points.
        resolution: Name of the `data` column holding the H3 cell ids.
        year: Value matched against the `year` column to select rows.
        point_size: Marker size passed to `ax.scatter`.

    Returns:
        None. The figure is rendered with `plt.show()`.
    """
    # Visualization data
    _vis = (
        data
        .loc[data['year'] == year]
        .filter(items=[resolution] + ordinary_predictors)
        # Group by hexagonal cell identifier
        .groupby(by=resolution)
        # Average every predictor within each cell
        .mean()
        # Reset index for data processing
        .reset_index())

    # Compute each cell centre once; cell_to_latlng returns a (lat, lng) pair,
    # so both coordinates come from a single call per cell.
    centroids = [h3.cell_to_latlng(cell) for cell in _vis[resolution]]
    _vis = _vis.assign(
        lat=[lat for lat, _ in centroids]
        ,lon=[lon for _, lon in centroids])

    # Figure config
    fig, ax = plt.subplots(figsize=(16,7))

    # Visualization elements; named `points` so the builtin `map` is not shadowed
    points = ax.scatter(
        x=_vis.lon
        ,y=_vis.lat
        ,s=point_size
        ,c=_vis[variable]
        ,cmap='viridis')

    plt.colorbar(points)

    # Annotations and styling
    ax.set_title(
        label=f'{model_pointer.upper()}, {year}: {variable}'
        ,fontsize=16)
    ax.set_yticks([])
    ax.set_xticks([])
    ax.set_frame_on(False)

    plt.show()

In [8]:
# Interactive view of the H3-aggregated grid. Year bounds are taken straight
# from the column (no intermediate unique() array) and the slider starts at the
# first year. The trailing ';' suppresses the stray function repr in the output.
widgets.interact(
    plot_yield_h3
    ,variable=widgets.Dropdown(
        options=ordinary_predictors
        ,value=ordinary_predictors[-1]
        ,description='Variable'
        ,disabled=False)
    ,resolution=widgets.Dropdown(
        options=H3_RESOLUTION_LIST
        ,description='Resolution'
        ,disabled=False)
    ,year=widgets.IntSlider(
        value=int(data['year'].min())
        ,min=int(data['year'].min())
        ,max=int(data['year'].max())
        ,step=1)
    ,point_size=widgets.IntSlider(
        value=5
        ,min=1
        ,max=250
        ,step=2
        ,description='Point size'
        ,disabled=False)
);

interactive(children=(Dropdown(description='Variable', index=4, options=('mean_precip', 'mean_precip_2', 'mean…

<function __main__.plot_yield_h3(variable, resolution, year, point_size)>