Carga de librerías

In [1]:
# System
import os
import re
import sys
sys.path.append('../src')

# Overall
import h3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Partial
from settings import ROOT
from ipywidgets import widgets

# Selección de fuente de datos

In [2]:
# Data directories: raw input files and processed output location
DATA_BASE_DIR = os.path.join(ROOT, 'data')
RAW_DATA = os.path.join(DATA_BASE_DIR, 'datos-produccion-maiz')
PROCESSED_DATA = os.path.join(DATA_BASE_DIR, 'maize_production_h3hex_cells')

# os.listdir returns entries in arbitrary order; sorting keeps the dropdown
# options (and therefore its default selection) identical on every run.
LIST_OF_FILES = sorted(
    file for file in os.listdir(RAW_DATA) if file.startswith('hist'))

# Declare file selector instance
file_selector = widgets.Dropdown(
    options=LIST_OF_FILES
    ,description='Files'
    ,disabled=False)

# Display file selector
display(file_selector)

Dropdown(description='Files', options=('hist-maize-panel-rcp2p6.csv', 'hist-maize-panel-rcp8p5.csv'), value='h…

In [3]:
# Store file_selector output in variable
file_path = os.path.join(RAW_DATA, file_selector.value)

# Extract AIRCCA base model tag (e.g. 'rcp2p6'). Only the file name is
# searched, so a directory in the path can never produce the match.
get_aricca_model = re.compile(r'rcp[0-9]p[0-9]')
model_match = get_aricca_model.search(file_selector.value)

# Fail with an explicit message instead of an AttributeError on None
if model_match is None:
    raise ValueError(
        f"No model tag matching 'rcp<digit>p<digit>' in file name: "
        f"{file_selector.value!r}")

model_pointer = model_match.group()

print(f'Model pointer: {model_pointer.upper()}')

Model pointer: RCP2P6


In [4]:
# Read the selected CSV file into a dataframe
data = pd.read_csv(file_path)

# Normalise column names: lowercase, trimmed, and with every '-', '.' or ' '
# replaced by '_'
data.columns = (
    data.columns
    .str.lower()
    .str.strip()
    .str.replace(r'[-\. ]', '_', regex=True))

# Columns whose name starts with 'mean' (neither geographical nor temporal)
ordinary_predictors = list(
    filter(lambda column: column.startswith('mean'), data.columns))

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2826720 entries, 0 to 2826719
Data columns (total 9 columns):
 #   Column         Dtype  
---  ------         -----  
 0   lon            float64
 1   lat            float64
 2   year           int64  
 3   mean_precip    float64
 4   mean_precip_2  float64
 5   mean_temp      float64
 6   mean_temp_2    float64
 7   mean_yield     float64
 8   id             int64  
dtypes: float64(7), int64(2)
memory usage: 194.1 MB


# Visualización inicial

In [5]:
def plot_yield(variable, year):
    """Draw a lon/lat scatter map of one column of `data` for a single year.

    Reads the module-level `data` dataframe and the `model_pointer` string
    (used in the title), and shows the figure with `plt.show()`.

    Parameters
    ----------
    variable : str
        Name of the `data` column used to colour the points.
    year : int
        Value of the `year` column selecting the rows to draw.
    """
    # Visualization data; `@year` binds the local variable instead of
    # formatting its value into the query string
    yearly_data = data.query('year == @year')

    # Figure config
    fig, ax = plt.subplots(figsize=(16, 7))

    # Visualization elements (named `points`, not `map`, so the builtin is
    # not shadowed)
    points = ax.scatter(
        x=yearly_data.lon
        ,y=yearly_data.lat
        ,s=5
        ,c=yearly_data[variable]
        ,cmap='viridis')

    # Colour scale, labelled so the figure can be read on its own
    fig.colorbar(points, ax=ax, label=variable)

    # Annotations and styling
    ax.set_title(
        label=f'{model_pointer.upper()}, {year}: {variable}'
        ,fontsize=16)
    ax.set_yticks([])
    ax.set_xticks([])
    ax.set_frame_on(False)

    plt.show()

In [6]:
# Interactive explorer: pick the column to colour by and the year to draw.
# The trailing semicolon keeps the cell from echoing the function repr.
widgets.interact(
    plot_yield
    ,variable=widgets.Dropdown(
        options=ordinary_predictors
        ,value=ordinary_predictors[-1]
        ,description='Variable'
        ,disabled=False)
    ,year=widgets.IntSlider(
        min=int(data.year.min())
        ,max=int(data.year.max())
        ,step=1));

interactive(children=(Dropdown(description='Files', index=4, options=('mean_precip', 'mean_precip_2', 'mean_te…

<function __main__.plot_yield(variable, year)>

# Procesamiento con h3-py

Revisar la siguiente [liga](https://github.com/uber/h3-py-notebooks/blob/master/notebooks/unified_data_layers.ipynb) para mayor detalle de las funcionalidades del framework de Uber.

Se empieza definiendo resoluciones de mallado para las celdas hexagonales

**IMPORTANTE**:

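En la función `h3.latlng_to_cell(*, res)` el parámetro `res` solo acepta valores enteros de 0 a 15; no admite valores negativos. H3 define exactamente 16 resoluciones (0 es la más gruesa y 15 la más fina), por lo que el máximo `res=15` es un límite de la especificación de H3 y no una consecuencia de los recursos computacionales de la máquina.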

In [7]:
%%time

for mesh_resolution in np.arange(0, 16):
    
    # Build resolution column name
    hex_cell = 'hex_' + str(int(mesh_resolution))

    # Heaxgonal cells identifiers
    data[hex_cell] = data.apply(
        func=lambda row: h3.latlng_to_cell(
            lat=row.lat
            ,lng=row.lon
            ,res=mesh_resolution)
        ,axis=1)

    print(hex_cell)

hex_0
hex_1
hex_2
hex_3
hex_4
hex_5
hex_6
hex_7
hex_8
hex_9
hex_10
hex_11
hex_12
hex_13
hex_14
hex_15
CPU times: total: 14min 5s
Wall time: 14min 27s


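El tiempo de procesamiento para generar las resoluciones anteriores fue de aproximadamente 14 minutos y medio en la ejecución registrada arriba (véase la salida de `%%time`), por lo que, por conveniencia, se almacenan los resultados de la segmentación hexagonal h3 en un archivo externo.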

In [8]:
# Declare storage path
out_path = os.path.join(PROCESSED_DATA, file_selector.value)

# to_csv does not create missing directories, so make sure the target exists
os.makedirs(PROCESSED_DATA, exist_ok=True)

# Save results
data.to_csv(out_path, index=False)

In [9]:
# Show the dataframe, now including the hex_0 ... hex_15 columns
data

Unnamed: 0,lon,lat,year,mean_precip,mean_precip_2,mean_temp,mean_temp_2,mean_yield,id,hex_0,...,hex_6,hex_7,hex_8,hex_9,hex_10,hex_11,hex_12,hex_13,hex_14,hex_15
0,-128.75,54.25,2005,147.540523,29514.932301,4.395718,97.616353,1.775000,1,8013fffffffffff,...,861280d87ffffff,871280d81ffffff,881280d813fffff,891280d813bffff,8a1280d813a7fff,8b1280d813a5fff,8c1280d813a53ff,8d1280d813a523f,8e1280d813a521f,8f1280d813a5218
1,-128.75,54.25,2006,149.057648,28160.320401,4.186324,78.692934,1.045000,1,8013fffffffffff,...,861280d87ffffff,871280d81ffffff,881280d813fffff,891280d813bffff,8a1280d813a7fff,8b1280d813a5fff,8c1280d813a53ff,8d1280d813a523f,8e1280d813a521f,8f1280d813a5218
2,-128.75,54.25,2007,148.248154,27890.185453,6.083475,84.443645,2.071000,1,8013fffffffffff,...,861280d87ffffff,871280d81ffffff,881280d813fffff,891280d813bffff,8a1280d813a7fff,8b1280d813a5fff,8c1280d813a53ff,8d1280d813a523f,8e1280d813a521f,8f1280d813a5218
3,-128.75,54.25,2008,167.495560,35660.725831,7.565228,106.691952,3.984267,1,8013fffffffffff,...,861280d87ffffff,871280d81ffffff,881280d813fffff,891280d813bffff,8a1280d813a7fff,8b1280d813a5fff,8c1280d813a53ff,8d1280d813a523f,8e1280d813a521f,8f1280d813a5218
4,-128.75,54.25,2009,175.237379,38488.376016,7.152213,88.141152,1.617000,1,8013fffffffffff,...,861280d87ffffff,871280d81ffffff,881280d813fffff,891280d813bffff,8a1280d813a7fff,8b1280d813a5fff,8c1280d813a53ff,8d1280d813a523f,8e1280d813a521f,8f1280d813a5218
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2826715,178.25,-37.75,2094,138.138955,24123.317255,14.284397,218.927381,6.580750,31408,80bbfffffffffff,...,86ba24137ffffff,87ba24131ffffff,88ba24131dfffff,89ba24131cbffff,8aba24131c87fff,8bba24131c86fff,8cba24131c865ff,8dba24131c864ff,8eba24131c864f7,8fba24131c864f2
2826716,178.25,-37.75,2095,164.987107,36365.936754,14.033156,211.958837,6.760167,31408,80bbfffffffffff,...,86ba24137ffffff,87ba24131ffffff,88ba24131dfffff,89ba24131cbffff,8aba24131c87fff,8bba24131c86fff,8cba24131c865ff,8dba24131c864ff,8eba24131c864f7,8fba24131c864f2
2826717,178.25,-37.75,2096,152.250917,29167.725057,14.288360,214.869387,6.461292,31408,80bbfffffffffff,...,86ba24137ffffff,87ba24131ffffff,88ba24131dfffff,89ba24131cbffff,8aba24131c87fff,8bba24131c86fff,8cba24131c865ff,8dba24131c864ff,8eba24131c864f7,8fba24131c864f2
2826718,178.25,-37.75,2097,184.123849,42846.036997,14.578683,225.503488,7.314250,31408,80bbfffffffffff,...,86ba24137ffffff,87ba24131ffffff,88ba24131dfffff,89ba24131cbffff,8aba24131c87fff,8bba24131c86fff,8cba24131c865ff,8dba24131c864ff,8eba24131c864f7,8fba24131c864f2
