# KPIs

En esta libreta, partimos del dataframe que contiene toda la información de producción agrícola (SIAP), precios de mercado (SNIIM), ...

In [30]:
import pandas as pd
import numpy as np
import os
import scipy.stats as st
from scipy.stats import moment, genextreme
pd.set_option('display.max_columns', None)

In [31]:
directory = '../limpieza'

df = pd.read_csv(os.path.join(directory,"excedente_agrícola.csv"))
df.head()

Unnamed: 0.1,Unnamed: 0,Año,Mes,Cultivo,Estado,Superficie(ha)_Sembrada,Superficie(ha)_Cosechada,Superficie(ha)_Siniestrada,Producción,Rendimiento(udm/ha),Precio,Estado_CVE,Temp_Superficial,Temp_Superficial_MAX,Temp_Superficial_MIN,Temp_2_Metros,Temp_2_Metros_MAX,Temp_2_Metros_MIN,Temp_2_Metros_Pto_Congelación,Temp_2_Metros_Pto_Húmedo,Presión_Superficial,Velocidad_Viento,Humedad_Relativa,Precipitacion,Precipitación,Ivf
0,0,2020,1,Berenjena,Nayarit,60.0,0.0,0.0,0.0,0.0,26.545455,18,19.612645,29.083726,13.741581,19.796194,26.256194,14.834145,11.615113,15.70579,92.821242,1.488903,63.214177,2.056097,60.4,763.0
1,1,2020,1,Berenjena,Yucatán,28.46,0.0,0.0,0.0,0.0,5.83,31,24.66245,30.634364,20.018247,24.568564,30.368262,19.961978,18.301996,21.435265,101.449988,0.637279,70.907876,0.629178,29.5,763.0
2,2,2020,1,Cebolla,Aguascalientes,247.0,7.0,0.0,128.0,18.285714,6.795455,1,11.871672,24.617947,3.252815,11.622434,19.955865,4.849208,2.3361,6.980059,79.269296,2.306334,59.219238,0.8361,38.5,115.2
3,3,2020,1,Cebolla,Baja California Sur,244.25,15.0,0.0,300.0,20.0,15.484848,3,18.086452,27.860968,12.222968,17.575806,24.043097,13.04529,10.594581,14.08529,99.619742,2.680968,67.542323,0.050258,1.9,115.2
4,4,2020,1,Cebolla,Chiapas,218.53,0.0,0.0,0.0,0.0,20.954545,7,20.45848,27.968901,15.246714,20.349943,26.273483,15.924568,16.362354,18.356055,91.695314,1.527048,80.52096,1.351758,28.4,115.2


### Beneficio

Este KIP nos dice el dinero que se perdió, en cosecha siniestrada, con respecto al recaudado con la cosechada. si el número es de signo positivo, se recaudó más, cosechada, que lo perdido, siniestrada, y si es negativo significa que se perdió más de lo que se ganó.

In [32]:
df['Beneficio'] = df.apply(lambda row: \
                 ((row['Superficie(ha)_Cosechada'] - row['Superficie(ha)_Siniestrada']) \
                    / row['Superficie(ha)_Cosechada']) * row['Precio'] \
                 if row['Superficie(ha)_Cosechada'] != 0 \
                 else 0.0, axis=1)

### Eficiencia

Este KIP nos dice la proporción de siniestrada con respecto a la cosechada, entre más grande el número más cultivo se pierde con respecto a lo que se cosecha.

In [None]:
df['Eficiencia'] = df.apply(lambda row: \
                 (row['Superficie(ha)_Siniestrada'] / row['Superficie(ha)_Cosechada']) \
                 if row['Superficie(ha)_Cosechada'] != 0 \
                 else 0.0, axis=1)

In [33]:
df['Presión_Superficial'] = df['Presión_Superficial']*10 # de kp a hp
# elimina columna unnamed
df = df.drop(['Unnamed: 0'], axis=1)
df.head()

Unnamed: 0,Año,Mes,Cultivo,Estado,Superficie(ha)_Sembrada,Superficie(ha)_Cosechada,Superficie(ha)_Siniestrada,Producción,Rendimiento(udm/ha),Precio,Estado_CVE,Temp_Superficial,Temp_Superficial_MAX,Temp_Superficial_MIN,Temp_2_Metros,Temp_2_Metros_MAX,Temp_2_Metros_MIN,Temp_2_Metros_Pto_Congelación,Temp_2_Metros_Pto_Húmedo,Presión_Superficial,Velocidad_Viento,Humedad_Relativa,Precipitacion,Precipitación,Ivf,Beneficio,Eficiencia
0,2020,1,Berenjena,Nayarit,60.0,0.0,0.0,0.0,0.0,26.545455,18,19.612645,29.083726,13.741581,19.796194,26.256194,14.834145,11.615113,15.70579,928.212419,1.488903,63.214177,2.056097,60.4,763.0,0.0,0.0
1,2020,1,Berenjena,Yucatán,28.46,0.0,0.0,0.0,0.0,5.83,31,24.66245,30.634364,20.018247,24.568564,30.368262,19.961978,18.301996,21.435265,1014.499878,0.637279,70.907876,0.629178,29.5,763.0,0.0,0.0
2,2020,1,Cebolla,Aguascalientes,247.0,7.0,0.0,128.0,18.285714,6.795455,1,11.871672,24.617947,3.252815,11.622434,19.955865,4.849208,2.3361,6.980059,792.692962,2.306334,59.219238,0.8361,38.5,115.2,6.795455,0.0
3,2020,1,Cebolla,Baja California Sur,244.25,15.0,0.0,300.0,20.0,15.484848,3,18.086452,27.860968,12.222968,17.575806,24.043097,13.04529,10.594581,14.08529,996.197419,2.680968,67.542323,0.050258,1.9,115.2,15.484848,0.0
4,2020,1,Cebolla,Chiapas,218.53,0.0,0.0,0.0,0.0,20.954545,7,20.45848,27.968901,15.246714,20.349943,26.273483,15.924568,16.362354,18.356055,916.953144,1.527048,80.52096,1.351758,28.4,115.2,0.0,0.0


### Indice de stress termico

In [34]:
# Crea una columna de indice de stress termico
df['Rango_Temp_2_Metros'] = (df['Temp_2_Metros_MAX'] - df['Temp_2_Metros_MIN'])
df['Indice_stress_termico'] = df['Rango_Temp_2_Metros']/max(df['Rango_Temp_2_Metros'])

### Evapotranspiración potencial (ETP)

In [35]:
Ro = 5.5 * 0.408 # Valor promedio historico de la constante de radiación solar para mexico
df['Evapotranspiracion'] = 0.043 * (df['Temp_Superficial'] + 17.78) * Ro * (df['Temp_Superficial_MAX'] - df['Temp_Superficial_MIN'])**0.5

### SPEI y Balance Hídrico
Basado en: https://github.com/monocongo/climate_indices y https://spei.csic.es/home.html

In [36]:
# Definir una función para calcular el balance hídrico climático (D)
def climatic_water_balance(precip, evpt):
    """ 
    Precip y pet son series de Pandas con los datos mensuales de precipitación y evapotranspiración potencial
     Devuelve una serie de Pandas con el balance hídrico climático (D) para cada mes """
    return precip - evpt

# Definir una función para ajustar una distribución log-logística a los datos de D
def log_logistic_fit(D):
    """ 
    D es una serie de Pandas con el balance hídrico climático
    Devuelve los parámetros de la distribución log-logística ajustada por el método de los momentos-L """
    # Calcular los momentos-L
    mean_d = np.mean(D)
    std_d = np.std(D)
    skewness_d = moment(D, moment=3) / np.power(std_d, 3)
    kurtosis_d = moment(D, moment=4) / np.power(std_d, 4) - 3

    # Calcular los parámetros de la distribución log-logística
    alpha = std_d * np.sqrt(6) / np.pi
    beta = skewness_d * np.power(alpha, 2) / 2
    gamma = mean_d - 0.5772 * alpha / beta

    return alpha, beta, gamma

# Definir una función para calcular el SPEI simplificado a partir de D y los parámetros de la distribución
def spei(D, alpha, beta, gamma):
    """ 
    D: balance hídrico climático
    # alpha, beta y gamma son los parámetros de la distribución log-logística """
    F = st.logistic.cdf(D, loc=gamma, scale=alpha)  # Eliminar el argumento 'c'
    P = 1 - F  # Calcular la probabilidad de no excedencia de D
    SPEI = st.norm.ppf(P)  # Aplicar la transformación inversa de la normal estándar
    return SPEI

balance = climatic_water_balance(df['Precipitacion'], df['Evapotranspiracion'])
fit = log_logistic_fit(balance)

df['Balance_Hidrico'] = balance
df['SPEI'] = spei(balance, fit[0], fit[1], fit[2])
df.head()

Unnamed: 0,Año,Mes,Cultivo,Estado,Superficie(ha)_Sembrada,Superficie(ha)_Cosechada,Superficie(ha)_Siniestrada,Producción,Rendimiento(udm/ha),Precio,Estado_CVE,Temp_Superficial,Temp_Superficial_MAX,Temp_Superficial_MIN,Temp_2_Metros,Temp_2_Metros_MAX,Temp_2_Metros_MIN,Temp_2_Metros_Pto_Congelación,Temp_2_Metros_Pto_Húmedo,Presión_Superficial,Velocidad_Viento,Humedad_Relativa,Precipitacion,Precipitación,Ivf,Beneficio,Eficiencia,Rango_Temp_2_Metros,Indice_stress_termico,Evapotranspiracion,Balance_Hidrico,SPEI
0,2020,1,Berenjena,Nayarit,60.0,0.0,0.0,0.0,0.0,26.545455,18,19.612645,29.083726,13.741581,19.796194,26.256194,14.834145,11.615113,15.70579,928.212419,1.488903,63.214177,2.056097,60.4,763.0,0.0,0.0,11.422048,0.553102,14.13255,-12.076454,-0.330509
1,2020,1,Berenjena,Yucatán,28.46,0.0,0.0,0.0,0.0,5.83,31,24.66245,30.634364,20.018247,24.568564,30.368262,19.961978,18.301996,21.435265,1014.499878,0.637279,70.907876,0.629178,29.5,763.0,0.0,0.0,10.406284,0.503915,13.343649,-12.71447,-0.23861
2,2020,1,Cebolla,Aguascalientes,247.0,7.0,0.0,128.0,18.285714,6.795455,1,11.871672,24.617947,3.252815,11.622434,19.955865,4.849208,2.3361,6.980059,792.692962,2.306334,59.219238,0.8361,38.5,115.2,6.795455,0.0,15.106657,0.731526,13.224927,-12.388827,-0.285597
3,2020,1,Cebolla,Baja California Sur,244.25,15.0,0.0,300.0,20.0,15.484848,3,18.086452,27.860968,12.222968,17.575806,24.043097,13.04529,10.594581,14.08529,996.197419,2.680968,67.542323,0.050258,1.9,115.2,15.484848,0.0,10.997806,0.532559,13.685804,-13.635546,-0.105026
4,2020,1,Cebolla,Chiapas,218.53,0.0,0.0,0.0,0.0,20.954545,7,20.45848,27.968901,15.246714,20.349943,26.273483,15.924568,16.362354,18.356055,916.953144,1.527048,80.52096,1.351758,28.4,115.2,0.0,0.0,10.348915,0.501137,13.160513,-11.808755,-0.368857


In [37]:
df = df.round(3)
# save df to an excel file
df.to_excel('excedente_agrícola_kpi.xlsx', index=False)