## FDW Crop Production Data Analysis - Kenya

In [1]:
import os, sys, glob, json
from itertools import product, compress, chain
from functools import reduce
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import requests
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import seaborn as sns
from tools import save_hdf, save_npz, load_npz, PrintAdminUnits, PlotAdminShapes
from tools import FDW_PD_Sweeper, FDW_PD_AvalTable, FDW_PD_Compiling, FDW_PD_GrainTypeAgg, FDW_PD_ValidateFnidName
from tools import FDW_PD_CreateAdminLink, FDW_PD_RatioAdminLink, FDW_PD_ConnectAdminLink
from tools import FDW_PD_CaliSeasonYear
from tools_graphic import PlotBarProduction, PlotLinePAY, PlotHeatCropSystem, PlotHeatSeasonData
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
pd.options.mode.chained_assignment = None

In [2]:
# Load the calibrated GSCD crop records for the selected country #
country_iso, country_name = 'KE', 'Kenya'
df = pd.read_hdf('../data/crop/adm_crop_production_%s.hdf' % country_iso)
# Keep only rows flagged as calibrated, then list the admin-unit ids they cover.
df = df[df['gscd_code']=='calibrated']
fnids = list(df['fnid'].unique())
# Row masks for the subset of interest: Annual-season maize production.
is_annual = df['season_name'] == 'Annual'
is_maize = df['product'].isin(['Maize'])
is_production = df['indicator'] == 'production'
data_fdw = df[is_annual & is_maize & is_production]

In [3]:
# Inspect the filtered records whose `name` column is 'Trans Nzoia'.
data_fdw[data_fdw['name'] == 'Trans Nzoia']

Unnamed: 0,fnid,country,country_code,admin_1,admin_2,name,product,season_name,planting_year,planting_month,harvest_year,harvest_month,crop_production_system,indicator,value,gscd_code
23350,KE2013A126,Kenya,KE,Trans Nzoia,none,Trans Nzoia,Maize,Annual,1982,10-01,1982,12-01,none,production,166944.0,calibrated
23353,KE2013A126,Kenya,KE,Trans Nzoia,none,Trans Nzoia,Maize,Annual,1983,10-01,1983,12-01,none,production,145323.0,calibrated
23356,KE2013A126,Kenya,KE,Trans Nzoia,none,Trans Nzoia,Maize,Annual,1984,10-01,1984,12-01,none,production,133659.0,calibrated
23359,KE2013A126,Kenya,KE,Trans Nzoia,none,Trans Nzoia,Maize,Annual,1985,10-01,1985,12-01,none,production,174759.0,calibrated
23362,KE2013A126,Kenya,KE,Trans Nzoia,none,Trans Nzoia,Maize,Annual,1986,10-01,1986,12-01,none,production,181383.0,calibrated
23365,KE2013A126,Kenya,KE,Trans Nzoia,none,Trans Nzoia,Maize,Annual,1987,10-01,1987,12-01,none,production,151710.0,calibrated
23368,KE2013A126,Kenya,KE,Trans Nzoia,none,Trans Nzoia,Maize,Annual,1988,10-01,1988,12-01,none,production,245088.0,calibrated
23371,KE2013A126,Kenya,KE,Trans Nzoia,none,Trans Nzoia,Maize,Annual,1989,10-01,1989,12-01,none,production,276818.0,calibrated
23374,KE2013A126,Kenya,KE,Trans Nzoia,none,Trans Nzoia,Maize,Annual,1990,10-01,1990,12-01,none,production,177431.0,calibrated
23377,KE2013A126,Kenya,KE,Trans Nzoia,none,Trans Nzoia,Maize,Annual,2002,10-01,2002,12-01,none,production,208902.0,calibrated


In [4]:
# Load GSCD crop data -------------------------------- #
country_iso, country_name = 'KE', 'Kenya'
df = pd.read_hdf('../data/crop/adm_crop_production_%s.hdf' % country_iso)
df = df[df['gscd_code']=='calibrated']; fnids = list(df['fnid'].unique())


def _national_maize_production(records, season_name):
    """Sum maize production over all rows of one season, per harvest year.

    Parameters
    ----------
    records : pandas.DataFrame
        Crop records with at least the columns 'season_name', 'product',
        'indicator', 'harvest_year' and 'value'.
    season_name : str
        Value of 'season_name' to keep (e.g. 'Long', 'Short', 'Annual').

    Returns
    -------
    pandas.Series
        Series named 'Maize', indexed by 'year' (copied from 'harvest_year'),
        holding the summed 'value' of the matching rows.
    """
    season = records[
        (records['season_name'] == season_name) &
        (records['product'].isin(['Maize'])) &
        (records['indicator'] == 'production')
    ]
    # `.assign` returns a new frame, so the year column is never written into
    # a filtered slice of `records` (no chained assignment).
    season = season.assign(year=season['harvest_year'])
    table = season.pivot_table(index='year', columns='product', values='value', aggfunc='sum')
    return table.sum(1).rename('Maize')


# One national series per season, all keyed by harvest year.
# NOTE(review): Short-season totals are keyed by harvest_year with no year
# shift -- confirm this is the intended alignment with Long/Annual.
data_fdw_long = _national_maize_production(df, 'Long')
data_fdw_short = _national_maize_production(df, 'Short')
data_fdw_annual = _national_maize_production(df, 'Annual')
# Add the seasons together; a season with no value for a year is skipped by
# the row-wise sum rather than propagating NaN.
data_fdw = pd.concat([data_fdw_long, data_fdw_short, data_fdw_annual],axis=1).sum(1)
# ---------------------------------------------------- #

In [5]:
def _require_columns(frame, columns, source):
    """Raise a descriptive KeyError when `frame` lacks any of `columns`.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table that was just loaded.
    columns : list of str
        Column names the following filter expressions index directly.
    source : str
        Path of the file `frame` was read from; used only in the message.

    Raises
    ------
    KeyError
        If one or more of `columns` is not a column of `frame`. The message
        names the file, the missing columns and the columns that are present.
    """
    missing = [col for col in columns if col not in frame.columns]
    if missing:
        raise KeyError(
            '%s is missing expected column(s) %s; available columns: %s'
            % (source, missing, list(frame.columns))
        )


# NOTE(review): a recorded run of this cell failed with KeyError: 'cnt_name'.
# The checks below keep the exception type but report which input file has
# the unexpected schema, so the column names can be corrected at the source.

# Load FAO-STAT National Production Data ------------- #
fao_stat_path = '../data/crop/fao.csv'
df = pd.read_csv(fao_stat_path, index_col=0)
_require_columns(df, ['cnt_name', 'cpc2_name', 'indicator', 'year', 'value'], fao_stat_path)
data_fao1 = df[
    (df['cnt_name'] == 'Kenya') &
    (df['cpc2_name'] == 'Maize (corn)') &
    (df['indicator'] == 'Production')
].set_index('year')['value']
# ---------------------------------------------------- #

# Load FAO-AgroMaps Subnational Production Data ------ #
fao_amap_path = '../data/crop/adm_fao_agromaps.csv'
df = pd.read_csv(fao_amap_path, index_col=0, low_memory=False)
_require_columns(df, ['cnt_name', 'cpc2_name', 'adm_level', 'indicator', 'value'], fao_amap_path)
# pivot_table uses its default aggregation here, so repeated
# (year, admx_code) entries are averaged before the national sum.
# NOTE(review): confirm averaging duplicates is intended.
data_fao2 = df[
    (df['cnt_name'] == 'Kenya') &
    (df['cpc2_name'] == 'Maize (corn)') &
    (df['adm_level'] == 2) &
    (df['indicator'] == 'Production')
].pivot_table(index='year',columns='admx_code',values='value')
# National total per year: sum across the admx_code columns.
data_fao2 = data_fao2.sum(1)
# ---------------------------------------------------- #

# Merge all data ------------------------------------- #
data = pd.concat([data_fdw,data_fao1,data_fao2], axis=1, keys=['FDW','FAO_STAT','FAO_AMAP']).sort_index()
# ---------------------------------------------------- #
data.tail()

KeyError: 'cnt_name'

In [None]:
# Load precipitation
prcp = pd.read_hdf('../data/adm.prcp.chirps-v2.crop.extended.hdf')[fnids]
# Monthly totals labelled by the first day of each month. 'MS' (month start)
# yields those labels directly, so no per-timestamp relabelling is needed.
prcp = prcp.resample('MS').agg('sum')
assert prcp.isna().sum().sum() == 0 # if not, use FillMissMonthMean
# National seasonal precipitation
# Unweighted mean across the selected admin-unit columns.
prcp_nat = prcp.mean(1).rename('PRCP')
# Forward rolling summation
# Each month receives the sum of itself and the following months in the window.
indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=4)   # Forward Rolling Window
prcp_nat = prcp_nat.rolling(window=indexer, min_periods=3).sum()
prcp_nat = prcp_nat[prcp_nat.index.month == 3]                      # Start month
# Near the end of the record the forward window can hold fewer than
# `min_periods` months, which leaves NaN. Drop those seasons: the yearly
# regroup below would otherwise turn the NaN into a spurious 0.
prcp_nat = prcp_nat.dropna()
# Re-key the one-value-per-year series by calendar year.
prcp_nat = prcp_nat.groupby(prcp_nat.index.year).sum()
# NOTE(review): the window above spans 4 months starting in March, while the
# label names a 3-month season (MAM) -- confirm which one is intended.
prcp_label = 'MAM PRCP (mm)'
# Correlations
corr_value = pd.concat([data,prcp_nat],axis=1).corr()
print(f'Correlation:\n{corr_value}')

In [None]:
# Years shown on the x-axis (1982 through 2022 inclusive).
years = np.arange(1982,2023)
sns.set(style="white", font_scale=1.5)
fig, ax1 = plt.subplots(nrows=1, ncols=1, sharex=True, figsize=(12,4))

# Left axis: one line per column of `data`.
data.plot(ax=ax1, legend=None, linewidth=2.5, marker=None, color=['#405E7B','#FB6222','#17AB6F'])
ax1.set_xlim([years.min()-0.5, years.max()+0.5])
ax1.set_xticks(years)
# Two-digit year tick labels.
ax1.set_xticklabels(['%02d'% yr for yr in np.mod(years,100)])
ax1.grid(True)  # enables grid lines on both the x and y axes
ax1.set_xlabel('')
ax1.set_ylabel('National Production (ton)')

# Right axis: seasonal precipitation bars, drawn explicitly on the twin axes
# instead of relying on pyplot's "current axes" state.
year, value = prcp_nat.index, prcp_nat.values
ax2 = ax1.twinx()
ax2.bar(year, value, alpha=.25, color='grey')
# Leave 30% headroom above the tallest bar; nanmax ignores missing values so
# a NaN cannot become an axis limit.
ax2.set_ylim([0, np.nanmax(value)*1.3])
ax2.set_ylabel(prcp_label, labelpad=10)

ax1.set_zorder(1)  # default zorder is 0 for ax1 and ax2
ax1.patch.set_visible(False)  # prevents ax1 from hiding ax2
ax1.legend(bbox_to_anchor=(0.5, 1.01), columnspacing=0.7, ncol=3, fontsize=14)
fig.tight_layout(pad=0)
plt.show()