## FDW Crop Production Data Profiling - Kenya

In [2]:
import os, sys, glob, json
from itertools import product, compress, chain
from functools import reduce
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import requests
import numpy as np
import pandas as pd
import geopandas as gpd
from tools import save_hdf, save_npz, load_npz, PrintAdminUnits, PlotAdminShapes
from tools import FDW_PD_Sweeper, FDW_PD_AvalTable, FDW_PD_Compiling, FDW_PD_GrainTypeAgg, FDW_PD_ValidateFnidName
from tools import FDW_PD_CreateAdminLink, FDW_PD_RatioAdminLink, FDW_PD_ConnectAdminLink
from tools import FDW_PD_CaliSeasonYear
from tools_graphic import PlotBarProduction, PlotLinePAY, PlotHeatCropSystem, PlotHeatSeasonData, PlotLineCropTS
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
pd.options.mode.chained_assignment = None

In [2]:
# CPCV2 grain code ------------------------------ #
# Read the CPCV2 grain code table and derive a {product: product_category}
# dictionary from its 'product' and 'product_category' columns.
grain_code = pd.read_hdf('../data/crop/grain_cpcv2_code.hdf')
product_category = grain_code.set_index('product')['product_category'].to_dict()
# ----------------------------------------------- #

# Load FEWS NET administrative boundaries ------- #
# Every matching shapefile is read, re-projected to `epsg`, dissolved by FNID
# and given an "area" column (geometry area divided by 10**6).
# Each GeoDataFrame is also published as a notebook-level variable named after
# the file (e.g. KE_Admin1_2013), because later cells refer to the shapes by
# those names. This is done through globals() instead of string-built exec(),
# so file paths containing quotes or backslashes cannot corrupt the statement.
epsg = 'epsg:32736' # Kenya
fn_shapes = sorted(glob.glob('../data/shapefile/fewsnet/KE_Admin?_????.shp'))
shape_list = []
for fn in fn_shapes:
    # File name without directory and extension, e.g. "KE_Admin1_2013"
    name = os.path.splitext(os.path.basename(fn))[0]
    shape = gpd.read_file(fn).to_crs(epsg)
    shape = shape.dissolve("FNID").reset_index()
    shape["area"] = shape["geometry"].area/10**6
    globals()[name] = shape
    shape_list.append(shape)
shape_all = pd.concat(shape_list, axis=0).reset_index(drop=True)
PrintAdminUnits(shape_all)
# ----------------------------------------------- #

# FDW API host address -------------------------- #
# Download the crop production indicator values for Kenya from the FDW API.
host = 'https://fdw.fews.net'
# Credentials are read from a local JSON file and passed to requests as a tuple.
# The file is opened in a context manager so the handle is always closed.
with open('token.json', "r") as f:
    auth = tuple(json.load(f))
parameters = {
    'format': 'json',
    'country': 'Kenya',
    'product': 'R011',
    'survey_type': 'crop:best'
}
endpoint = '/api/cropproductionindicatorvalue/'
response = requests.get(host + endpoint, auth=auth, params=parameters, proxies={})
# Fail loudly on HTTP errors instead of parsing an error payload
response.raise_for_status()
df = pd.DataFrame.from_records(response.json())
# Keep an untouched copy of the downloaded records
df_origin = df.copy()
# ----------------------------------------------- #

- FEWS NET admin shapefiles ------------------- #
| year	 | Admin1   | # units   | Admin2   | # units   | Admin3   | # units   |
| 1982	 | KE1982A1 | 8	| KE1982A2	| 41	| nan	| 0	|
| 1989	 | KE1989A1 | 8	| KE1989A2	| 47	| nan	| 0	|
| 2007	 | KE2007A1 | 8	| KE2007A2	| 70	| nan	| 0	|
| 2009	 | KE2009A1 | 8	| KE2009A2	| 47	| nan	| 0	|
| 2013	 | KE2013A1 | 47	| KE2013A2	| 290	| nan	| 0	|
----------------------------------------------- #


In [3]:
# Manual Pre-processing before Sweeping --------- #
# Always start from the untouched API response (df_origin) so that re-running
# this cell is idempotent: otherwise the date shift below would be applied a
# second time to an already-shifted (or already-swept) `df`.
df = df_origin.copy()
# 1. Default setting
# a) None-type crop production system
# b) None-type population group
# Missing values and empty strings are both replaced with the label 'none'.
for col_default in ['crop_production_system', 'population_group']:
    is_blank = df[col_default].isna() | (df[col_default] == '')
    df.loc[is_blank, col_default] = 'none'
# 2. Manual setting
# a) Add admin names to reporting units
# Records whose FNID does not have 'A' as its 7th character are treated as
# reporting units; their admin name is the first word of "locality_name".
admin_level = 'admin_1'
reporting_unit = df.loc[df['fnid'].map(lambda x: x[6] != 'A'), 'fnid'].unique()
is_reporting = df['fnid'].isin(reporting_unit)
df.loc[is_reporting, admin_level] = df.loc[is_reporting, 'locality_name'].map(lambda x: x.split(' ')[0])
# b) Shift the "start_date" and "period_date" of the Annual harvest season
# The harvest month of the Annual harvest season is January of the year
# following the production year. Both dates are moved one month earlier so
# that they fall in December of the production year.
idx = df['season_name'] == 'Annual harvest'
for col_date in ['start_date', 'period_date']:
    shifted = pd.to_datetime(df.loc[idx, col_date]) - pd.offsets.DateOffset(months=1)
    df.loc[idx, col_date] = shifted.dt.strftime('%Y-%m-%d')
# ----------------------------------------------- #

# FDW Production Data Inspection ---------------- #
# FDW_PD_Sweeper returns two frames: the first replaces `df`, the second is
# kept as `df_raw`. With area_priority='Area Planted' the printed log of this
# cell reports "Area Planted" points as retained and no "Area Harvested" points.
df, df_raw = FDW_PD_Sweeper(df, area_priority='Area Planted')
# Table(s) built from the swept records and all loaded admin shapes
# NOTE(review): the structure of table_dict is defined in tools.py - confirm there.
table_dict = FDW_PD_AvalTable(df, shape_all)
# ----------------------------------------------- #

# FEWS NET Shapefile comparison ----------------- #
# Stack the three boundary sets used in this notebook (Admin-2 of 1982 and
# 1989, Admin-1 of 2013) into one frame and plot them with labels.
shape_used = pd.concat([KE_Admin2_1982, KE_Admin2_1989, KE_Admin1_2013], axis=0)
PlotAdminShapes(shape_used, label=True)
# ----------------------------------------------- #

- Remove missing records ---------------------- #
Orignial data points: 9,697
Removed 214 "Missing Value" points
3,180/3,203 "Area Planted" points are retained.
3,163/3,203 "Quantity Produced" points are retained.
3,140/3,203 "Yield" points are retained.
0/88 "Area Harvested" points are retained.
Current data points: 9,483

- Minor changes are applied ------------------- #

- Basic information --------------------------- #
Data period: 1982 - 2019
6 grain types are found: Barley (Unspecified), Maize Grain (White), Millet, Rice (Paddy), Sorghum, Wheat Grain
3 seasons are found: Annual harvest (10-01), Long rains harvest (07-01), Short rains harvest (02-01)
1 crop production system are found: none
Data sources include:
[1] Ministry of Agricultural and Livestock Development, Kenya --- FEWS NET Agro Maps, Kenya
[2] Ministry of Agricultural and Livestock Development, Kenya --- Official Agricultural Statistics, Kenya
[3] Ministry of Agriculture, Livestock and Fisheries, Kenya --- FEWS NET Ag

- Kenya crop seasonal calendar

![FEWS NET](https://fews.net/sites/default/files/styles/large_width_880/public/2023-03/seasonal-calendar-kenya.png?itok=0Wob_hCK)

- FDW data consists of `KE1982A1`, `KE1982A2`, `KE1989A1`, `KE1989A2`, `KE2013A1`.

| Year | Admin-1 | # units  | Admin-2  | # units |
| :---: | :----:  | :----:   | :----:   | :---:  |
| 1982 | **`KE1982A1`**| 8  | **`KE1982A2`** | 41|
| 1989 | **`KE1989A1`**| 8  | **`KE1989A2`** | 47|
| 2009 | KE2009A1| 8        | KE2009A2 | 47      |
| 2013 | **`KE2013A1`**| 47 | KE2013A2 | 290     |

- Comparison between admin boundaries.

![image](https://github.com/chc-ucsb/gscd/blob/main/figures/KE_admin_shapes.png?raw=true)


- In 1989, six districts were each split in two, adding six new districts.

| 1982-1989 (original) |1989-2013 (changed)|1989-2013 (added)|
| :---:| :---:|:---:|
|Meru (KE1982A25020)    | Meru (KE1989A25017)    | Tharaka Nithi (KE1989A25018)|
|Machakos (KE1982A25019)| Machakos (KE1989A25004)| Makueni       (KE1989A25005)|
|Kericho  (KE1982A25424)| Kericho  (KE1989A25422)| Bomet         (KE1989A25420)|
|Kisii    (KE1982A25319)| Kisii    (KE1989A25314)| Nyamira       (KE1989A25308)|
|Homa Bay (KE1982A25318)| Homa Bay (KE1989A25313)| Migori        (KE1989A25316)|
|Kakamega (KE1982A25512)| Kakamega (KE1989A25511)| Vihiga        (KE1989A25508)|

- In 2013, Admin-2 became Admin-1 (no name changes).
- **`KE2013A1`** is used to represent all admin-level 1 crop data.
- Kenya has three crop seasons: `Annual harvest`, `Long rains harvest`, `Short rains harvest`.
- Kenya has a single crop production system: `none`.

In [4]:
# Define the latest shapefile ------------------- #
# The 2013 Admin-1 boundaries, re-projected to EPSG:4326, serve as the
# reference ("latest") shapes.
latest_level = 1
shape_latest = KE_Admin1_2013.copy()
shape_latest = shape_latest.to_crs('epsg:4326')
# ----------------------------------------------- #

# Validation of FNIDs and Names ----------------- #
season_short_name = {
    'Annual harvest': 'Annual',
    'Long rains harvest': 'Long',
    'Short rains harvest': 'Short',
}
df = FDW_PD_ValidateFnidName(df, shape_used, shape_latest)
# Shorten the season labels
df['season_name'] = df['season_name'].replace(season_short_name)
# ----------------------------------------------- #

# FDW Production Data Compiling ----------------- #
area, prod = FDW_PD_Compiling(df, shape_used)
# Keep copies of the compiled tables before any admin linking
area_all = area.copy()
prod_all = prod.copy()
# Unique column keys left after dropping the first two column levels
mdx_pss = area.columns.droplevel([0,1]).unique()
# ----------------------------------------------- #

In [5]:
# Link admin boundaries ------------------------- #
# Build the links from the 1982 and 1989 Admin-2 units to the 2013 Admin-1
# units. Every link is required to use the 'PBR' method
# (presumably "production-based ratio" - confirm in tools.py).
link_1982, over_1982 = FDW_PD_CreateAdminLink(KE_Admin2_1982, KE_Admin1_2013, 'ADMIN2', 'ADMIN1', prod, epsg)
assert all(v['method'] == 'PBR' for v in link_1982.values())
link_1989, over_1989 = FDW_PD_CreateAdminLink(KE_Admin2_1989, KE_Admin1_2013, 'ADMIN2', 'ADMIN1', prod, epsg)
# Validate the 1989 links (previously link_1982 was checked a second time,
# so link_1989 was never validated).
assert all(v['method'] == 'PBR' for v in link_1989.values())
# Crop specific ratios
link_ratio_1982 = FDW_PD_RatioAdminLink(link_1982, prod, over_1982, mdx_pss)
link_ratio_1989 = FDW_PD_RatioAdminLink(link_1989, prod, over_1989, mdx_pss)
# Merge link_ratio
# Both ratio dictionaries must describe the same set of new units.
assert link_ratio_1982.keys() == link_ratio_1989.keys()
link_merged = [link_ratio_1982, link_ratio_1989]
fnids_new = list(link_merged[0].keys())
link_ratio = dict()
for fnid in fnids_new:
    # Place the 1982 and 1989 ratio tables of this unit side by side
    container = [link_set[fnid] for link_set in link_merged]
    link_ratio[fnid] = pd.concat(container, axis=1)
# Add current unit to link_ratio
for fnid_new in link_ratio.keys():
    ratio_table = link_ratio[fnid_new]
    # The unit maps onto itself with a ratio of 1
    ratio_table[fnid_new] = 1.0
    link_ratio[fnid_new] = ratio_table.sort_index(axis=1, ascending=False)
# Connect data with AdminLink
area_new, prod_new = FDW_PD_ConnectAdminLink(link_ratio, area, prod, validation=False)
# ----------------------------------------------- #

In [6]:
# Manual correction ----------------------------- #
# Yield table: element-wise production divided by area of the linked tables.
crop_new = prod_new/area_new
# - Remove "Annual" season yield over 10 mt/ha
# (currently disabled: the three lines below are commented out)
# rdx = crop_new.loc[:,pd.IndexSlice[:,:,'Maize Grain (White)','Annual']] > 10
# area_new[rdx] = np.nan
# prod_new[rdx] = np.nan
# - Remove "Short" season yield over 4 mt/ha
# rdx is a boolean frame covering only the 'Maize Grain (White)' / 'Short'
# columns (selected on the 3rd and 4th column levels).
rdx = crop_new.loc[:,pd.IndexSlice[:,:,'Maize Grain (White)','Short']] > 4
# Only the yield table is masked here; area_new and prod_new keep their values
# because the prod_new line below is commented out.
# NOTE(review): confirm that keeping area/production for these records is intended.
crop_new[rdx] = np.nan
# prod_new[rdx] = np.nan
# ----------------------------------------------- #

# Complete long format DataFrame ---------------- #
# Convert each wide table (area, production, yield) to long format with a
# 'value' column and an 'indicator' label, then stack them.
df_area = area_new.T.stack().rename('value').reset_index()
df_area['indicator'] = 'area'
df_prod = prod_new.T.stack().rename('value').reset_index()
df_prod['indicator'] = 'production'
df_yield = crop_new.T.stack().rename('value').reset_index()
df_yield['indicator'] = 'yield'
stack = pd.concat([df_area, df_prod, df_yield], axis=0)
# Add "planting year"
# The season table is matched on every listed column except 'planting_year',
# which is the column being added.
cols = ['season_name','product','crop_production_system','planting_month','harvest_year','harvest_month','planting_year']
season_table = df[cols].drop_duplicates()
stack = stack.merge(season_table, on=cols[:-1])
# Add country and admin names
stack = stack.merge(df[['fnid','country','country_code','admin_1','admin_2']].drop_duplicates(), on='fnid', how='inner')
names = [
    'fnid','country','country_code','admin_1','admin_2','name',
    'product','season_name','planting_year','planting_month','harvest_year','harvest_month',
    'crop_production_system','indicator','value'
]
# Take an explicit copy before adding a column, rather than assigning into a
# column-subset slice and relying on the globally silenced pandas warning.
stack_gscd = stack[names].copy()
stack_gscd['gscd_code'] = 'calibrated'
# ----------------------------------------------- #

# Reported FDW data ----------------------------- #
# Same columns as the calibrated table, taken from the reported FDW records.
# An explicit copy is made before editing so that `df` is never touched and no
# assignment is made into a column-subset slice.
stack_fdw = df[names].copy()
# Harmonise indicator labels with the calibrated table.
# NOTE(review): only 'Area Harvested' is mapped to 'area' although the sweeper
# was run with area_priority='Area Planted' - confirm that FDW_PD_Sweeper
# relabels the retained area records as 'Area Harvested'.
stack_fdw['indicator'] = stack_fdw['indicator'].replace({'Area Harvested':'area','Quantity Produced':'production','Yield':'yield'})
stack_fdw['gscd_code'] = 'reported'
# ----------------------------------------------- #

# Final Processing ------------------------------ #
# Reported rows first, calibrated rows second, with a fresh 0..n-1 index
stack = pd.concat([stack_fdw, stack_gscd], axis=0, ignore_index=True)
# No concerns found for grain types
stack['product'] = stack['product'].replace(product_category)
# Calibration of planting and Harvest year and season
cs = {
    'Annual': {
        'planting_month': {'10-01': '10-01'},
        'harvest_month': {'12-01': '12-01'},
    },
    'Short': {
        'planting_month': {'02-01': '10-01'},
        'harvest_month': {'03-01': '03-01'},
    },
    'Long': {
        'planting_month': {'07-01': '03-01'},
        'harvest_month': {'08-01': '08-01'},
    },
}
cy = {'Short': {'planting_year': -1}}
stack, df, link_ratio = FDW_PD_CaliSeasonYear(stack, df, link_ratio, cs, cy)
# Label missing admin names explicitly
for admin_col in ['admin_1', 'admin_2']:
    stack.loc[stack[admin_col].isna(), admin_col] = 'none'
# ----------------------------------------------- #

# Save data
# The final table is written as CSV and HDF; the processed records (`df`) and
# the admin link ratios are stored alongside it.
fn_out = '../data/crop/adm_crop_production_KE.csv'
stack.to_csv(fn_out)
print('%s is saved.' % fn_out)
save_hdf('../data/crop/adm_crop_production_KE.hdf', stack)
save_hdf('../data/crop/adm_crop_production_KE_raw.hdf', df)
save_npz('../data/crop/adm_crop_production_KE_ratio.npz', link_ratio)

../data/crop/adm_crop_production_KE.csv is saved.
../data/crop/adm_crop_production_KE.hdf is saved.
../data/crop/adm_crop_production_KE_raw.hdf is saved.
../data/crop/adm_crop_production_KE_ratio.npz is saved.


## Visualization of production data

In [7]:
# Bar chart of national grain production
# One figure per season, built from the calibrated records of the saved table.
country_iso, country_name = 'KE', 'Kenya'
df = pd.read_hdf('../data/crop/adm_crop_production_%s.hdf' % country_iso)
# Copy the filtered rows before adding a column, instead of assigning into a
# filtered slice.
df = df[df['gscd_code']=='calibrated'].copy()
df['year'] = df['harvest_year']
# [first, last] harvest year present in the calibrated data
year = [df['year'].min(), df['year'].max()]
product_order = ['Maize', 'Sorghum', 'Millet', 'Rice', 'Wheat', 'Barley']
for season_name in ['Annual','Long','Short']:
    footnote = 'National grain production in %s - %s' % (country_name, season_name)
    fn_save = '../figures/%s_bar_natgrainprod_%s.png' % (country_iso, season_name)
    sub = df[df['season_name'] == season_name]
    fig = PlotBarProduction(sub, year, product_order, footnote, fn_save)
    # fig.show()

../figures/KE_bar_natgrainprod_Annual.png is saved.
../figures/KE_bar_natgrainprod_Long.png is saved.
../figures/KE_bar_natgrainprod_Short.png is saved.


![image](https://github.com/chc-ucsb/gscd/blob/main/figures/KE_bar_natgrainprod_Annual.png?raw=true)
![image](https://github.com/chc-ucsb/gscd/blob/main/figures/KE_bar_natgrainprod_Long.png?raw=true)
![image](https://github.com/chc-ucsb/gscd/blob/main/figures/KE_bar_natgrainprod_Short.png?raw=true)

In [8]:
# Lineplot of Production-Area-Yield (PAY) time-series
# One figure per (product, season) pair, built from the calibrated records.
country_iso, country_name = 'KE', 'Kenya'
df = pd.read_hdf('../data/crop/adm_crop_production_%s.hdf' % country_iso)
# Copy the filtered rows before adding a column, instead of assigning into a
# filtered slice.
df = df[df['gscd_code']=='calibrated'].copy()
df['year'] = df['harvest_year']
# [first, last] harvest year present in the calibrated data
year = [df['year'].min(), df['year'].max()]
product_season = [
    ['Maize','Annual'],
    ['Maize','Long'],
    ['Maize','Short']
]
for product_name, season_name in product_season:
    footnote = 'Production-Area-Yield (PAY) time-series of %s - %s - %s' % (country_iso, product_name, season_name)
    fn_save = '../figures/%s_line_pay_%s_%s.png' % (country_iso, product_name, season_name)
    sub = df[(df['product'] == product_name) & (df['season_name'] == season_name)]
    fig = PlotLinePAY(sub, year, footnote, fn_save)
    # fig.show()

../figures/KE_line_pay_Maize_Annual.png is saved.
../figures/KE_line_pay_Maize_Long.png is saved.
../figures/KE_line_pay_Maize_Short.png is saved.


![image](https://github.com/chc-ucsb/gscd/blob/main/figures/KE_line_pay_Maize_Annual.png?raw=true)
![image](https://github.com/chc-ucsb/gscd/blob/main/figures/KE_line_pay_Maize_Long.png?raw=true)
![image](https://github.com/chc-ucsb/gscd/blob/main/figures/KE_line_pay_Maize_Short.png?raw=true)

In [9]:
# Heatmap of seasonal data availability
# Uses the saved "raw" table (the processed FDW records, not the calibrated stack).
country_iso, country_name = 'KE', 'Kenya'
df = pd.read_hdf('../data/crop/adm_crop_production_%s_raw.hdf' % country_iso)
df['year'] = df['harvest_year']
# Each season gets its own decimal digit, so every combination of seasons has
# a distinct sum; `comb` maps those sums to class ids and `comb_name` to labels.
code = {'Annual': 1, 'Long': 10, 'Short': 100}
comb = {1: 1, 10: 2, 11: 3, 100: 4, 101: 5, 110: 6, 111: 7}
comb_name = {
    1: 'Annual',
    2: 'Long',
    3: 'Annual + Long',
    4: 'Short',
    5: 'Annual + Short',
    6: 'Long + Short',
    7: 'All',
}
for product_name in ['Maize Grain (White)']:
    is_product = df['product'] == product_name
    is_coded_season = df['season_name'].isin(code.keys())
    data = df[is_product & is_coded_season]
    footnote = 'Seasonal data availability in %s - %s (uncorrected)' % (country_name, product_name)
    fn_save = '../figures/%s_heat_seasondata_%s.png' % (country_iso, product_name)
    fig = PlotHeatSeasonData(data, code, comb, comb_name, footnote, fn_save)
    # fig.show()

../figures/KE_heat_seasondata_Maize Grain (White).png is saved.


![image](https://github.com/chc-ucsb/gscd/blob/main/figures/KE_heat_seasondata_Maize%20Grain%20(White).png?raw=true)

In [11]:
# Calibrated PAY time-series per FNID
# One figure per (product, season, FNID), built from the calibrated records
# and the saved admin link ratios.
country_iso, country_name = 'KE', 'Kenya'
df = pd.read_hdf('../data/crop/adm_crop_production_%s.hdf' % country_iso)
# Copy the filtered rows before adding a column, instead of assigning into a
# filtered slice.
df = df[df['gscd_code']=='calibrated'].copy()
df['year'] = df['harvest_year']
link_ratio = load_npz('../data/crop/adm_crop_production_%s_ratio.npz' % country_iso)
# Every year from the first to the last harvest year, inclusive
year_all = np.arange(df['year'].min(), df['year'].max()+1)
prod_season = [
    ['Maize', 'Annual'],
    ['Maize', 'Long'],
    ['Maize', 'Short'],
]
for product_name, season_name in prod_season:
    sub = df[
        (df['product'] == product_name) &
        (df['season_name'] == season_name)
    ]
    for fnid in sub['fnid'].unique():
        sub_fps = sub[sub['fnid'] == fnid]
        fn_save = '../figures/crop_calibrated/%s_%s_%s_%s.png' % (country_iso, product_name, season_name, fnid)
        fig = PlotLineCropTS(sub_fps, fnid, product_name, season_name, link_ratio, year_all, fn_save)

../figures/crop_calibrated/KE_Maize_Annual_KE2013A101.png is saved.
../figures/crop_calibrated/KE_Maize_Annual_KE2013A102.png is saved.
../figures/crop_calibrated/KE_Maize_Annual_KE2013A103.png is saved.
../figures/crop_calibrated/KE_Maize_Annual_KE2013A104.png is saved.
../figures/crop_calibrated/KE_Maize_Annual_KE2013A105.png is saved.
../figures/crop_calibrated/KE_Maize_Annual_KE2013A106.png is saved.
../figures/crop_calibrated/KE_Maize_Annual_KE2013A107.png is saved.
../figures/crop_calibrated/KE_Maize_Annual_KE2013A108.png is saved.
../figures/crop_calibrated/KE_Maize_Annual_KE2013A109.png is saved.
../figures/crop_calibrated/KE_Maize_Annual_KE2013A110.png is saved.
../figures/crop_calibrated/KE_Maize_Annual_KE2013A111.png is saved.
../figures/crop_calibrated/KE_Maize_Annual_KE2013A112.png is saved.
../figures/crop_calibrated/KE_Maize_Annual_KE2013A113.png is saved.
../figures/crop_calibrated/KE_Maize_Annual_KE2013A114.png is saved.
../figures/crop_calibrated/KE_Maize_Annual_KE201