## FDW Crop Production Data Profiling - Burkina Faso

In [1]:
import os, sys, glob, json
from itertools import product, compress, chain
from functools import reduce
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import requests
import numpy as np
import pandas as pd
import geopandas as gpd
from tools import save_hdf, save_npz, load_npz, PrintAdminUnits, PlotAdminShapes
from tools import FDW_PD_Sweeper, FDW_PD_AvalTable, FDW_PD_Compiling, FDW_PD_GrainTypeAgg, FDW_PD_ValidateFnidName
from tools import FDW_PD_CreateAdminLink, FDW_PD_RatioAdminLink, FDW_PD_ConnectAdminLink
from tools_graphic import PlotBarProduction, PlotLinePAY, PlotHeatCropSystem, PlotHeatSeasonData
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
pd.options.mode.chained_assignment = None

In [2]:
# CPCV2 grain code ------------------------------ #
# Lookup table of grain codes; only the product -> product_category mapping
# is kept as a plain dict for the grain-type aggregation step.
grain_code = pd.read_hdf('./data/crop/grain_cpcv2_code.hdf')
product_category = grain_code.set_index('product')['product_category'].to_dict()
# ----------------------------------------------- #

# Load FEWS NET administrative boundaries ------- #
# Every matching shapefile is read, reprojected to the country's UTM zone and
# exposed as a notebook-level variable named after the file stem
# (e.g. BF_Admin1_1984), because later cells refer to those names directly.
epsg = 'epsg:32630' # Burkina Faso
fn_shapes = sorted(glob.glob('./data/shapefile/fewsnet/BF_Admin?_????.shp'))
if not fn_shapes:
    # Fail early with a clear message instead of an opaque pd.concat error.
    raise FileNotFoundError(
        'No FEWS NET shapefile matches ./data/shapefile/fewsnet/BF_Admin?_????.shp'
    )
shape_all = []
for fn in fn_shapes:
    # File stem, e.g. 'BF_Admin1_1984'; derived from the path rather than a
    # fixed character slice so it does not depend on the file name length.
    name = os.path.splitext(os.path.basename(fn))[0]
    gdf = gpd.read_file(fn).to_crs(epsg)
    # The projected CRS is metric (UTM), so dividing by 10**6 converts m^2 to km^2.
    gdf['area'] = gdf['geometry'].area/10**6
    # Direct namespace binding replaces the previous exec() of interpolated
    # source strings, which broke on paths containing quotes.
    globals()[name] = gdf
    shape_all.append(gdf)
shape_all = pd.concat(shape_all, axis=0).reset_index(drop=True)
PrintAdminUnits(shape_all)
# ----------------------------------------------- #

# FDW API host address -------------------------- #
host = 'https://fdw.fews.net'
# Credentials live outside the notebook in token.json; the file is read inside
# a context manager so the handle is closed immediately.
# NOTE(review): presumably a two-item JSON list [username, password] — confirm.
with open('token.json', 'r') as token_file:
    auth = tuple(json.load(token_file))
# Query: best-available crop survey records for product code R011 in Burkina Faso.
parameters = {
    'format': 'json',
    'country': 'Burkina Faso',
    'product': 'R011',
    'survey_type': 'crop:best'
}
endpoint = '/api/cropproductionindicatorvalue/'
response = requests.get(host + endpoint, auth=auth, params=parameters, proxies={})
# Stop here on any HTTP error rather than parsing an error payload.
response.raise_for_status()
df = pd.DataFrame.from_records(response.json())
# Untouched copy of the API response, kept for reference.
df_origin = df.copy()
# ----------------------------------------------- #

# Manual Pre-processing before Sweeping --------- #
# 1. Default setting
# a) None-type population group
#    Missing (NaN/None) and empty-string entries are both relabelled with the
#    literal 'none' in a single masked assignment.
df.loc[
    df['population_group'].isna() | df['population_group'].eq(''),
    'population_group'
] = 'none'
# ----------------------------------------------- #

# FDW Production Data Inspection ---------------- #
# The sweeper overwrites `df` with its first return value, so this cell is not
# idempotent: re-running it feeds already-swept records back in.
# NOTE(review): `df_raw` is presumably the pre-cleaning record set — confirm in tools.py.
df, df_raw = FDW_PD_Sweeper(df)
# Availability table(s) built from the swept records and all loaded admin shapes.
table_dict = FDW_PD_AvalTable(df, shape_all)
# ----------------------------------------------- #

# FEWS NET Shapefile comparison ----------------- #
# The 1984 Admin-1 and 2001 Admin-2 layers are stacked into one frame and plotted with labels.
shape_used = pd.concat([BF_Admin1_1984, BF_Admin2_2001], axis=0)
PlotAdminShapes(shape_used, label=True)
# ----------------------------------------------- #

- FEWS NET admin shapefiles ------------------- #
        Admin1  # units    Admin2  # units
year                                      
1984  BF1984A1       30       NaN        0
1997  BF1997A1       45       NaN        0
2001  BF2001A1       13  BF2001A2       45
2020  BF2020A1       13  BF2020A2       45
----------------------------------------------- #
- Remove missing records ---------------------- #
Orignial data points: 16,278
Removed 76 "Not Collected" points
5,410/5,426 "Area Planted" points are retained.
5,404/5,426 "Quantity Produced" points are retained.
5,388/5,426 "Yield" points are retained.
Current data points: 16,202

- Minor changes are applied.. ----------------- #

- Basic information --------------------------- #
Data period: 1984 - 2019
5 grain types are found: Fonio, Maize (Corn), Millet, Rice (Paddy), Sorghum
1 seasons are found: Main (10-01)
1 crop production system are found: none
Data sources include:
[1] Ministry of Agriculture, Burkina Faso --- Annuaire des 

- Burkina Faso crop seasonal calendar

![FEWS NET](https://fews.net/sites/default/files/styles/large/public/seasonal-calendar-burkina-faso.png?itok=iibynbsG)

- FDW data consists of `BF1984A1` and `BF2001A2`.

| Year  | Admin-1  | # units  | Admin-2  | # units |
| :---: | :----:   | :----:   | :----:   | :---:   |
| 1984  | **`BF1984A1`** | 30 | -        | -       |
| 1997  | BF1997A1 | 45       | -        | -       |
| 2001  | BF2001A1 | 13       | **`BF2001A2`** | 45     |
| 2020  | BF2020A1 | 13       | BF2020A2 | 45     |

- Comparison between admin boundaries.

![image](https://github.com/chc-ucsb/gscd/blob/main/figures/BF_admin_shapes.png?raw=true)

- In 1997, 12 districts were subdivided, adding 15 new districts (30 → 45 units).

| 1984-1997|1997-present|
| :---:|:---:|
|BF1984A161 (Bougouriba) | BF2001A21301 (Bougouriba), BF2001A21302 (Ioba)|
|BF1984A163 (Comoe) | BF2001A20201 (Comoe), BF2001A20202 (Leraba)|
|BF1984A164 (Gourma) | BF2001A20802 (Gourma), BF2001A20803 (Komondjari), BF2001A20804 (Kompienga), BF2001A20402 (Koulpelogo)|
|BF1984A165 (Houet) | BF2001A20901 (Houet), BF2001A20903 (Tuy)|
|BF1984A168 (Kossi) | BF2001A20103 (Kossi), BF2001A20102 (Banwa)|
|BF1984A169 (Mouhoun) | BF2001A20104 (Mouhoun), BF2001A20101 (Bale)|
|BF1984A170 (Oubritenga) | BF2001A21103 (Oubritenga), BF2001A21102 (Kourweogo)|
|BF1984A171 (Poni) | BF2001A21304 (Poni), BF2001A21303 (Noumbiel)|
|BF1984A172 (Seno) | BF2001A21202 (Seno), BF2001A21204 (Yagha)|
|BF1984A173 (Sissili) | BF2001A20603 (Sissili), BF2001A20604 (Ziro)|
|BF1984A174 (Sourou) | BF2001A20106 (Sourou), BF2001A20105 (Nayala)|
|BF1984A175 (Yatenga) | BF2001A21003 (Yatenga), BF2001A21001 (Loroum), BF2001A21004 (Zondoma)|

- In 2001, the Admin-1 level became the Admin-2 level (no name changes).
- **`BF2001A2`** is used to represent the current admin-level 2 crop data.
- Burkina Faso has a single crop season: `Main`.
- Burkina Faso has no population group(s).

In [3]:
# Define the latest shapefile ------------------- #
# The 2001 Admin-2 layer serves as the reference geometry, reprojected to
# geographic coordinates (EPSG:4326).
latest_level = 2
shape_latest = BF_Admin2_2001.copy().to_crs(crs='epsg:4326')
# ----------------------------------------------- #

# Validation of FNIDs and Names ----------------- #
# `df` is overwritten with the validated records.
df = FDW_PD_ValidateFnidName(df, shape_used, shape_latest)
# ----------------------------------------------- #

# FDW Production Data Compiling ----------------- #
area, prod = FDW_PD_Compiling(df, shape_used)
# Independent copies of the compiled tables, taken before any admin linking.
area_all = area.copy()
prod_all = prod.copy()
# Unique column keys that remain once the first two column levels are dropped.
mdx_pss = area.columns.droplevel(level=[0, 1]).unique()
# ----------------------------------------------- #

BF1984A147:	"Boulkiemde" (FDW) is changed to "Bulkiemde" (shapefile).
BF2001A20601:	"Boulkiemde" (FDW) is changed to "Bulkiemde" (shapefile).


In [4]:
# Link admin boundaries ------------------------- #
# Build the link from the 1984 Admin-1 units to the 2001 Admin-2 units.
# NOTE(review): the structure of `link` (indexed by old FNID below) and of
# `over` is defined in tools.py — confirm there.
link, over = FDW_PD_CreateAdminLink(BF_Admin1_1984, BF_Admin2_2001, 'ADMIN1', 'ADMIN2', area, epsg)
# Manual Editing
# Override the generated entries for four 1984 units.
# NOTE(review): 'ABR' is a method code interpreted by the tools module, and a
# passed `fnids` presumably replaces the entry's whole set of linked 2001 units
# (dict.update semantics) — confirm both against tools.py.
link['BF1984A169'].update(method='ABR')
link['BF1984A155'].update(method='ABR', fnids={'BF2001A20502': {'name': 'Namentenga'}})
link['BF1984A152'].update(method='ABR')
link['BF1984A173'].update(method='ABR', fnids={'BF2001A20603': {'name': 'Sissili'},'BF2001A20604': {'name': 'Ziro'}})
# Crop specific ratios
link_ratio = FDW_PD_RatioAdminLink(link, area, over, mdx_pss)
# Add current unit to link_ratio
# Each entry gets a column named after its own FNID with ratio 1.0, then its
# columns are sorted by label in descending order.
for fnid_new in link_ratio.keys():
    link_ratio[fnid_new][fnid_new] = 1.0
    link_ratio[fnid_new] = link_ratio[fnid_new].sort_index(axis=1, ascending=False)
# Connect data with AdminLink
# Produces the area/production tables used for the final output (validation enabled).
area_new, prod_new = FDW_PD_ConnectAdminLink(link_ratio, area, prod, validation=True)
# ----------------------------------------------- #

# Aggregate grain data by grain type ------------ #
# All four tables go through the same product -> category aggregation.
area_new, prod_new, area_all, prod_all = FDW_PD_GrainTypeAgg(
    [area_new, prod_new, area_all, prod_all], product_category
)
# ----------------------------------------------- #

# Manual correction ----------------------------- #
# No manual edits are made in this section; only the yield table is derived.
crop_new = prod_new/area_new
# ----------------------------------------------- #

# Complete long format DataFrame
# Each wide table is transposed, stacked into rows and tagged with its indicator.
df_area = (
    area_new.T.stack().reset_index()
    .rename(columns={0: 'value'})
    .assign(indicator='area')
)
df_prod = (
    prod_new.T.stack().reset_index()
    .rename(columns={0: 'value'})
    .assign(indicator='production')
)
df_yield = (
    (prod_new/area_new).T.stack().reset_index()
    .rename(columns={0: 'value'})
    .assign(indicator='yield')
)
# Stack the three indicators, insert the country name, fix the column order
# and rename season_date to harvest_end (FEWS NET crop calendar).
stack = (
    pd.concat([df_area, df_prod, df_yield])
    .assign(country='Burkina Faso')
    .loc[:, ['fnid','country','name','product','year','season_name','season_date','indicator','value']]
    .reset_index(drop=True)
    .rename(columns={'season_date': 'harvest_end'})
)
# Map the season start date to the harvest end date.
stack['harvest_end'] = stack['harvest_end'].replace({
    '10-01':'12-01' # Main
})

# Save data
# Validated raw records, the long-format table and the admin link ratios.
save_hdf('./data/crop/adm_crop_production_raw_BF.hdf', df)
save_hdf('./data/crop/adm_crop_production_BF.hdf', stack)
save_npz('./data/crop/adm_crop_production_BF_ratio.npz', link_ratio)

- Aggregation of grain types ------------------ #
5 crops: Fonio, Maize (Corn), Millet, Rice (Paddy), Sorghum
5 crops: Fonio, Maize, Millet, Rice, Sorghum

./data/crop/adm_crop_production_raw_BF.hdf is saved.
./data/crop/adm_crop_production_BF.hdf is saved.
./data/crop/adm_crop_production_BF_ratio.npz is saved.


## Visualization of production data

In [5]:
# Bar chart of national grain production
# Reloads the saved long-format table so this cell only needs the import cell.
country_iso = 'BF'
country_name = 'Burkina Faso'
df = pd.read_hdf('./data/crop/adm_crop_production_%s.hdf' % country_iso)
# First and last year present in the data.
year = [df['year'].min(), df['year'].max()]
# Order in which the products are passed to the plot.
product_order = ['Sorghum','Millet','Maize','Rice','Fonio']
for season_name in ('Main',):
    sub = df.loc[df['season_name'].eq(season_name)]
    fn_save = './figures/%s_bar_natgrainprod_%s.png' % (country_iso, season_name)
    footnote = 'National grain production in %s - %s' % (country_name, season_name)
    fig = PlotBarProduction(sub, year, product_order, footnote, fn_save)
    # fig.show()

./figures/BF_bar_natgrainprod_Main.png is saved.


![image](https://github.com/chc-ucsb/gscd/blob/main/figures/BF_bar_natgrainprod_Main.png?raw=true)

In [6]:
# Lineplot of Production-Area-Yield (PAY) time-series
# Reloads the saved long-format table so this cell only needs the import cell.
country_iso = 'BF'
country_name = 'Burkina Faso'
df = pd.read_hdf('./data/crop/adm_crop_production_%s.hdf' % country_iso)
# First and last year present in the data.
year = [df['year'].min(), df['year'].max()]
# One figure is produced per [product, season] pair.
product_season = [
    ['Maize','Main'],
    ['Sorghum','Main'],
    ['Millet','Main'],
]
for product_name, season_name in product_season:
    sub = df.loc[df['product'].eq(product_name) & df['season_name'].eq(season_name)]
    fn_save = './figures/%s_line_pay_%s_%s.png' % (country_iso, product_name, season_name)
    footnote = 'Production-Area-Yield (PAY) time-series of %s - %s - %s' % (country_iso, product_name, season_name)
    fig = PlotLinePAY(sub, year, footnote, fn_save)
    # fig.show()

./figures/BF_line_pay_Maize_Main.png is saved.
./figures/BF_line_pay_Sorghum_Main.png is saved.
./figures/BF_line_pay_Millet_Main.png is saved.


![image](https://github.com/chc-ucsb/gscd/blob/main/figures/BF_line_pay_Maize_Main.png?raw=true)
![image](https://github.com/chc-ucsb/gscd/blob/main/figures/BF_line_pay_Sorghum_Main.png?raw=true)
![image](https://github.com/chc-ucsb/gscd/blob/main/figures/BF_line_pay_Millet_Main.png?raw=true)

In [7]:
# Distinct product names in the saved raw table (before grain-type aggregation).
# Relies on `country_iso` set by an earlier cell.
df = pd.read_hdf('./data/crop/adm_crop_production_raw_%s.hdf' % country_iso)
pd.unique(df['product'])

array(['Millet', 'Sorghum', 'Rice (Paddy)', 'Maize (Corn)', 'Fonio'],
      dtype=object)

In [2]:
# Heatmap of seasonal data availability
# Uses the raw table, so product names are the original FDW labels.
country_iso = 'BF'
country_name = 'Burkina Faso'
df = pd.read_hdf('./data/crop/adm_crop_production_raw_%s.hdf' % country_iso)
# Season coding passed to the plot; this country has the single 'Main' season.
code = {'Main':1}
comb = {1:1}
comb_name = {1:'Main'}
for product_name in ('Maize (Corn)','Millet','Sorghum'):
    fn_save = './figures/%s_heat_seasondata_%s.png' % (country_iso, product_name)
    footnote = 'Seasonal data availability in %s - %s (uncorrected)' % (country_name, product_name)
    # Records of this product restricted to the coded seasons.
    data = df.loc[df['product'].eq(product_name) & df['season_name'].isin(list(code))]
    fig = PlotHeatSeasonData(data, code, comb, comb_name, footnote, fn_save)
    # fig.show()

./figures/BF_heat_seasondata_Maize (Corn).png is saved.
./figures/BF_heat_seasondata_Millet.png is saved.
./figures/BF_heat_seasondata_Sorghum.png is saved.


![image](https://github.com/chc-ucsb/gscd/blob/main/figures/BF_heat_seasondata_Maize%20(Corn).png?raw=true)
![image](https://github.com/chc-ucsb/gscd/blob/main/figures/BF_heat_seasondata_Millet.png?raw=true)
![image](https://github.com/chc-ucsb/gscd/blob/main/figures/BF_heat_seasondata_Sorghum.png?raw=true)

In [9]:
# Calibrated PAY time-series per FNID
# NOTE(review): this import sits mid-notebook rather than in the import cell;
# PlotLineCropTS is only referenced by the commented-out call at the end of the loop.
from tools_graphic import PlotLineCropTS
country_iso, country_name = 'BF', 'Burkina Faso'
# Reload the saved long-format table and the admin link ratios.
df = pd.read_hdf('./data/crop/adm_crop_production_%s.hdf' % country_iso)
link_ratio = load_npz('./data/crop/adm_crop_production_%s_ratio.npz' % country_iso)
# Every year from the first to the last one in the data, inclusive.
year_all = np.arange(df['year'].min(), df['year'].max()+1)
# [product, season] pairs to process.
prod_season = [
    ['Maize', 'Main'],
    ['Sorghum', 'Main']
]
for product_name, season_name in prod_season:
    # Rows for this product/season pair.
    sub = df[
        (df['product'] == product_name) &
        (df['season_name'] == season_name)
    ]
    # One output file name per admin unit (FNID) found in the subset.
    for fnid in sub['fnid'].unique():
        sub_fps = sub[sub['fnid'] == fnid]
        fn_save = './figures/crop_calibrated/%s_%s_%s_%s.png' % (country_iso, product_name, season_name, fnid)
        # Plotting is disabled in the visible code: only the subset and file name are built.
        # fig = PlotLineCropTS(sub_fps, fnid, product_name, season_name, link_ratio, year_all, fn_save)