## FDW Crop Production Data Profiling - Guinea

In [1]:
import os, sys, glob, json
from itertools import product, compress, chain
from functools import reduce
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import requests
import numpy as np
import pandas as pd
import geopandas as gpd
from tools import save_hdf, save_npz, load_npz, PrintAdminUnits, PlotAdminShapes
from tools import FDW_PD_Sweeper, FDW_PD_AvalTable, FDW_PD_Compiling, FDW_PD_GrainTypeAgg, FDW_PD_ValidateFnidName
from tools import FDW_PD_CreateAdminLink, FDW_PD_RatioAdminLink, FDW_PD_ConnectAdminLink
from tools import FDW_PD_CaliSeasonYear
from tools_graphic import PlotBarProduction, PlotLinePAY, PlotHeatCropSystem, PlotHeatSeasonData
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
pd.options.mode.chained_assignment = None

In [2]:
# CPCV2 grain code ------------------------------ #
# Build a {product: product_category} lookup from the grain code table.
grain_code = pd.read_hdf('../data/crop/grain_cpcv2_code.hdf')
product_category = grain_code[['product', 'product_category']].set_index('product').to_dict()['product_category']
# ----------------------------------------------- #

# Load FEWS NET administrative boundaries ------- #
epsg = 'epsg:32628' # Guinea
fn_shapes = sorted(glob.glob('../data/shapefile/fewsnet/GN_Admin?_????.shp'))
shape_all = []
for fn in fn_shapes:
    # File stem (e.g. "GN_Admin1_1990") doubles as the notebook-level variable name.
    name = os.path.splitext(os.path.basename(fn))[0]
    shape = gpd.read_file(fn).to_crs(epsg)
    # Geometry area divided by 10**6 (m^2 -> km^2, assuming the projected CRS unit is metres).
    shape['area'] = shape['geometry'].area/10**6
    # Bind the frame to a global named after the file so later cells can refer to
    # GN_Admin1_1990, GN_Admin2_1990, ... directly. This replaces exec() on
    # string-interpolated paths, which breaks on quotes/backslashes in file names.
    globals()[name] = shape
    shape_all.append(shape)
shape_all = pd.concat(shape_all, axis=0).reset_index(drop=True)
PrintAdminUnits(shape_all)
# ----------------------------------------------- #

# FDW API host address -------------------------- #
host = 'https://fdw.fews.net'
# Credentials are read from a local JSON file; the context manager closes the handle.
with open('token.json', "r") as f:
    auth = tuple(json.load(f))
parameters = {
    'format': 'json',
    'country': 'Guinea',
    'product': 'R011',
    'survey_type': 'crop:best'
}
endpoint = '/api/cropproductionindicatorvalue/'
response = requests.get(host + endpoint, auth=auth, params=parameters, proxies={})
# Fail loudly on HTTP errors instead of parsing an error payload as data.
response.raise_for_status()
df = pd.DataFrame.from_records(response.json())
# Keep an untouched copy of the API response for reference.
df_origin = df.copy()
# ----------------------------------------------- #

- FEWS NET admin shapefiles ------------------- #
| year	 | Admin1   | # units   | Admin2   | # units   | Admin3   | # units   |
| 1990	 | GN1990A1 | 8	| GN1990A2	| 34	| nan	| 0	|
| 2014	 | GN2014A1 | 8	| GN2014A2	| 34	| nan	| 0	|
----------------------------------------------- #


In [3]:
# Manual Pre-processing before Sweeping --------- #
# 1. Default setting
# a) Crop production system: missing (NaN) or empty-string labels become 'none'
df.loc[
    df['crop_production_system'].isna() | df['crop_production_system'].eq(''),
    'crop_production_system'
] = 'none'
# b) Population group: missing (NaN) or empty-string labels become 'none'
df.loc[
    df['population_group'].isna() | df['population_group'].eq(''),
    'population_group'
] = 'none'
# Shorten the season label
df['season_name'] = df['season_name'].replace({'Main harvest': "Main"})
# ----------------------------------------------- #

# FDW Production Data Inspection ---------------- #
df, df_raw = FDW_PD_Sweeper(df, area_priority='Area Planted')
table_dict = FDW_PD_AvalTable(df, shape_all)
# ----------------------------------------------- #

# FEWS NET Shapefile comparison ----------------- #
# Stack the admin-1 and admin-2 boundary sets (1990 version) that the data refers to.
shape_used = pd.concat([GN_Admin1_1990, GN_Admin2_1990], axis=0)
PlotAdminShapes(shape_used, label=True)
# ----------------------------------------------- #

- Remove missing records ---------------------- #
Orignial data points: 426
426/426 "Quantity Produced" points are retained.
Current data points: 426

- Minor changes are applied ------------------- #

- Basic information --------------------------- #
Data period: 2008 - 2015
3 grain types are found: Fonio, Maize (Corn), Rice (Paddy)
1 seasons are found: Main (09-01)
1 crop production system are found: none
Data sources include:
[1] L'Agence Nationale des Statistiques Agricoles et Alimentaires, Guinea --- Dynamique de production, Guinea
Administrative-1 fnids: 7
Administrative-2 fnids: 33
0 reporting units are found: 

- Total production over time ------------------ #
season_name    Main
                   
Fonio          3.0%
Maize (Corn)  32.1%
Rice (Paddy)  64.9%

- Crop calendar ------------------------------- #
        product season_name planting_month harvest_month
0         Fonio        Main          09-01         12-01
1  Maize (Corn)        Main          09-01         12-01
2

- Guinea crop seasonal calendar

![FEWS NET](../figures/crop_calendar/seasonal-calendar-guinea.png)

![USDA](https://ipad.fas.usda.gov/rssiws/al/crop_calendar/images/wafrica_gv_calendar.png)

- FDW data consists of `GN1990A1` and `GN1990A2`.

| Year | Admin-1 | # units  | Admin-2  | # units |
| :---: | :----:  | :----:   | :----:   | :---:  |
| 1990	 | **`GN1990A1`** | 8	| **`GN1990A2`**	| 34	|
| 2014	 | GN2014A1 | 8	| GN2014A2	| 34	|

- Comparison between admin boundaries.

![image](../figures/GN_admin_shapes.png)

- **FDW data consists of only "Quantity Produced" for 8 years**.
- Different admin levels are reported for two periods: GN1990A1 (2008-2009) and GN1990A2 (2010-2015).
- **`GN1990A2`** is used to represent current admin-level 2 crop data.
- Guinea has a single crop season: Main harvest (09-01).
- Guinea has no crop production system.
- Guinea has no population group.

In [4]:
# Define the latest shapefile ------------------- #
# Admin-2 (1990 boundaries) is the target level; reproject a copy to geographic coordinates.
latest_level = 2
shape_latest = GN_Admin2_1990.copy().to_crs('epsg:4326')
# ----------------------------------------------- #

# Validation of FNIDs and Names ----------------- #
df = FDW_PD_ValidateFnidName(df, shape_used, shape_latest)
# ----------------------------------------------- #

# FDW Production Data Compiling ----------------- #
area, prod = FDW_PD_Compiling(df, shape_used)
# Keep copies of the compiled tables as returned by the compiling step.
area_all = area.copy()
prod_all = prod.copy()
# Unique column keys that remain after dropping the first two column levels.
mdx_pss = area.columns.droplevel([0,1]).unique()
# ----------------------------------------------- #

In [5]:
# Link admin boundaries ------------------------- #
link, over = FDW_PD_CreateAdminLink(GN_Admin1_1990, GN_Admin2_1990, 'ADMIN1', 'ADMIN2', prod, epsg)
# Force the 'PBR' method for this unit, then check every link uses 'PBR'.
link['GN1990A102'].update(method='PBR')
assert all(np.unique([v['method'] for v in link.values()]) == 'PBR')
# Crop specific ratios
link_ratio = FDW_PD_RatioAdminLink(link, prod, over, mdx_pss)
# Add current unit to link_ratio
for fnid_new, ratio in link_ratio.items():
    # Each unit maps onto itself with ratio 1.0; columns are then sorted in descending order.
    ratio[fnid_new] = 1.0
    link_ratio[fnid_new] = ratio.sort_index(axis=1, ascending=False)
# Connect data with AdminLink
area_new, prod_new = FDW_PD_ConnectAdminLink(link_ratio, area, prod, validation=True)
# ----------------------------------------------- #

CBR is considered for 'GN1990A102' as no record found in: ['GN1990A20201']


In [7]:
# Manual correction ----------------------------- #
# Yield is derived element-wise as production divided by area.
crop_new = prod_new/area_new
# ----------------------------------------------- #

# Complete long format DataFrame ---------------- #
# Each wide table is transposed and stacked into one 'value' column, then tagged by indicator.
df_area = area_new.T.stack().rename('value').reset_index()
df_area['indicator'] = 'area'
df_prod = prod_new.T.stack().rename('value').reset_index()
df_prod['indicator'] = 'production'
df_yield = (crop_new).T.stack().rename('value').reset_index()
df_yield['indicator'] = 'yield'
stack = pd.concat([df_area, df_prod, df_yield], axis=0)
# Add "planting year"
# Join on every season key except the last entry ('planting_year'), which the merge brings in.
cols = ['season_name','product','crop_production_system','planting_month','harvest_year','harvest_month','planting_year']
season_table = df[cols].drop_duplicates()
stack = stack.merge(season_table, on=cols[:-1])
# Add country and admin names
stack = stack.merge(df[['fnid','country','country_code','admin_1','admin_2']].drop_duplicates(), on='fnid', how='inner')
names = [
    'fnid','country','country_code','admin_1','admin_2','name',
    'product','season_name','planting_year','planting_month','harvest_year','harvest_month',
    'crop_production_system','indicator','value'
]
# Explicit copy: the column added below must not depend on the globally
# suppressed chained-assignment warning.
stack_gscd = stack[names].copy()
stack_gscd['gscd_code'] = 'calibrated'
# ----------------------------------------------- #

# Reported FDW data ----------------------------- #
# Explicit copy for the same reason; `df` itself is saved unchanged further down.
stack_fdw = df[names].copy()
stack_fdw['indicator'] = stack_fdw['indicator'].replace({'Area Harvested':'area','Quantity Produced':'production','Yield':'yield'})
stack_fdw['gscd_code'] = 'reported'
# ----------------------------------------------- #

# Final Processing ------------------------------ #
stack = pd.concat([stack_fdw,stack_gscd], axis=0).reset_index(drop=True)
# No concerns found for grain types
# Map product names to their product category.
stack['product'] = stack['product'].replace(product_category)
# Calibration of planting and Harvest year and season
crop_calendar = dict(
    Rice = dict(planting_year=0, planting_month='05-01', harvest_year=0, harvest_month='12-01'),
    Maize = dict(planting_year=0, planting_month='05-01', harvest_year=0, harvest_month='12-01')
)
for k, v in crop_calendar.items():
    # The product column is not modified inside the loop, so one mask per product is enough.
    is_product = stack['product'] == k
    # Year entries are offsets added to the existing values; month entries overwrite them.
    stack.loc[is_product, 'planting_year'] += v['planting_year']
    stack.loc[is_product, 'planting_month'] = v['planting_month']
    stack.loc[is_product, 'harvest_year'] += v['harvest_year']
    stack.loc[is_product, 'harvest_month'] = v['harvest_month']
# Missing admin names are labelled 'none'.
stack.loc[stack['admin_1'].isna(), 'admin_1'] = 'none'
stack.loc[stack['admin_2'].isna(), 'admin_2'] = 'none'
# ----------------------------------------------- #

# Save data
fn_out = '../data/crop/adm_crop_production_GN.csv'
stack.to_csv(fn_out); print(f'{fn_out} is saved.')
save_hdf('../data/crop/adm_crop_production_GN.hdf', stack)
save_hdf('../data/crop/adm_crop_production_GN_raw.hdf', df)

../data/crop/adm_crop_production_GN.csv is saved.
../data/crop/adm_crop_production_GN.hdf is saved.
../data/crop/adm_crop_production_GN_raw.hdf is saved.


## Visualization of production data

In [8]:
# Bar chart of national grain production
country_iso, country_name = 'GN', 'Guinea'
df = pd.read_hdf('../data/crop/adm_crop_production_%s.hdf' % country_iso)
# Keep only the calibrated records. The explicit copy makes the 'year' column
# assignment independent of the globally suppressed chained-assignment warning.
df = df[df['gscd_code']=='calibrated'].copy()
df['year'] = df['harvest_year']
# [first, last] harvest year present in the calibrated data.
year = [df['year'].min(), df['year'].max()]
product_order = ['Rice', 'Maize']
for season_name in ['Main']:
    footnote = 'National grain production in %s - %s' % (country_name, season_name)
    fn_save = '../figures/%s_bar_natgrainprod_%s.png' % (country_iso, season_name)
    sub = df[df['season_name'] == season_name]
    fig = PlotBarProduction(sub, year, product_order, footnote, fn_save)
    fig.show()

../figures/GN_bar_natgrainprod_Main.png is saved.


In [9]:
# Lineplot of Production-Area-Yield (PAY) time-series
country_iso, country_name = 'GN', 'Guinea'
df = pd.read_hdf('../data/crop/adm_crop_production_%s.hdf' % country_iso)
# Keep only the calibrated records. The explicit copy makes the 'year' column
# assignment independent of the globally suppressed chained-assignment warning.
df = df[df['gscd_code']=='calibrated'].copy()
df['year'] = df['harvest_year']
# [first, last] harvest year present in the calibrated data.
year = [df['year'].min(), df['year'].max()]
# (product, season) pairs to plot, one figure each.
product_season = [
    ['Rice','Main'],
    ['Maize','Main'],
]
for product_name, season_name in product_season:
    footnote = 'Production-Area-Yield (PAY) time-series of %s - %s - %s' % (country_iso, product_name, season_name)
    fn_save = '../figures/%s_line_pay_%s_%s.png' % (country_iso, product_name, season_name)
    sub = df[(df['product'] == product_name) & (df['season_name'] == season_name)]
    fig = PlotLinePAY(sub, year, footnote, fn_save)
    fig.show()

../figures/GN_line_pay_Rice_Main.png is saved.


../figures/GN_line_pay_Maize_Main.png is saved.
