## FDW Crop Production Data Profiling - Mali

In [1]:
import os, sys, glob, json
from itertools import product, compress, chain
from functools import reduce
import warnings
import requests
import numpy as np
import pandas as pd
import geopandas as gpd
from tools import save_hdf, PrintAdminUnits, PlotAdminShapes
from tools import FDW_PD_Sweeper, FDW_PD_AvalTable, FDW_PD_Compiling, FDW_PD_GrainTypeAgg, FDW_PD_ValidateFnidName
from tools import FDW_PD_CreateAdminLink, FDW_PD_RatioAdminLink, FDW_PD_ConnectAdminLink
from tools_graphic import PlotBarProduction, PlotLinePAY, PlotHeatCropSystem, PlotHeatSeasonData
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Silence pandas PerformanceWarning messages for the rest of the notebook.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
# Turn off pandas' chained-assignment (SettingWithCopy) check.
pd.options.mode.chained_assignment = None

In [2]:
# CPCV2 grain code ------------------------------ #
# Lookup table mapping each 'product' value to its 'product_category'.
grain_code = pd.read_hdf('./data/crop/grain_cpcv2_code.hdf')
product_category = grain_code.set_index('product')['product_category'].to_dict()
# ----------------------------------------------- #

# Load FEWS NET administrative boundaries ------- #
# Every layer is reprojected to a projected CRS (EPSG:32629, UTM zone 29N) so that
# polygon areas are computed in square metres and can be converted to km^2.
epsg = 'epsg:32629'
fn_shapes = sorted(glob.glob('./data/shapefile/fewsnet/ML_Admin?_????.shp'))
shape_all = []
for fn in fn_shapes:
    # File stem, e.g. 'ML_Admin1_1980'; later cells refer to each layer by this name.
    name = os.path.splitext(os.path.basename(fn))[0]
    shape = gpd.read_file(fn).to_crs(epsg)
    shape['area'] = shape['geometry'].area/10**6
    # Expose the layer as a notebook-level variable. Assigning through globals()
    # avoids building source code from file paths and running it with exec().
    globals()[name] = shape
    shape_all.append(shape)
shape_all = pd.concat(shape_all, axis=0).reset_index(drop=True)
PrintAdminUnits(shape_all)
# ----------------------------------------------- #

# FDW API host address -------------------------- #
host = 'https://fdw.fews.net'
# Credentials are read from a local file; the context manager closes the handle
# as soon as the JSON content has been parsed.
with open('token.json', "r") as f:
    auth = tuple(json.load(f))
# Query filters sent to the crop-production endpoint.
parameters = {
    'format': 'json',
    'country': 'Mali',
    'product': 'R011',
    'survey_type': 'crop:best'
}
endpoint = '/api/cropproductionindicatorvalue/'
response = requests.get(host + endpoint, auth=auth, params=parameters, proxies={})
# Fail loudly on any HTTP error status instead of parsing an error payload.
response.raise_for_status()
df = pd.DataFrame.from_records(response.json())
# Untouched copy of the downloaded records, kept before any cleaning is applied.
df_origin = df.copy()
# ----------------------------------------------- #

# Manual Pre-processing before Sweeping --------- #
# 1. Default setting
# a) Records whose population group is missing or an empty string are labelled 'none'
no_group = df['population_group'].isna() | (df['population_group'] == '')
df.loc[no_group, 'population_group'] = 'none'
# ----------------------------------------------- #

# FDW Production Data Inspection ---------------- #
# FDW_PD_Sweeper returns two frames: `df` replaces the working records and the
# second is kept as `df_raw` (saved later as the "raw" HDF file).
# NOTE(review): presumably the sweeper drops missing/"Not Collected" points, as its
# printed log suggests -- confirm against tools.FDW_PD_Sweeper.
df, df_raw = FDW_PD_Sweeper(df)
# NOTE(review): table_dict is not referenced again in the visible cells; it looks
# like an availability summary per shapefile -- confirm against tools.FDW_PD_AvalTable.
table_dict = FDW_PD_AvalTable(df, shape_all)
# ----------------------------------------------- #

# FEWS NET Shapefile comparison ----------------- #
# The ML_Admin1_* variables are created by the shapefile loading loop earlier in
# this cell. Only the three Admin-1 layers listed here are carried forward as
# `shape_used` for FNID validation, compiling and admin linking.
shape_used = pd.concat([ML_Admin1_1980, ML_Admin1_1993, ML_Admin1_2016], axis=0)
PlotAdminShapes(shape_used, label=True)
# ----------------------------------------------- #

- FEWS NET admin shapefiles ------------------- #
        Admin1  # units    Admin2  # units
year                                      
1980  ML1980A1        8       NaN        0
1993  ML1993A1        9  ML1993A2       50
2001  ML2001A1        9  ML2001A2       50
2016  ML2016A1       11       NaN        0
----------------------------------------------- #
- Remove missing records ---------------------- #
Orignial data points: 3,952
Removed 6 "Collected" points
Removed 237 "Not Collected" points
1,236/1,284 "Area Planted" points are retained.
1,261/1,284 "Quantity Produced" points are retained.
1,212/1,284 "Yield" points are retained.
0/100 "Area Harvested" points are retained.
Current data points: 3,709

- Minor changes are applied.. ----------------- #

- Basic information --------------------------- #
Data period: 1982 - 2021
7 grain types are found: Barley (Unspecified), Fonio, Maize (Corn), Millet, Rice (Paddy), Sorghum, Wheat Grain
1 seasons are found: Main (10-01)
1 crop producti

- Mali crop seasonal calendar

![FEWS NET](https://fews.net/sites/default/files/styles/large/public/seasonal-calendar-mali.png?itok=F9WW6dC2)

- FDW data consists of `ML1980A1`, `ML1993A1`, and `ML2016A1`.

| Year | Admin-1 | # units  | Admin-2  | # units |
| :---: | :----:  | :----:   | :----:   | :---:  |
| 1980  | **`ML1980A1`** | 8        | - | -      |
| 1993  | **`ML1993A1`** | 9  | ML1993A2 | 50      |
| 2001  | ML2001A1 | 9  | ML2001A2 | 50     |
| 2016  | **`ML2016A1`**| 11  | - | -      |

- Comparison between admin boundaries.

![image](https://github.com/chc-ucsb/GlobalCropData/blob/main/figures/ML_admin_shapes.png?raw=true)

- In 1993, 1 district is divided and added.

| 1980-1992 |1993-2015|
| :---:|:---:|
|ML1980A110 (Gao) | ML1993A107 (Gao), ML1993A108 (Kidal)|

- In 2016, 2 districts are divided and added.

| 1993-2015 |2016-present|
| :---:|:---:|
|ML1993A107 (Gao) | ML2016A107 (Gao), ML2016A110 (Menaka)|
|ML1993A106 (Tombouctou) | ML2016A106 (Tombouctou), ML2016A111 (Taoudenit)|

- **`ML2016A1`** is used to represent current admin-level 1 crop data.
- Mali has a single crop season: `Main`.
- Mali has no population group(s).

In [3]:
# Define the latest shapefile ------------------- #
# The 2016 Admin-1 layer is the reference geometry, converted to EPSG:4326.
latest_level = 1
shape_latest = ML_Admin1_2016.copy().to_crs('epsg:4326')
# ----------------------------------------------- #

# Validation of FNIDs and Names ----------------- #
df = FDW_PD_ValidateFnidName(df, shape_used, shape_latest)
# ----------------------------------------------- #

# FDW Production Data Compiling ----------------- #
area, prod = FDW_PD_Compiling(df, shape_used)
# Keep copies of the compiled tables so the originals can be re-linked to the
# 2016 units without losing the as-reported values.
area_all = area.copy()
prod_all = prod.copy()
# Unique column keys that remain after dropping the first two column levels.
# NOTE(review): presumably the product/season(/system) combinations -- confirm.
mdx_pss = area.columns.droplevel([0,1]).unique()
# ----------------------------------------------- #

In [4]:
# Link admin boundaries ------------------------- #
# Each historical Admin-1 layer (1980, 1993) is linked to the 2016 Admin-1 layer.
# The link method of the listed units is set to 'ABR' by hand; the assert after each
# group confirms that no link is left on a different method.

# 1980 -> 2016
link_1980, over_1980 = FDW_PD_CreateAdminLink(ML_Admin1_1980, ML_Admin1_2016, 'ADMIN1', 'ADMIN1', area, epsg)
overrides_1980 = [
    ('ML1980A109', dict(method='ABR')),
    ('ML1980A102', dict(method='ABR', fnids={'ML2016A102': {'name': 'Koulikoro'}})),
    ('ML1980A110', dict(method='ABR')),
    ('ML1980A106', dict(method='ABR')),
]
for fnid_old, override in overrides_1980:
    link_1980[fnid_old].update(**override)
assert all(np.unique([v['method'] for k,v in link_1980.items()]) == 'ABR')

# 1993 -> 2016
link_1993, over_1993 = FDW_PD_CreateAdminLink(ML_Admin1_1993, ML_Admin1_2016, 'ADMIN1', 'ADMIN1', area, epsg)
overrides_1993 = [
    ('ML1993A109', dict(method='ABR')),
    ('ML1993A102', dict(method='ABR', fnids={'ML2016A102': {'name': 'Koulikoro'}})),
    ('ML1993A106', dict(method='ABR')),
    ('ML1993A107', dict(method='ABR')),
    ('ML1993A108', dict(method='ABR')),
]
for fnid_old, override in overrides_1993:
    link_1993[fnid_old].update(**override)
assert all(np.unique([v['method'] for k,v in link_1993.items()]) == 'ABR')

# Crop specific ratios
link_ratio_1980 = FDW_PD_RatioAdminLink(link_1980, area, over_1980, mdx_pss)
link_ratio_1993 = FDW_PD_RatioAdminLink(link_1993, area, over_1993, mdx_pss)

# Merge link_ratio: both vintages must target the same set of new units, then the
# per-unit ratio tables are joined side by side.
assert link_ratio_1980.keys() == link_ratio_1993.keys()
link_merged = [link_ratio_1980, link_ratio_1993]
fnids_new = list(link_merged[0].keys())
link_ratio = {
    fnid: pd.concat([link[fnid] for link in link_merged], axis=1)
    for fnid in fnids_new
}

# Add current unit to link_ratio (a unit maps onto itself with ratio 1.0), then
# order the columns in descending label order.
for fnid_new, ratio in link_ratio.items():
    ratio[fnid_new] = 1.0
    link_ratio[fnid_new] = ratio.sort_index(axis=1, ascending=False)

# Connect data with AdminLink
area_new, prod_new = FDW_PD_ConnectAdminLink(link_ratio, area, prod, validation=True)
# ----------------------------------------------- #

# Aggregate grain data by grain type ------------ #
# All four tables go through the same product -> product_category aggregation.
area_new, prod_new, area_all, prod_all = FDW_PD_GrainTypeAgg(
    [area_new, prod_new, area_all, prod_all],
    product_category,
)
# ----------------------------------------------- #

# Manual correction ----------------------------- #
# NOTE(review): crop_new is computed BEFORE the correction below, so it still
# reflects the uncorrected production value.
crop_new = prod_new/area_new
# Potential typo: 4799142.0 -> 479142.0
typo_columns = pd.IndexSlice['ML2016A103',:,'Sorghum','Main',:]
prod_new.loc[2010, typo_columns] = 479142.0
# ----------------------------------------------- #

# Complete long format DataFrame
def _to_long(wide, indicator):
    """Reshape a wide table into long records and tag them with `indicator`."""
    records = wide.T.stack().reset_index().rename({0:'value'},axis=1)
    records['indicator'] = indicator
    return records

df_area = _to_long(area_new, 'area')
df_prod = _to_long(prod_new, 'production')
# Yield is derived from the (corrected) production and area tables.
df_yield = _to_long(prod_new/area_new, 'yield')
stack = pd.concat([df_area, df_prod, df_yield], axis=0)
# Insert a country name
stack['country'] = 'Mali'
column_order = ['fnid','country','name','product','year','season_name','season_date','indicator','value']
stack = stack[column_order].reset_index(drop=True)
# Change season_date to harvest_end
stack = stack.rename(columns={'season_date':'harvest_end'})
harvest_end_by_season_date = {
    '10-01':'12-01', # Main
}
stack['harvest_end'] = stack['harvest_end'].replace(harvest_end_by_season_date)

# Save data
save_hdf('./data/crop/adm_crop_production_raw_ML.hdf', df)
save_hdf('./data/crop/adm_crop_production_ML.hdf', stack)

CBR is considered for 'ML1980A110' as no record found in: ['ML2016A108', 'ML2016A110']
CBR is considered for 'ML1980A106' as no record found in: ['ML2016A111']
CBR is considered for 'ML1993A107' as no record found in: ['ML2016A110']
CBR is considered for 'ML1993A108' as no record found in: ['ML2016A108']
CBR is considered for 'ML1993A106' as no record found in: ['ML2016A111']
- Aggregation of grain types ------------------ #
7 crops: Barley (Unspecified), Fonio, Maize (Corn), Millet, Rice (Paddy), Sorghum, Wheat Grain
7 crops: Barley, Fonio, Maize, Millet, Rice, Sorghum, Wheat

./data/crop/adm_crop_production_raw_ML.hdf is saved.
./data/crop/adm_crop_production_ML.hdf is saved.


## Visualization of production data

In [5]:
# Bar chart of national grain production
# The saved long-format file is re-read, so this cell does not depend on the
# in-memory tables of the processing cells.
country_iso, country_name = 'ML', 'Mali'
df = pd.read_hdf('./data/crop/adm_crop_production_%s.hdf' % country_iso)
product_order = ['Maize','Millet','Rice','Sorghum','Wheat','Barley','Fonio']
for season_name in ['Main']:
    sub = df.loc[df['season_name'] == season_name]
    fn_save = './figures/%s_bar_natgrainprod_%s.png' % (country_iso, season_name)
    footnote = 'National grain production in %s - %s' % (country_name, season_name)
    fig = PlotBarProduction(sub, product_order, footnote, fn_save)

./figures/ML_bar_natgrainprod_Main.png is saved.


![image](https://github.com/chc-ucsb/GlobalCropData/blob/main/figures/ML_bar_natgrainprod_Main.png?raw=true)

In [6]:
# Lineplot of Production-Area-Yield (PAY) time-series
# One figure is written per (product, season) pair listed in product_season.
country_iso, country_name = 'ML', 'Mali'
df = pd.read_hdf('./data/crop/adm_crop_production_%s.hdf' % country_iso)
product_season = [[crop, 'Main'] for crop in ['Maize', 'Millet', 'Rice', 'Sorghum']]
for product_name, season_name in product_season:
    is_product = df['product'] == product_name
    is_season = df['season_name'] == season_name
    sub = df[is_product & is_season]
    fn_save = './figures/%s_line_pay_%s_%s.png' % (country_iso, product_name, season_name)
    # NOTE(review): the footnote uses the ISO code, not country_name -- confirm intended.
    footnote = 'Production-Area-Yield (PAY) time-series of %s - %s - %s' % (country_iso, product_name, season_name)
    fig = PlotLinePAY(sub, footnote, fn_save)

./figures/ML_line_pay_Maize_Main.png is saved.
./figures/ML_line_pay_Millet_Main.png is saved.
./figures/ML_line_pay_Rice_Main.png is saved.
./figures/ML_line_pay_Sorghum_Main.png is saved.


![image](https://github.com/chc-ucsb/GlobalCropData/blob/main/figures/ML_line_pay_Maize_Main.png?raw=true)
![image](https://github.com/chc-ucsb/GlobalCropData/blob/main/figures/ML_line_pay_Millet_Main.png?raw=true)
![image](https://github.com/chc-ucsb/GlobalCropData/blob/main/figures/ML_line_pay_Rice_Main.png?raw=true)
![image](https://github.com/chc-ucsb/GlobalCropData/blob/main/figures/ML_line_pay_Sorghum_Main.png?raw=true)