# HarvestStat Data Profiling - Chad

In [1]:
import glob, json
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import requests
import numpy as np
import pandas as pd
import geopandas as gpd
from tools import save_npz, PrintAdminUnits, PlotAdminShapes
from tools import FDW_PD_Sweeper, FDW_PD_AvalTable, FDW_PD_Compiling, FDW_PD_ValidateFnidName
from tools import FDW_PD_CreateAdminLink, FDW_PD_RatioAdminLink, FDW_PD_ConnectAdminLink
from tools import FDW_PD_CaliSeasonYear
from tools_graphic import PlotBarProduction, PlotLinePAY
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
pd.options.mode.chained_assignment = None

In [2]:
# Load FEWS NET administrative boundaries ------- #
# Reads every Chad admin shapefile, reprojects it to a projected CRS, adds an
# "area" column, and exposes each layer as a notebook variable named after its
# file stem (e.g. TD_Admin1_1960) so that later cells can refer to it directly.
epsg = 'epsg:32633' # Chad
fn_shapes = sorted(glob.glob('../data/shapefile/fewsnet/TD_Admin?_????.shp'))
shape_all = []
for fn in fn_shapes:
    # The glob pattern fixes the file name at 18 characters, so this slice is
    # the file stem without the ".shp" extension.
    name = fn[-18:-4]
    shape = gpd.read_file(fn).to_crs(epsg)
    # Polygon area divided by 10**6 (m^2 -> km^2, assuming the CRS unit is metres).
    shape['area'] = shape['geometry'].area/10**6
    # Bind the layer by name through globals() instead of exec(): the path is
    # never pasted into generated source code, so backslashes or quotes in
    # the path cannot corrupt it.
    globals()[name] = shape
    shape_all.append(shape)
shape_all = pd.concat(shape_all, axis=0).reset_index(drop=True)
PrintAdminUnits(shape_all)
# ----------------------------------------------- #

# FDW API host address -------------------------- #
host = 'https://fdw.fews.net'
# Read the credentials inside a context manager so the file handle is closed.
with open('token.json', "r") as token_file:
    auth = tuple(json.load(token_file))
parameters = {
    'format': 'json',
    'country': 'Chad',
    'product': ['R011','R012','R013','R014','R015','R017','R018'],
    'survey_type': 'crop:best'
}
endpoint = '/api/cropproductionindicatorvalue/'
response = requests.get(host + endpoint, auth=auth, params=parameters, proxies={})
# Stop here on an HTTP error instead of parsing an error payload as data.
response.raise_for_status()
df = pd.DataFrame.from_records(response.json())
# ----------------------------------------------- #

###################################################
# There is an issue with the submitted data, which only includes 2020, so we proceed without it.
df = df[df['collection_status'] != 'Submitted']
###################################################

- FEWS NET admin shapefiles ------------------- #
| year	 | Admin1   | # units   | Admin2   | # units   | Admin3   | # units   |
| 1960	 | TD1960A1 | 14	| TD1960A2	| 52	| nan	| 0	|
| 2008	 | TD2008A1 | 22	| TD2008A2	| 62	| nan	| 0	|
| 2012	 | TD2012A1 | 23	| TD2012A2	| 68	| nan	| 0	|
| 2017	 | TD2017A1 | 23	| TD2017A2	| 70	| nan	| 0	|
----------------------------------------------- #


In [3]:
# Manual Pre-processing before Sweeping --------- #
# 1. Default setting
# Missing (NaN) or empty-string entries in these two descriptive columns are
# replaced with the literal 'none':
# a) None-type crop production system
# b) None-type population group
for default_col in ('crop_production_system', 'population_group'):
    is_blank = df[default_col].isna() | (df[default_col] == '')
    df.loc[is_blank, default_col] = 'none'
# 2. Manual setting
# a) Add admin names to reporting units
admin_level = 'admin_1'
# An fnid whose 7th character is not 'A' is treated as a reporting unit.
not_admin_fnid = df['fnid'].map(lambda x: x[6] != 'A')
reporting_unit = df.loc[not_admin_fnid, 'fnid'].unique()
# For those rows, use the first space-separated word of the locality name
# as the admin name.
in_reporting_unit = df.fnid.isin(reporting_unit)
first_word = df.loc[in_reporting_unit, 'locality_name'].map(lambda x: x.split(' ')[0])
df.loc[in_reporting_unit, admin_level] = first_word
# ----------------------------------------------- #

# FDW Production Data Inspection ---------------- #
df, df_raw = FDW_PD_Sweeper(df, area_priority='Area Harvested')
table_dict = FDW_PD_AvalTable(df, shape_all)
# ----------------------------------------------- #

# FEWS NET Shapefile comparison ----------------- #
# Stack the three admin-1 layers into one frame and plot them with labels.
admin1_layers = [TD_Admin1_1960, TD_Admin1_2008, TD_Admin1_2012]
shape_used = pd.concat(admin1_layers, axis=0)
PlotAdminShapes(shape_used, label=True)
# ----------------------------------------------- #

- Remove missing records ---------------------- #
Orignial data points: 9,647
Removed 120 "Missing Value" points
3,120/3,126 "Area Harvested" points are retained.
3,093/3,126 "Quantity Produced" points are retained.
3,314/3,395 "Yield" points are retained.
Current data points: 9,527

- Minor changes are applied ------------------- #

- Basic information --------------------------- #
Data period: 1983 - 2017
13 grain types are found: Bambara groundnut, Cassava, Cowpeas (Mixed), Fonio, Groundnuts (In Shell), Maize (Corn), Millet, Rice (Paddy), Sesame Seed, Sorghum, Sweet Potatoes, Taro, move to 1594AA, Wheat Grain
2 seasons are found: Main harvest (10-01), Cold off-season harvest (02-01)
1 crop production system are found: none
Data sources include:
[1] Ministry of Agriculture and Irrigation, Chad --- Resultats Definitifs De La Campagne Agricole, Chad
Administrative-1 fnids: 49
Administrative-2 fnids: 0
0 reporting units are found: 

- Total production over time ------------------ #
seas


- Chad crop seasonal calendar

![FEWS NET](https://fews.net/sites/default/files/styles/large_width_880/public/2023-03/seasonal-calendar-chad.png?itok=IYemQBC5)

- FDW data consists of `TD1960A1`, `TD2008A1`, and `TD2012A1`.

| Year | Admin-1 | # units  | Admin-2  | # units |
| :---: | :----:  | :----:   | :----:   | :---:  |
| 1960 | TD1960A1| 14        | TD1960A2 | 52     |
| 2008 | TD2008A1| 22        | TD2008A2 | 62     |
| 2012 | **`TD2012A1`**| 23 | TD2012A2 | 68    |

- Comparison between admin boundaries.

![image](https://github.com/chc-ucsb/gscd/blob/main/figures/TD_admin_shapes.png?raw=true)

- In 2008, 6 districts were divided, adding 8 new districts (14 → 22).

| 1960-2008 |2008-2012|
| :---:|:---:|
|TD1960A124 (Chari-Baguirmi) | TD2008A104 (Chari-Baguirmi), TD2008A108 (Hadjer-Lamis), TD2008A117 (N'Djamena) |
|TD1960A125 (Kanem) | TD2008A109 (Kanem), TD2008A101 (Barh el Gazel)|
|TD1960A126 (Moyen-Chari)| TD2008A116 (Moyen-Chari), TD2008A113 (Mandoul)|
|TD1960A127 (Ouaddai) | TD2008A118 (Ouaddai), TD2008A120 (Sila) | 
|TD1960A128 (B.E.T.) | TD2008A122 (Tibesti), TD2008A103 (Borkou), TD2008A130 (Ennedi)|
|TD1960A129 (Mayo-Kebbi) | TD2008A114 (Mayo-Kebbi Est), TD2008A115 (Mayo-Kebbi Ouest) |

- In 2012, 1 district was divided, adding 1 new district (22 → 23).

| 2008-2012| 2012-present |
| :---:|:---:|
|TD2008A130 (Ennedi) | TD2012A105 (Ennedi-Est), TD2012A106 (Ennedi-Ouest)|

- **`TD2012A1`** is used to represent current admin-level 1 crop data.
- Chad has two crop seasons: `Main harvest` and `Cold off-season harvest`.
- Chad has no population group(s).

In [4]:
# Define the latest shapefile ------------------- #
latest_level = 1
# Work on a copy of the 2012 admin-1 layer, reprojected to EPSG:4326.
shape_latest = TD_Admin1_2012.copy()
shape_latest = shape_latest.to_crs('epsg:4326')
# ----------------------------------------------- #

# Validation of FNIDs and Names ----------------- #
# Short labels that replace the season names found in the data.
season_short_names = {'Main harvest':'Main', 'Cold off-season harvest': 'Cold-off'}
df = FDW_PD_ValidateFnidName(df, shape_used, shape_latest)
df['season_name'] = df['season_name'].replace(season_short_names)
# ----------------------------------------------- #

# FDW Production Data Compiling ----------------- #
area, prod = FDW_PD_Compiling(df, shape_used)
# Keep untouched copies of the compiled tables.
area_all = area.copy()
prod_all = prod.copy()
# Unique column labels that remain after dropping the first two column levels.
mdx_pss = area.columns.droplevel([0,1]).unique()
# ----------------------------------------------- #

In [5]:
# Link admin boundaries ------------------------- #
# Build links from each older admin-1 layer to the 2012 layer, then force the
# link method of selected units to 'PBR' and check that every link uses it.
link_1960, over_1960 = FDW_PD_CreateAdminLink(TD_Admin1_1960, TD_Admin1_2012, 'ADMIN1', 'ADMIN1', prod, epsg)
for fnid_old in ('TD1960A128', 'TD1960A124'):
    link_1960[fnid_old].update(method='PBR')
methods_1960 = np.unique([entry['method'] for entry in link_1960.values()])
assert all(methods_1960 == 'PBR')
link_2008, over_2008 = FDW_PD_CreateAdminLink(TD_Admin1_2008, TD_Admin1_2012, 'ADMIN1', 'ADMIN1', prod, epsg)
for fnid_old in ('TD2008A130', 'TD2008A122', 'TD2008A103', 'TD2008A108', 'TD2008A117'):
    link_2008[fnid_old].update(method='PBR')
methods_2008 = np.unique([entry['method'] for entry in link_2008.values()])
assert all(methods_2008 == 'PBR')
# Crop specific ratios
link_ratio_1960 = FDW_PD_RatioAdminLink(link_1960, prod, over_1960, mdx_pss)
link_ratio_2008 = FDW_PD_RatioAdminLink(link_2008, prod, over_2008, mdx_pss)
# Merge link_ratio
# Both ratio dictionaries must be keyed by the same units before merging.
assert link_ratio_1960.keys() == link_ratio_2008.keys()
link_merged = [link_ratio_1960, link_ratio_2008]
fnids_new = list(link_merged[0].keys())
# One frame per unit: the 1960 and 2008 ratio tables joined column-wise.
link_ratio = {
    unit: pd.concat([ratios[unit] for ratios in link_merged], axis=1)
    for unit in fnids_new
}
# Add current unit to link_ratio
for unit, ratio_table in link_ratio.items():
    # The unit maps onto itself with a ratio of 1.0.
    ratio_table[unit] = 1.0
    # Columns sorted in descending label order.
    link_ratio[unit] = ratio_table.sort_index(axis=1, ascending=False)
# Connect data with AdminLink
area_new, prod_new = FDW_PD_ConnectAdminLink(link_ratio, area, prod, validation=True)
# ----------------------------------------------- #

CBR is considered for 'TD1960A128' as no record found in: ['TD2012A103', 'TD2012A105', 'TD2012A106', 'TD2012A122']
CBR is considered for 'TD1960A124' as no record found in: ['TD2012A117']
CBR is considered for 'TD2008A130' as no record found in: ['TD2012A105', 'TD2012A106']
CBR is considered for 'TD2008A122' as no record found in: ['TD2012A122']
CBR is considered for 'TD2008A103' as no record found in: ['TD2012A103']


In [6]:
# Manual correction ----------------------------- #
# Yield table: production divided by area, element-wise.
crop_new = prod_new/area_new
# ----------------------------------------------- #

# Complete long format DataFrame ---------------- #
# Turn each wide table into long records with a "value" column and tag the
# records with the indicator they represent.
long_tables = []
for wide_table, indicator_name in [(area_new, 'area'), (prod_new, 'production'), (crop_new, 'yield')]:
    long_table = wide_table.T.stack().rename('value').reset_index()
    long_table['indicator'] = indicator_name
    long_tables.append(long_table)
df_area, df_prod, df_yield = long_tables
stack = pd.concat(long_tables, axis=0)
# Add "planting year"
cols = ['season_name','product','crop_production_system','planting_month','harvest_year','harvest_month','planting_year']
season_table = df[cols].drop_duplicates()
# Join on every listed column except the last one ('planting_year'), which is
# the column being added.
season_keys = cols[:-1]
stack = stack.merge(season_table, on=season_keys)
# Add country and admin names
admin_table = df[['fnid','country','country_code','admin_1','admin_2']].drop_duplicates()
stack = stack.merge(admin_table, on='fnid', how='inner')
# Final column selection and order.
names = [
    'fnid','country','country_code','admin_1','admin_2','name',
    'product','season_name','planting_year','planting_month','harvest_year','harvest_month',
    'crop_production_system','indicator','value'
]
stack = stack[names]
# ----------------------------------------------- #

# Final Processing ------------------------------ #
# Manual change of product name
product_category_custom = {
    'Bambara groundnut': 'Bambara groundnut',
    'Cassava': 'Cassava',
    'Cowpeas (Mixed)': 'Cowpeas',
    'Fonio': 'Fonio',
    'Groundnuts (In Shell)': 'Groundnuts (In Shell)',
    'Maize (Corn)': 'Maize',
    'Millet': 'Millet',
    'Rice (Paddy)': 'Rice',
    'Sesame Seed': 'Sesame Seed',
    'Sorghum': 'Sorghum',
    'Sweet Potatoes': 'Sweet Potatoes',
    'Taro, move to 1594AA': 'Taro',
    'Wheat Grain': 'Wheat'
}
stack['product'] = stack['product'].replace(product_category_custom)
# Apply the same renaming to the row index of every ratio table.
for unit, ratio_table in link_ratio.items():
    index_values = ratio_table.index.to_frame().reset_index(drop=True)
    mdx = pd.MultiIndex.from_frame(index_values.replace(product_category_custom))
    ratio_table.index = mdx
    link_ratio[unit] = ratio_table
# Calibration of crop calendar using External Crop Calendar (ECC)
ecc_all = pd.read_csv('../data/crop_calendar/external_crop_calendar.csv')
ecc = ecc_all[ecc_all['country'] == 'Chad']
stack, link_ratio = FDW_PD_CaliSeasonYear(stack, ecc, link_ratio)
# None admin names
for admin_col in ('admin_1', 'admin_2'):
    stack.loc[stack[admin_col].isna(), admin_col] = 'none'
# ----------------------------------------------- #

# Save data
fn_out = '../data/crop/adm_crop_production_TD.csv'
stack.to_csv(fn_out)
print(f'{fn_out} is saved.')
save_npz('../data/crop/adm_crop_production_TD_ratio.npz', link_ratio)

All [season_name, product, crop_production_system] are in the external crop calendar.
../data/crop/adm_crop_production_TD.csv is saved.
../data/crop/adm_crop_production_TD_ratio.npz is saved.


## Visualization of production data

In [7]:
# Bar chart of national grain production
# Reload the saved table and draw one bar chart per season.
country_iso = 'TD'
country_name = 'Chad'
fn_crop = '../data/crop/adm_crop_production_%s.csv' % country_iso
df = pd.read_csv(fn_crop, index_col=0)
# The harvest year is used as the plotting year.
df['year'] = df['harvest_year']
plot_years = df['year']
year = [plot_years.min(), plot_years.max()]
product_order = ['Sorghum','Millet','Maize','Rice','Fonio','Wheat']
for season_name in ['Main','Cold-off']:
    in_season = df['season_name'] == season_name
    sub = df[in_season]
    footnote = 'National grain production in %s - %s' % (country_name, season_name)
    fn_save = '../figures/%s_bar_natgrainprod_%s.png' % (country_iso, season_name)
    fig = PlotBarProduction(sub, year, product_order, footnote, fn_save)
    fig.show()

../figures/TD_bar_natgrainprod_Main.png is saved.


../figures/TD_bar_natgrainprod_Cold-off.png is saved.


In [8]:
# Lineplot of Production-Area-Yield time-series
# Reload the saved table and draw one PAY line plot per (product, season) pair.
country_iso = 'TD'
country_name = 'Chad'
fn_crop = '../data/crop/adm_crop_production_%s.csv' % country_iso
df = pd.read_csv(fn_crop, index_col=0)
# The harvest year is used as the plotting year.
df['year'] = df['harvest_year']
plot_years = df['year']
year = [plot_years.min(), plot_years.max()]
product_season = [
    ['Sorghum','Main'],
    ['Millet','Main'],
    ['Maize','Main'],
    ['Sorghum','Cold-off'],
]
for product_name, season_name in product_season:
    is_product = df['product'] == product_name
    is_season = df['season_name'] == season_name
    sub = df[is_product & is_season]
    footnote = 'Production-Area-Yield (PAY) time-series of %s - %s - %s' % (country_iso, product_name, season_name)
    fn_save = '../figures/%s_line_pay_%s_%s.png' % (country_iso, product_name, season_name)
    fig = PlotLinePAY(sub, year, footnote, fn_save)
    fig.show()

../figures/TD_line_pay_Sorghum_Main.png is saved.


../figures/TD_line_pay_Millet_Main.png is saved.


../figures/TD_line_pay_Maize_Main.png is saved.


../figures/TD_line_pay_Sorghum_Cold-off.png is saved.
