# HarvestStat Data Profiling - South Sudan

In [1]:
import glob, json
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import requests
import numpy as np
import pandas as pd
import geopandas as gpd
from tools import save_npz, PrintAdminUnits, PlotAdminShapes
from tools import FDW_PD_Sweeper, FDW_PD_AvalTable, FDW_PD_Compiling, FDW_PD_ValidateFnidName
from tools import FDW_PD_CreateAdminLink, FDW_PD_RatioAdminLink, FDW_PD_ConnectAdminLink
from tools import FDW_PD_CaliSeasonYear
from tools_graphic import PlotBarProduction, PlotLinePAY
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
pd.options.mode.chained_assignment = None

In [2]:
# Load FEWS NET administrative boundaries ------- #
# Every matching shapefile is read, re-projected to the country CRS and given
# an "area" column (geometry area divided by 10**6). Each GeoDataFrame is also
# bound to a notebook-level variable named after its file (e.g. SS_Admin2_2011),
# because later cells refer to those names directly.
epsg = 'epsg:20135' # South Sudan
fn_shapes = sorted(glob.glob('../data/shapefile/fewsnet/SS_Admin?_????.shp'))
shape_all = []
for fn in fn_shapes:
    # Variable name = file name without directory and ".shp" (14 characters).
    name = fn[-18:-4]
    shape = gpd.read_file(fn).to_crs(epsg)
    shape['area'] = shape['geometry'].area/10**6
    # Bind through globals() instead of exec(): no source text is generated,
    # so quotes or backslashes in the path cannot corrupt the statement.
    globals()[name] = shape
    shape_all.append(shape)
shape_all = pd.concat(shape_all, axis=0).reset_index(drop=True)
PrintAdminUnits(shape_all)
# ----------------------------------------------- #

# FDW API host address -------------------------- #
host = 'https://fdw.fews.net'
# Credentials are read from a local JSON file; the context manager closes the
# file handle as soon as the content is parsed.
with open('token.json', 'r') as token_file:
    auth = tuple(json.load(token_file))
parameters = {
    'format': 'json',
    'country': 'South Sudan',
    'product': ['R011','R012','R013','R014','R015','R017','R018'],
    # 'survey_type': 'crop:best'
}
endpoint = '/api/cropproductionindicatorvalue/'
response = requests.get(host + endpoint, auth=auth, params=parameters, proxies={})
# Stop here on any HTTP error instead of parsing an error payload.
response.raise_for_status()
df = pd.DataFrame.from_records(response.json())
# Overview of record counts per status / collection status / usage policy.
print(df.groupby(['status','collection_status','data_usage_policy']).size().reset_index(name='count'))
# ----------------------------------------------- #

- FEWS NET admin shapefiles ------------------- #
| year	 | Admin1   | # units   | Admin2   | # units   | Admin3   | # units   |
| 2011	 | SS2011A1 | 10	| SS2011A2	| 79	| nan	| 0	|
----------------------------------------------- #
          status collection_status data_usage_policy  count
0      Collected         Published            Public    476
1      Collected         Submitted            Public    474
2  Not Collected         Published            Public   1268
3  Not Collected         Submitted            Public    158


In [3]:
# Manual pre-processing ahead of the sweep ------ #
# 1. Defaults: missing or empty categorical fields are labelled 'none'
#    a) crop production system, b) population group
for column in ['crop_production_system', 'population_group']:
    blank = df[column].isna() | (df[column] == '')
    df.loc[blank, column] = 'none'
# 2. Shorten the season label
df['season_name'] = df['season_name'].replace('Main harvest', 'Main')
# 3. Row filters
#    - keep admin-2 records only (positions 6-7 of the FNID equal 'A2');
#      admin-1 level data has duplicates.
#    - drop the 'Returnees (by IOM)' population group, which causes duplicates.
is_admin2 = df['fnid'].apply(lambda fnid: fnid[6:8]).eq('A2')
not_returnees = df['population_group'].ne('Returnees (by IOM)')
df = df[is_admin2 & not_returnees]
# ----------------------------------------------- #

# FDW production data inspection ---------------- #
# The sweeper returns the cleaned records together with the raw ones;
# 'Area Harvested' is passed as the area priority.
df, df_raw = FDW_PD_Sweeper(df, area_priority='Area Harvested')
table_dict = FDW_PD_AvalTable(df, shape_all)
# ----------------------------------------------- #

# FEWS NET shapefile comparison ----------------- #
# Stack the admin-1 and admin-2 boundaries of 2011 and plot them with labels.
shape_used = pd.concat([SS_Admin1_2011, SS_Admin2_2011])
PlotAdminShapes(shape_used, label=True)
# ----------------------------------------------- #

- Remove missing records ---------------------- #
Orignial data points: 2,176
Removed 1,316 "Missing Value" points
158/544 "Area Harvested" points are retained.
0/544 "Area Planted" points are retained.
544/544 "Quantity Produced" points are retained.
158/544 "Yield" points are retained.
Current data points: 860

- Minor changes are applied ------------------- #

- Basic information --------------------------- #
Data period: 2011 - 2017
1 grain types are found: Cereal Crops (Mixed)
1 seasons are found: Main (10-01)
1 crop production system are found: none
Data sources include:
[1] FAO, WFP, Government of South Sudan (GoSS)  --- CFSAM special report, South Sudan
[2] FAO, WFP, Government of South Sudan (GoSS)  --- Crop Production, CFSAM, South Sudan
Administrative-1 fnids: 0
Administrative-2 fnids: 79
0 reporting units are found: 

- Total production over time ------------------ #
season_name             Main
                            
Cereal Crops (Mixed)  100.0%

- Crop calendar ----

- South Sudan crop seasonal calendar

![FEWS NET](../figures/crop_calendar/seasonal-calendar-south-sudan.png)

![USDA](../figures/crop_calendar/eafrica_od_calendar.png)

<!-- ![FAO](../figures/crop_calendar/Somalia_-_Crop_calendar.jpg) -->
<img src="../figures/crop_calendar/Somalia_-_Crop_calendar.jpg" alt="drawing" width="800"/>

- FDW data consists of `SS2011A1` and `SS2011A2`.

| Year  | Admin-1  | # units  | Admin-2  | # units |
| :---: | :----:   | :----:   | :----:   | :---:   |
| 2011  | SS2011A1 | 10 | **`SS2011A2`** | 79       |

- Comparison between admin boundaries.

![image](https://github.com/chc-ucsb/gscd/blob/main/figures/SS_admin_shapes.png?raw=true)

- **FDW data contains only 5 years of records (mostly "Quantity Produced").**
- **`SS2011A2`** is used to represent the current admin-level 2 crop data.
- South Sudan has a single crop season: `Main`.
- South Sudan has no population group(s); records of `Returnees (by IOM)` are removed during pre-processing because they cause duplicates.

In [4]:
# Latest shapefile ------------------------------ #
# The 2011 admin-2 layer is designated as the latest boundary set; a copy of
# it is re-projected to EPSG:4326.
latest_level = 2
shape_latest = SS_Admin2_2011.copy().to_crs(crs='epsg:4326')
# ----------------------------------------------- #

# Check FNIDs and names ------------------------- #
# Validates the records against the shapefiles in use and the latest layer.
df = FDW_PD_ValidateFnidName(df, shape_used, shape_latest)
# ----------------------------------------------- #

# Compile FDW production data ------------------- #
# Returns one table for area and one for production.
area_new, prod_new = FDW_PD_Compiling(df, shape_used)
# ----------------------------------------------- #

In [5]:
# Manual correction ----------------------------- #
# Yield is derived as production divided by area.
crop_new = prod_new / area_new
# ----------------------------------------------- #

# Long-format DataFrame ------------------------- #
# Each wide table is transposed, stacked into a 'value' column and tagged
# with the indicator it represents.
df_area = area_new.T.stack().rename('value').reset_index().assign(indicator='area')
df_prod = prod_new.T.stack().rename('value').reset_index().assign(indicator='production')
df_yield = crop_new.T.stack().rename('value').reset_index().assign(indicator='yield')
stack = pd.concat([df_area, df_prod, df_yield])
# Attach "planting_year": join on every season column except the last one.
cols = [
    'season_name', 'product', 'crop_production_system', 'planting_month',
    'harvest_year', 'harvest_month', 'planting_year',
]
season_table = df.loc[:, cols].drop_duplicates()
stack = pd.merge(stack, season_table, on=cols[:-1])
# Attach country and admin names, keyed by FNID.
admin_table = df[['fnid','country','country_code','admin_1','admin_2']].drop_duplicates()
stack = pd.merge(stack, admin_table, on='fnid', how='inner')
# Final column order.
names = [
    'fnid', 'country', 'country_code', 'admin_1', 'admin_2', 'name',
    'product', 'season_name',
    'planting_year', 'planting_month', 'harvest_year', 'harvest_month',
    'crop_production_system', 'indicator', 'value',
]
stack = stack[names]
# ----------------------------------------------- #

# Final processing ------------------------------ #
# Custom product names.
product_category_custom = {'Cereal Crops (Mixed)': 'Cereal Crops'}
stack['product'] = stack['product'].replace(product_category_custom)
# Crop calendar calibration with the External Crop Calendar (ECC),
# restricted to the rows for South Sudan.
ecc = pd.read_csv('../data/crop_calendar/external_crop_calendar.csv')
ecc = ecc.loc[ecc['country'].eq('South Sudan')]
stack, link_ratio = FDW_PD_CaliSeasonYear(stack, ecc, link_ratio=None)
# Missing admin names are labelled 'none'.
for admin_col in ['admin_1', 'admin_2']:
    stack.loc[stack[admin_col].isna(), admin_col] = 'none'
# ----------------------------------------------- #

# Write the result to disk and report the path.
fn_out = '../data/crop/adm_crop_production_SS.csv'
stack.to_csv(fn_out)
print(f'{fn_out} is saved.')

All [season_name, product, crop_production_system] are in the external crop calendar.
../data/crop/adm_crop_production_SS.csv is saved.


# Visualization of production data

In [6]:
# National crop production: one bar chart per season, built from the saved CSV.
country_iso, country_name = 'SS', 'South Sudan'
df = pd.read_csv(f'../data/crop/adm_crop_production_{country_iso}.csv', index_col=0)
# The plots are indexed by harvest year.
df['year'] = df['harvest_year']
year = [df['year'].min(), df['year'].max()]
product_order = ['Cereal Crops']
for season_name in ['Main']:
    footnote = f'National crop production in {country_name} - {season_name}'
    fn_save = f'../figures/{country_iso}_bar_natgrainprod_{season_name}.png'
    # Restrict the records to the current season before plotting.
    sub = df[df['season_name'] == season_name]
    fig = PlotBarProduction(sub, year, product_order, footnote, fn_save)
    fig.show()

../figures/SS_bar_natgrainprod_Main.png is saved.


In [7]:
# Production-Area-Yield time series: one line plot per (product, season) pair.
country_iso, country_name = 'SS', 'South Sudan'
df = pd.read_csv(f'../data/crop/adm_crop_production_{country_iso}.csv', index_col=0)
# The plots are indexed by harvest year.
df['year'] = df['harvest_year']
year = [df['year'].min(), df['year'].max()]
product_season = [['Cereal Crops', 'Main']]
for product_name, season_name in product_season:
    footnote = f'Production-Area-Yield time-series of {country_iso} - {product_name} - {season_name}'
    fn_save = f'../figures/{country_iso}_line_pay_{product_name}_{season_name}.png'
    # Keep only the records of the current product and season.
    matches = (df['product'] == product_name) & (df['season_name'] == season_name)
    sub = df[matches]
    fig = PlotLinePAY(sub, year, footnote, fn_save)
    fig.show()

../figures/SS_line_pay_Cereal Crops_Main.png is saved.
