## Preparation for FDW Crop Production Data Profiling
- [FEWS NET Data Warehouse (FDW)](https://fdw.fews.net/en/)
- [FDW API Guide](https://fdw.fews.net/en/docs/api_reference/api_reference.html)
- [FEWSNET Data Inventory](https://fdw.fews.net/dashboard/inventory/)
- [FEWS NET CropProduction Explorer](https://fdw.fews.net/dashboard/crop-production/)

Donghoon Lee (donghoonlee@ucsb.edu)<br>
Revised on 2022.08.25

### Pre-installation
We have many static shapefiles (downloaded from [FEWS NET Data Center](https://fews.net/fews-data/334)) and IIASA-IFPRI cropmask files that do not need to be in the GitHub repository.<br>
Please download them using rsync service as below:
```shell
rsync -auzv --delete chc-data-out.chc.ucsb.edu::chc-out/people/dlee/gscd/data [GSCD home directory]
```
The above command will rsync https://data.chc.ucsb.edu/people/dlee/gscd/data/ to the GSCD home directory.<br>

In [1]:
import os, sys, json, time
import shutil
from itertools import product, compress, chain
from functools import reduce
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import requests
import numpy as np
import pandas as pd
pd.set_option('mode.chained_assignment', None)
import geopandas as gpd
import plotly
import plotly.graph_objects as go
import plotly.express as px

In [2]:
# Start a wall-clock timer so the duration of the download can be reported.
stime = time.time()

# Retrieve all grain data using API
host = 'https://fdw.fews.net'
# Credentials are read from a local JSON file and handed to requests as the
# `auth` tuple (presumably a [username, password] array -- confirm).
# The context manager guarantees the file handle is closed after reading.
with open('token.json', 'r') as f:
    auth = tuple(json.load(f))
parameters = {
    'format': 'json',
    'product': 'R011',            # product code filter sent to the API
    'survey_type': 'crop:best',
    'fields': 'simple'
}
endpoint = '/api/cropproductionfacts/'
response = requests.get(host + endpoint, auth=auth, params=parameters, proxies={})
# Fail loudly on any HTTP error status instead of parsing an error page.
response.raise_for_status()
df = pd.DataFrame.from_records(response.json())

print('took %ds.' % (time.time() - stime))

KeyboardInterrupt: 

### 1. Central Product Classification (CPC) Version 2.1 (CPCV2) code - Grain products
- UN's CPC Version 2.1 document can be found [here](https://digitallibrary.un.org/record/3900378?ln=en) (see also the [table](http://datalab.review.fao.org/datalab/caliper/web/classification-page/39) and the [pdf](https://unstats.un.org/unsd/classifications/unsdclassifications/cpcv21.pdf)).
- code/broader category
    - 0: Agriculture, forestry and fishery products
    - 01: Products of agriculture, horticulture and market gardening
    - 011: Cereals
    - 0111: Wheat
    - 0112: Maize
    - 0113: Rice
    - 0114: Sorghum
    - 0115: Barley
    - 0116: Rye
    - 0117: Oats
    - 0118: Millet
    - 0119: Other cereals

In [None]:
# Build the grain_code lookup table: one row per unique FDW product code,
# tagged with a three-character category and a readable grain name.
cpcv_category_name = dict([
    ('111', 'Wheat'),
    ('112', 'Maize'),
    ('113', 'Rice'),
    ('114', 'Sorghum'),
    ('115', 'Barley'),
    ('116', 'Rye'),
    ('117', 'Oats'),
    ('118', 'Millet'),
])
grain_code = (
    df[['cpcv2', 'cpcv2_description', 'product']]
    .drop_duplicates()
    .sort_values('cpcv2')
    .reset_index(drop=True)
)
# Characters 2-4 (0-based) of the code form the category key, e.g. 'R01190AA' -> '119'.
grain_code['cpcv2_category'] = grain_code['cpcv2'].map(lambda code: code[2:5])
grain_code['product_category'] = grain_code['cpcv2_category'].replace(cpcv_category_name)
# Category '119' has no entry in the mapping above; name its members individually.
for code, label in (('R01190AA', 'Teff'), ('R01190AB', 'Fonio'), ('R01190XX', 'Mixed')):
    grain_code.loc[grain_code['cpcv2'] == code, 'product_category'] = label
fn_out = './data/crop/grain_cpcv2_code.hdf'
grain_code.to_hdf(fn_out, key='df')
print(f'{fn_out} is saved.')
grain_code

### 2. African countries where FDW grain data is available

In [None]:
# Count the collected records for every country (rows) and indicator (columns).
sub = df.loc[df['status'] == 'Collected']
sub.pivot_table(
    index='country',
    columns='indicator',
    values='value',
    aggfunc=len,
    fill_value=0,
)

In [None]:
# Compare the FDW country list with a world shapefile, then map and save the result.
country_fdw = df['country'].unique()
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
# total_bounds is (minx, miny, maxx, maxy); reorder it to
# (minx, maxx, miny, maxy) so lims[:2] is the longitude range and
# lims[2:4] is the latitude range used by the map below.
lims = world.loc[world['continent'] == 'Africa'].total_bounds[[0, 2, 1, 3]]
# Rename shapefile countries to the spellings used in the FDW data.
# 'Somaliland' becomes 'Somalia' so the dissolve below merges both polygons.
world['name'] = world['name'].replace({
    'S. Sudan': 'South Sudan',
    'Tanzania': 'Tanzania, United Republic of',
    'Central African Rep.': 'Central African Republic',
    'Somaliland': 'Somalia',
    'Dem. Rep. Congo': 'Congo, The Democratic Republic of the',
})
world = world.dissolve(by='name', as_index=False)

# Keep the countries present in FDW, then restrict them to Africa.
country = world[world['name'].isin(country_fdw)]
country_africa = country.loc[country['continent'] == 'Africa']
num_country_africa = len(country_africa)
print(f'FDW grain data exists in {num_country_africa} African countries.')
# Flag column: 1 for African countries with FDW data, NaN everywhere else.
world['fdw_data'] = np.nan
world.loc[world['name'].isin(country_africa['name']), 'fdw_data'] = 1
geojson = json.loads(world[['name', 'geometry']].to_json())
# Every FDW country name must be matched by exactly one shapefile feature.
assert world['name'].isin(country_fdw).sum() == len(country_fdw)

# Draw the choropleth map.
fig = go.Figure(
    data=go.Choropleth(
        locations=world['name'],
        z=world['fdw_data'],
        geojson=geojson,
        featureidkey='properties.name',
        marker_line_width=1,
        marker_line_color='black',
        zmin=0,
        zmax=1,
        colorbar=None,
    )
)
fig.update_traces(showscale=False)
fig.update_geos(
    visible=False,
    resolution=50,
    showcountries=True,
    countrycolor="grey",
    lonaxis_range=lims[:2],
    lataxis_range=lims[2:4],
    showframe=False,
)
fig.update_layout(
    width=600,
    height=600,
    margin={"r": 0, "t": 0, "l": 0, "b": 20},
    font_size=14,
    dragmode=False,
)
fig.add_annotation(
    xref='paper',
    yref='paper',
    x=0,
    y=-0.03,
    text=f'*{num_country_africa} African countries where FDW grain data is available',
    align="left",
    showarrow=False,
    font={'family': 'arial', 'size': 15, 'color': 'dimgrey'},
)
# fig.show()
# Save the figure and the flagged world layer to disk.
fn_save = './figures/map_fdw_available.png'
fig.write_image(fn_save)
print(f'{fn_save} is saved.')
fn_out = './data/shapefile/country_fdw_grain_data_available.shp'
world.to_file(fn_out)
print(f'{fn_out} is saved.')

![image](https://github.com/chc-ucsb/gscd/blob/main/figures/map_fdw_available.png?raw=true)

In [None]:
# Names of the countries whose fdw_data flag is set (not NaN), re-indexed from 0.
world['name'][world['fdw_data'].notna()].reset_index(drop=True)

### 3. Download all FEWS NET shapefiles
You do not need to run the commented-out code if you downloaded all shapefiles through [pre-installation](#Pre-installation).

In [None]:
# Load the FEWS NET Admin-1 and Admin-2 layers and collect their country codes.
df1 = gpd.read_file('./data/shapefile/fewsnet/FEWSNET_Admin1.shp')
df2 = gpd.read_file('./data/shapefile/fewsnet/FEWSNET_Admin2.shp')
# Unique (COUNTRY, ADMIN0) pairs per layer, skipping features without a COUNTRY value.
country_code1 = (
    df1.loc[df1['COUNTRY'].notna(), ['COUNTRY', 'ADMIN0']]
    .drop_duplicates()
    .reset_index(drop=True)
)
country_code2 = (
    df2.loc[df2['COUNTRY'].notna(), ['COUNTRY', 'ADMIN0']]
    .drop_duplicates()
    .reset_index(drop=True)
)
# Unique eight-character FNID prefixes found in either layer.
fnid_code = pd.concat(
    [layer['FNID'].map(lambda fnid: str(fnid)[:8]) for layer in (df1, df2)],
    axis=0,
).unique()

# # # Extract "ZA_Admin1_1994.shp" from "FEWSNET_Admin1.shp"
# # sub = df1[df1['FNID'].apply(lambda x: str(x)[:8] == 'ZA1994A1')].reset_index(drop=True)
# # fn_out = './data/shapefile/fewsnet/ZA_Admin1_1994.shp'
# # sub.to_file(fn_out)
# # print('%s is saved.' % fn_out)

# # Download shapefiles of administrative boundaries from FEWS NET
# path_url = 'https://fews.net/data_portal_download/download?data_file_path=http%3A//shapefiles.fews.net.s3.amazonaws.com/ADMIN/'
# path_dir = './data/shapefile/fewsnet/'
# comb = product(list(country_code1['COUNTRY'].unique()),[1,2],list(np.arange(1950,2023)))
# for (code, level, year) in comb:
#     shape_name = '%s_Admin%d_%d.zip' % (code, level, year)
#     fn_url = os.path.join(path_url, shape_name)
#     fn_dir = os.path.join(path_dir, shape_name)
#     if os.path.exists(fn_dir[:-3]+'shp'):
#         print('%s exsits.' % (fn_dir[:-3]+'shp'))
#         continue
#     else:
#         response = requests.get(fn_url)
#         response.raise_for_status()
#         if len(response.content) == 0:
#             # print('%s is not available (no content).' % shape_name)
#             continue
#         else:
#             f = open(fn_dir,'wb')
#             f.write(response.content)
#             f.close()
#             shutil.unpack_archive(fn_dir, path_dir)
#             print('%s is saved.' % (fn_dir[:-3]+'shp'))
#             os.remove(fn_dir)

In [None]:
# Display the unique (COUNTRY, ADMIN0) pairs taken from FEWSNET_Admin1.shp.
country_code1

### Countries with population_group

In [None]:
# Unique (country, population_group) pairs whose population_group is not an empty string.
df[df['population_group'] != ''][['country', 'population_group']].drop_duplicates()

### Cross-comparison between FDW data and FEWS NET shapefiles

In [None]:
# Country, ISO code, and Admin levels
# For every (country, boundary version) found in the FDW records, check that
# the matching FEWS NET shapefile exists locally, and try to download it if not.
path_url = 'https://fews.net/data_portal_download/download?data_file_path=http%3A//shapefiles.fews.net.s3.amazonaws.com/ADMIN/'
path_dir = './data/shapefile/fewsnet/'
# FNID slices used here: [:2] is the country code, [2:8] is the boundary
# year followed by a two-character unit tag (e.g. '1990A1').
code_year = pd.concat(
    [
        df['country'],
        df['fnid'].apply(lambda x: x[:2]),
        df['fnid'].apply(lambda x: x[2:8]),
    ],
    axis=1,
)
code_year = code_year.drop_duplicates().reset_index(drop=True)
code_year.columns = ['country', 'code', 'year']

# Print the boundary versions referenced by each country.
for i, row in code_year[['country', 'code']].drop_duplicates().iterrows():
    country, code = row['country'], row['code']
    year = code_year.loc[code_year['code'] == code, 'year']
    print(country, code, sorted(year.values))

# 'shape' records whether the shapefile is available on disk.
code_year['shape'] = False
for i, row in code_year.iterrows():
    country, code, year = row['country'], row['code'], row['year']
    shape_name = '%s_Admin%d_%d.shp' % (code, int(year[-1]), int(year[:4]))
    shape_path = os.path.join(path_dir, shape_name)
    if os.path.exists(shape_path):
        code_year.loc[i, 'shape'] = True
        continue
    # Not on disk: request the zipped shapefile from FEWS NET.
    zip_name = shape_name[:-3] + 'zip'
    # path_url already ends with '/', so plain concatenation builds the URL.
    fn_url = path_url + zip_name
    fn_dir = os.path.join(path_dir, zip_name)
    response = requests.get(fn_url)
    response.raise_for_status()
    if not response.content:
        # An empty body is treated as "no such shapefile on the server".
        continue
    with open(fn_dir, 'wb') as f:
        f.write(response.content)
    print(fn_dir, 'is saved.')
    shutil.unpack_archive(fn_dir, path_dir)
    os.remove(fn_dir)
    # Re-check after unpacking so a freshly downloaded shapefile is not
    # reported as missing below.
    code_year.loc[i, 'shape'] = os.path.exists(shape_path)

# Boundary versions that still have no shapefile, excluding entries whose
# unit tag starts with 'R' (named "reporting_unit" in this notebook).
code_year_miss = code_year[~code_year['shape']]
reporting_unit = code_year_miss['year'].str[-2] == 'R'
code_year_miss = code_year_miss[~reporting_unit].reset_index(drop=True)

### Data Availability Table

In [None]:
# Build the data-availability tables (records per admin unit count) and
# export them to an Excel workbook.
# Cache the raw API table on disk and reload it.
df.to_hdf('./removable_all_data.hdf', key='data')
df = pd.read_hdf('./removable_all_data.hdf')
# Work on an explicit copy so the column assignments below never write
# into a view of `df`.
sub = df[df['status'] == 'Collected'].copy()

# Basic setting
# FNID slices: [:2] country code, [:8] boundary-version code, [6:8] admin-level tag.
sub['country_iso'] = sub['fnid'].str[:2]
sub['admin_code'] = sub['fnid'].str[:8]
sub['admin_level'] = sub['fnid'].str[6:8]
# The last four characters of season_year are used as the calendar year.
sub['year'] = sub['season_year'].str[-4:].astype(int)
# Normalise missing/empty labels to the literal string 'None' so they can
# be used as pivot index levels.
sub['crop_production_system'] = sub['crop_production_system'].fillna('None')
sub['population_group'] = sub['population_group'].fillna('None').replace('', 'None')

# Reduce the dataframe
sub = sub[
    (sub['population_group'] == 'None') &
    (sub['admin_level'].isin(['A1', 'A2', 'A3'])) &
    (~sub['country'].isin(['Cuba', 'Panama', 'Paraguay', 'Thailand', 'Haiti']))
].copy()

# Some admin_code does not exist in FEWS NET shapefiles
sub['admin_code'] = sub['admin_code'].replace({'AF2018A1': 'AF2017A1', 'YE1990A1': 'YE2004A1'})

# Calculate the number of admin_code
# (number of features in the shapefile of each boundary version).
# A dedicated variable holds the file name so the `path_dir` directory
# variable of the notebook is not overwritten with a file path.
admin_code_number = {}
for c in sub['admin_code'].unique():
    fn_shape = './data/shapefile/fewsnet/%s_Admin%s_%s.shp' % (c[:2], c[-1], c[2:6])
    admin_code_number[c] = gpd.read_file(fn_shape).shape[0]
admin_code_number = pd.Series(admin_code_number, dtype=int)
# map() is a straight key lookup and yields an integer column, whereas
# replace() would substitute integers into a string column.
sub['admin_code_numb'] = sub['admin_code'].map(admin_code_number)

# Table of data availability
produced = sub[sub['indicator'] == 'Quantity Produced']
index_cols = ['country', 'product', 'season_name', 'crop_production_system', 'admin_level']
table_count = produced.pivot_table(
    index=index_cols, columns='year', values='value', aggfunc=len, fill_value=0
)
table_numb = produced.pivot_table(
    index=index_cols, columns='year', values='admin_code_numb',
    aggfunc='max', fill_value=0    # "Max" could work well but not perfectly.
)
table_string = table_count.astype(str) + '/' + table_numb.astype(str)
# Percentage of admin units with a record; 0/0 cells (no data) become 0,
# values are truncated to integers, and zeros are then blanked out as NaN.
table_percent = (table_count / table_numb * 100).fillna(0).astype(int)
table_percent = table_percent.mask(table_percent == 0)
fn_out = './data/crop/data_availability.xlsx'
with pd.ExcelWriter(fn_out) as writer:
    table_string.to_excel(writer, sheet_name='number_of_records')
    table_percent.to_excel(writer, sheet_name='percent_of_records')
# Report only after the writer has closed, i.e. once the file is on disk.
print('%s is saved.' % fn_out)

In [None]:
# Availability strings for Somalia / Maize (Corn), columns from year 2000 onward.
table_string.loc[('Somalia', 'Maize (Corn)'), 2000:]