## Crop Data Merge
- This notebook merges the FDW crop data of individual countries into a single data file, cross-checking "FNID" and "Name" between the FDW data and FEWS NET's shapefiles.

In [1]:
import os
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import numpy as np
import pandas as pd
import geopandas as gpd
from tools import save_hdf
import json
from tools import CreateLinkAdmin
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
pd.options.mode.chained_assignment = None

### Merge crop data

In [2]:
# Merge crop data: read each country's FDW crop table and stack them row-wise.
countries_ISO = ['SO','MW','KE','BF','ML','TD']
container = []
for name in countries_ISO:
    df = pd.read_hdf('./data/crop/adm_crop_production_%s.hdf' % name)
    if name == 'KE':
        # Manual editing -------------------------- #
        # - Fill missing "Long" and "Short" season Maize records with the "Annual"
        #   Maize records for Kenya.
        # Wide layout: one row per year, one column per record key.
        df = df.pivot_table(
            index='year',
            columns=['fnid','country','name','product','season_name','harvest_end','indicator'],
            values='value',
        )
        # Column levels 1-5 (country, name, product, season_name, harvest_end) are
        # dropped so seasonal and annual blocks align on (fnid, indicator) only.
        drop_levels = [1,2,3,4,5]
        annual = df.loc[:, pd.IndexSlice[:,'Kenya',:,'Maize','Annual']]
        annual.columns = annual.columns.droplevel(drop_levels)
        for season in ['Long', 'Short']:
            season_cols = pd.IndexSlice[:,'Kenya',:,'Maize',season]
            seasonal = df.loc[:, season_cols]
            seasonal.columns = seasonal.columns.droplevel(drop_levels)
            # The write-back is positional (.values): it relies on fillna() keeping
            # the seasonal block's row and column order.
            df.loc[:, season_cols] = seasonal.fillna(annual).values
        # Back to the long layout; cells without a value are dropped.
        df = df.T.stack(dropna=True).reset_index().rename(columns={0:'value'})
        # ----------------------------------------- #
    container.append(df)
df = pd.concat(container, axis=0).reset_index(drop=True)

# Merge the latest shapefile of each country into one GeoDataFrame
path_dir = './data/shapefile/fewsnet/'
# One boundary file per country, listed in the same order as the crop tables.
shapefile_names = [
    'SO_Admin2_1990.shp',
    'MW_Admin2_2003.shp',
    'KE_Admin1_2013.shp',
    'BF_Admin2_2001.shp',
    'ML_Admin1_2016.shp',
    'TD_Admin1_2012.shp',
]
shape_container = [
    gpd.read_file(os.path.join(path_dir, shapefile_name))
    for shapefile_name in shapefile_names
]
adm_current = pd.concat(shape_container, axis=0).reset_index(drop=True)
# Keep only the identifier, the admin names and the geometry.
adm_current = adm_current[['FNID','ADMIN0','ADMIN1','ADMIN2','geometry']]
adm_current = adm_current.to_crs(epsg=4326)

# Update FNIDs to those used in FEWS NET's Africa-wide shapefiles (if different).
# For instance, MW2003A2 is the latest boundary in the FDW data, but FEWS NET's African
# ADMIN-2 shapefile contains MW2007A2, which is regarded as the same boundary.
# The African ADMIN shapefiles are already used for EO aggregation, so only the FDW
# crop data are modified here.
def _fnid_level_mask(fnids, level):
    """Return a list of booleans, True where character index 7 of the FNID equals `level`."""
    return [fnid[7] == level for fnid in fnids]

adm1 = gpd.read_file('./data/shapefile/fewsnet/FEWSNET_Admin1.shp').to_crs("EPSG:4326")
adm2 = gpd.read_file('./data/shapefile/fewsnet/FEWSNET_Admin2.shp').to_crs("EPSG:4326")
# - Crop data
fnid_new = pd.concat([adm1.FNID, adm2.FNID], axis=0).reset_index(drop=True)
fnid_old = adm_current.FNID
# FNIDs of the per-country shapefiles that do not appear in the Africa-wide shapefiles
diff = fnid_old[~fnid_old.isin(fnid_new)].values
diff_adm1 = diff[_fnid_level_mask(diff, '1')]
diff_adm2 = diff[_fnid_level_mask(diff, '2')]
# CreateLinkAdmin comes from the local `tools` module; its first return value is used
# below as a replacement mapping for df.fnid.
# NOTE(review): presumably {old FNID: new FNID} - verify against tools.CreateLinkAdmin.
old_adm1 = adm_current[adm_current.FNID.isin(diff_adm1)]
new_adm1 = adm1[adm1.ADMIN0.isin(old_adm1.ADMIN0.unique())]
link_adm1 = CreateLinkAdmin(new_adm1, old_adm1, 'ADMIN1', 'ADMIN1')[0]
old_adm2 = adm_current[adm_current.FNID.isin(diff_adm2)]
new_adm2 = adm2[adm2.ADMIN0.isin(old_adm2.ADMIN0.unique())]
link_adm2 = CreateLinkAdmin(new_adm2, old_adm2, 'ADMIN2', 'ADMIN2')[0]
# Combine both link tables; ADMIN-2 entries win if a key appears in both.
rename_adm = dict(link_adm1)
rename_adm.update(link_adm2)
df['fnid'] = df['fnid'].replace(rename_adm)
# - Current shapefile: swap the per-country boundaries for the Africa-wide ones,
#   using ADMIN-1 or ADMIN-2 features according to each country's FNID level.
adm1_countries = adm_current[_fnid_level_mask(adm_current.FNID, '1')].ADMIN0.unique()
adm2_countries = adm_current[_fnid_level_mask(adm_current.FNID, '2')].ADMIN0.unique()
adm_current = pd.concat(
    [
        adm1[adm1.ADMIN0.isin(adm1_countries)],
        adm2[adm2.ADMIN0.isin(adm2_countries)],
    ],
    axis=0,
)
adm_current = adm_current.sort_values(by='FNID').reset_index(drop=True)

# Save files: merged crop table (HDF) and the matching admin boundaries (shapefile)
# NOTE(review): the recorded output also shows an "is saved." line for the HDF file,
# which presumably comes from tools.save_hdf itself - confirm.
save_hdf('./data/crop/adm_crop_production_ALL.hdf', df)
### df.to_csv('./public/adm_crop_production_ALL-stable.csv')
fn_shape = './data/shapefile/adm_current.shp'
adm_current.to_file(fn_shape)
print('%s is saved.' % fn_shape)

./data/crop/adm_crop_production_ALL.hdf is saved.
./data/shapefile/adm_current.shp is saved.


### Summary of "adm_crop_production_ALL.hdf"

In [3]:
# Summarize record coverage per (country, admin level, season) and product.
shape = gpd.read_file('./data/shapefile/adm_current.shp')
df = pd.read_hdf('./data/crop/adm_crop_production_ALL.hdf')
# Inner join (merge default): crop records whose fnid is not in the current shapefile
# are dropped from this summary.
df = df.merge(shape[['FNID','ADMIN0','ADMIN1','ADMIN2']], left_on='fnid', right_on='FNID')
df = df.rename(columns={'ADMIN1':'admin1','ADMIN2':'admin2','season_name':'season'})
df = df[['fnid','country','admin1','admin2','product','season','harvest_end','year','indicator','value']]
# Every country/product/season combination present in the data
cps = df[['country','product','season']].drop_duplicates().reset_index(drop=True)
# Characters 2-7 of the FNID, e.g. "1990A2" in the recorded output
df['admin'] = df['fnid'].apply(lambda x: x[2:8])
# One row per (country, admin, season), one column per product; '-' marks "no data".
table = pd.DataFrame(
    index=pd.MultiIndex.from_frame(df[['country','admin','season']].drop_duplicates()),
    columns= cps['product'].unique(),
    data = '-'
).rename_axis(columns='product')
for i, (country_name, product_name, season_name) in cps.iterrows():
    # All units of the country in the current shapefile (denominator of the ratio)
    fnids_country = shape.loc[shape['ADMIN0'] == country_name, 'FNID']
    sub = df[
        (df['country'] == country_name) &
        (df['product'] == product_name) &
        (df['season'] == season_name) &
        (df['indicator'] == 'area')
    ]
    if sub.empty:
        # The combination exists but has no 'area' records. The mean below would be
        # NaN and '%d' % NaN raises ValueError, so keep the '-' placeholder instead.
        continue
    # Number of 'area' records per year (rows) and unit (columns)
    count = sub.pivot_table(index='year', columns='fnid', values='value', aggfunc=len, fill_value=0)
    # Average, over the units with data, of the total number of records per unit
    mean_count = count.sum().mean()
    # "<mean records> (<units with data>/<units in country>)"
    string = '%d (%d/%d)' %  (mean_count, count.shape[1], len(fnids_country))
    table.loc[pd.IndexSlice[country_name,:,season_name],product_name] = string
print('Mean record years (# of districts)')
table

Mean record years (# of districts)


Unnamed: 0_level_0,Unnamed: 1_level_0,product,Maize,Sorghum,Rice,Millet,Wheat,Barley,Fonio
country,admin,season,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Somalia,1990A2,Deyr,20 (38/74),17 (41/74),4 (2/74),-,-,-,-
Somalia,1990A2,Gu,22 (38/74),18 (39/74),11 (1/74),-,-,-,-
Somalia,1990A2,Deyr-off,4 (27/74),1 (2/74),-,-,-,-,-
Somalia,1990A2,Gu-off,5 (26/74),2 (3/74),-,-,-,-,-
Malawi,2007A2,Main,31 (28/28),22 (26/28),21 (28/28),21 (26/28),5 (16/28),-,-
Malawi,2007A2,Winter,11 (28/28),-,10 (10/28),1 (2/28),2 (7/28),-,-
Kenya,2013A1,Annual,21 (46/47),6 (46/47),2 (23/47),6 (41/47),23 (21/47),3 (6/47),-
Kenya,2013A1,Long,33 (47/47),1 (44/47),-,-,-,-,-
Kenya,2013A1,Short,28 (45/47),2 (35/47),-,-,-,-,-
Burkina Faso,2001A2,Main,34 (45/45),34 (45/45),32 (45/45),34 (45/45),-,-,19 (17/45)


### Map of subnational level of FDW data

In [7]:
# Build the per-country status used to colour the FDW availability map.
fdw = gpd.read_file('./data/shapefile/country_fdw_grain_data_available.shp')
# total_bounds is (minx, miny, maxx, maxy); reorder to (minx, maxx, miny, maxy) so that
# lims[:2] is the longitude range and lims[2:4] the latitude range.
lims = fdw[fdw.continent == 'Africa'].total_bounds[[0,2,1,3]]
# GeoJSON of all countries (taken before filtering) for the choropleth geometry lookup
geojson = json.loads(fdw[['name','geometry']].to_json())
# Explicit copy: the column assignments below must not depend on the notebook-wide
# suppression of pandas' chained-assignment warning.
fdw = fdw[fdw['fdw_data'].notna()].copy()
fdw['status'] = 'retrieved'
shape = gpd.read_file('./data/shapefile/adm_current.shp')
assert np.isin(shape['ADMIN0'].unique(),fdw['name']).all(), \
    'Some ADMIN0 names in adm_current.shp are missing from the FDW availability layer'
# Character index 7 of the FNID is used as the admin level ('1', '2', ...)
shape['level'] = shape['FNID'].apply(lambda x: 'admin-'+x[7])
admin_level = shape[['ADMIN0','level']].drop_duplicates()
# Countries with processed crop data get their admin level as status; if a country
# appears with several levels, the last row wins.
for i, (country_name, level) in admin_level.iterrows():
    fdw.loc[fdw['name'] == country_name, 'status'] = level
# Hard-coded list of countries flagged as 'unable' on the map
error_list = ['Mozambique']
fdw.loc[fdw['name'].isin(error_list), 'status'] = 'unable'

In [8]:
# Mapping: categorical choropleth of the FDW status per country
# Insertion order of this dict is also used as the legend/category order.
status_colors = {
    "retrieved": "LightSteelblue",
    "admin-1": "aquamarine",
    "admin-2": "aqua",
    "admin-3": "darkturquoise",
    "unable": "lightsalmon"
}
fig = px.choropleth(
    data_frame=fdw,
    locations='name',
    color='status',
    geojson=geojson,
    featureidkey='properties.name',
    color_discrete_map=status_colors,
    category_orders={"status": list(status_colors)},
)
# Black country outlines, no colour bar
fig.update_traces(
    showscale=False,
    marker_line_width=1.5,
    marker_line_color='black',
    zmin=0,
    zmax=1,
)
# Hide the base map and restrict the view to the bounds stored in `lims`
# (longitude range first, then latitude range).
fig.update_geos(
    visible=False,
    resolution=50,
    showcountries=True,
    countrycolor="LightSteelblue",
    lonaxis_range=lims[:2],
    lataxis_range=lims[2:4],
    showframe=False,
)
# NOTE(review): `font` sets size 15 while `font_size=14` is also passed; keyword order
# is kept as-is - confirm which size is intended.
fig.update_layout(
    font=dict(family='arial', size=15, color='black'),
    width=660,
    height=600,
    margin=dict(r=0, t=0, l=0, b=20),
    legend=dict(
        title='GSCD-FDW',
        font_size=15,
        x=1.02,
        y=1,
        xanchor='left',
        yanchor='top',
        bgcolor='rgba(0,0,0,0)',
    ),
    font_size=14,
    dragmode=False,
)
# Source note just below the bottom-left corner of the plotting area
fig.add_annotation(
    xref='paper',
    yref='paper',
    x=0,
    y=-0.03,
    text='Source: FEWS NET Data Warehouse (FDW)',
    align="left",
    showarrow=False,
    font=dict(family='arial', size=15, color='dimgrey'),
)
# fig.show()
fn_save = './figures/map_gscd_fdw_status.png'
fig.write_image(fn_save)
print('%s is saved.' % fn_save)

./figures/map_gscd_fdw_status.png is saved.


### Somalia - Gu (table format)

In [4]:
# NOTE(review): this whole cell is disabled (commented out). When enabled, it would
# write one Excel workbook per product (Maize, Sorghum) for Somalia's Gu season, with
# separate sheets for production, area and yield pivoted by fnid x year.
# Consider moving it to a script or removing it if it is no longer needed.
# for product_name in ['Maize', 'Sorghum']:
#     df = pd.read_hdf('./data/crop/adm_crop_production_ALL.hdf')
#     df = df[
#         (df['country'] == 'Somalia') &
#         (df['product'] == product_name) &
#         (df['season_name'] == 'Gu')
#     ].reset_index(drop=True)
#     df = df.pivot_table(index='fnid',columns=['indicator','year'],values='value')
#     with pd.ExcelWriter('./public/somalia/adm_crop_production_table_somalia_gu_%s.xlsx' % product_name.lower()) as writer:  
#         df['production'].to_excel(writer, sheet_name='quantity_produced')
#         df['area'].to_excel(writer, sheet_name='harvested_area')
#         df['yield'].to_excel(writer, sheet_name='yield')

## Export to public data folder

In [5]:
# NOTE(review): this whole cell is disabled (commented out). When enabled, it would
# export a subset of seasons (Gu, Deyr, Long, Short, Main) and products (Maize, Wheat,
# Sorghum) to ./public/ as a wide yield CSV, a long CSV of all indicators, and a copy
# of the admin shapefile. Consider moving it to a script or removing it if unused.
# Weston's Google Drive
# shape = gpd.read_file('./data/shapefile/adm_current.shp')
# df = pd.read_hdf('./data/crop/adm_crop_production_ALL.hdf')
# df = df.merge(shape[['FNID','ADMIN0','ADMIN1','ADMIN2']], left_on='fnid', right_on='FNID')
# df = df.rename(columns={'ADMIN1':'admin1','ADMIN2':'admin2','season_name':'season'})
# df = df[['fnid','country','admin1','admin2','product','season','harvest_end','year','indicator','value']]
# df = df[
#     (df['season'].isin(['Gu','Deyr','Long','Short','Main'])) &
#     (df['product'].isin(['Maize','Wheat','Sorghum']))
# ].reset_index(drop=True)
# cps = df[['country','product','season']].drop_duplicates()
# print(cps)
# df_yield = df[df['indicator'] == 'yield']
# df_yield = df_yield.pivot_table(index=['fnid','country','admin1','admin2','product','season','harvest_end','indicator'], columns='year', values='value')
# df_yield = df_yield.astype(np.float32).round(3)
# fn_out = './public/grain_yield_africa.csv'
# df_yield.to_csv(fn_out)
# print("%s is saved." % fn_out)
# fn_out = './public/grain_pay_africa.csv'
# df.to_csv(fn_out)
# print("%s is saved." % fn_out)
# shape.to_file('./public/admin_africa.shp')
# print('./public/admin_africa.shp is saved.')