## Crop Data Merge
- This notebook aggregates FDW crop data up to admin 1 and admin 0, then merges the crop data of individual countries into a single data file, cross-checking "FNID" and "Name" between the FDW data and FEWS NET's shapefiles.

In [24]:
import os, json
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import numpy as np
import pandas as pd
import geopandas as gpd
from tools import save_hdf
from tools import CreateLinkAdmin
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
pd.options.mode.chained_assignment = None

### Merge crop data

In [25]:
# Merge crop data
# For every country: load the calibrated FDW records, apply country-specific
# manual edits, then aggregate area/production to admin-1 and admin-0 and
# recompute yield from the aggregated totals.
countries_ISO = ['SO','MW','KE','BF','ML','TD','ZA','NE','ZM','AO','MZ']


def _fill_kenya_long_with_annual(df):
    """Fill missing Kenya Maize "Long" season records with "Annual" records.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format crop records; must contain the columns listed in ``cols``
        below plus 'harvest_year' and 'value'.

    Returns
    -------
    pandas.DataFrame
        Long-format records with the same columns (and column order) as the
        input. Only the "Long" season is filled; "Short" season records are
        kept as they are.
    """
    raw_columns = df.columns
    cols = [
        'fnid', 'country', 'country_code', 'admin_1', 'admin_2',
        'product', 'season_name', 'growing_month', 'harvest_month',
        'crop_production_system', 'indicator'
    ]
    df = df.copy()
    df['year'] = df['harvest_year']
    # Wide table: one row per year, one column per unique combination of `cols`.
    wide = df.pivot_table(index='year', columns=cols, values='value')
    annual_sel = pd.IndexSlice[:,'Kenya',:,:,:,'Maize','Annual',:]
    long_sel = pd.IndexSlice[:,'Kenya',:,:,:,'Maize','Long',:]
    # Dropping levels 1-9 leaves only (fnid, indicator), so that fillna() can
    # align the "Long" and "Annual" columns of the same unit and indicator.
    drop_levels = [1,2,3,4,5,6,7,8,9]
    annual = wide.loc[:, annual_sel].copy()
    annual.columns = annual.columns.droplevel(drop_levels)
    long = wide.loc[:, long_sel].copy()
    long.columns = long.columns.droplevel(drop_levels)
    long = long.fillna(annual)
    # Written back positionally: `long` keeps the column order of the selection.
    wide.loc[:, long_sel] = long.values

    # Back to long format. `.stack().dropna()` removes the NaN cells under both
    # the legacy and the new pandas stack implementation (`stack(dropna=True)`
    # is deprecated).
    out = wide.T.stack().dropna().reset_index().rename(columns={0:'value'})
    # Restore columns such as growing year
    out = out.rename(columns={'year':'harvest_year'})
    out['gscd_code'] = 'calibrated'
    out['name'] = out['admin_1']
    out['growing_year'] = out['harvest_year']
    # "Short" season records get the year before the harvest year as growing year.
    out.loc[out['season_name'] == 'Short', 'growing_year'] -= 1
    return out[raw_columns]


def _aggregate_to_admin(df, fnid_col):
    """Sum records up to the admin unit in `fnid_col` and recompute yield.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format records with `fnid_col`, 'indicator', 'harvest_year',
        'value', 'country', 'harvest_month', 'product' and 'season_name'.
        The 'indicator' column must contain both 'area' and 'production'.
    fnid_col : str
        Name of the column holding the aggregated FNID ('fnid_a1' or 'fnid_a0').

    Returns
    -------
    pandas.DataFrame
        Wide table with the key columns (FNID column renamed to 'fnid') plus
        'area', 'production' and 'yield'. Rows whose yield is NaN, infinite or
        zero are removed.
    """
    keys = [fnid_col,'harvest_year','country','harvest_month','product','season_name']
    summed = df[keys + ['indicator','value']].groupby(keys + ['indicator']).sum().reset_index()
    wide = summed.pivot_table(index=keys, columns='indicator', values='value',
                              dropna=False, fill_value=0)
    wide = wide[['area','production']].copy()
    wide['yield'] = wide['production']/wide['area']
    # The FNID lives in the index until reset_index(), so rename afterwards.
    wide = wide.reset_index().rename(columns={fnid_col:'fnid'})
    # axis is left at its default: pandas >= 2.0 rejects a positional axis here.
    wide = wide.dropna(subset=['yield'])
    wide = wide[np.isfinite(wide['yield'])]
    wide = wide[wide['yield'] != 0]
    return wide


containera0 = []
containera1 = []
for name in countries_ISO:
    df = pd.read_hdf('./data/crop/adm_crop_production_%s.hdf' % name)

    # Select the calibrated data
    df = df[df['gscd_code'] == 'calibrated']
    if name == 'KE':
        # Manual editing: fill missing "Long" season records with "Annual" records for Kenya
        df = _fill_kenya_long_with_annual(df)
    else:
        df = df.copy()

    # Write out fnid columns that only contain the admin1 or admin0 portion of the code.
    # NOTE(review): assumes the first 6 characters are country code + boundary year and
    # characters 8-9 are the admin-1 code; the admin level is hard-coded to admin1.
    df['fnid_a0'] = df.fnid.str.slice(0,6)+'A0'
    df['fnid_a1'] = df.fnid.str.slice(0,6)+'A1'+df.fnid.str.slice(8,10)

    containera1.append(_aggregate_to_admin(df, 'fnid_a1'))
    containera0.append(_aggregate_to_admin(df, 'fnid_a0'))

dsa1 = pd.concat(containera1, axis=0).reset_index(drop=True)
dsa0 = pd.concat(containera0, axis=0).reset_index(drop=True)


In [26]:
# Merge the latest shapefiles
# Loads the per-country admin-1 boundaries, maps FNIDs that are missing from the
# FEWS NET continental shapefiles onto their replacements, applies that mapping
# to the admin-1 crop data, and saves the admin-1 / admin-0 outputs.
shape_container0 = []  # not populated in this cell
path_dir = './data/shapefile/fewsnet/'

#Adm1
adm1_shapefiles = [
    'SO_Admin1_1990.shp',
    'MW_Admin1_2003.shp',
    'KE_Admin1_2013.shp',
    'BF_Admin1_2001.shp',
    'ML_Admin1_2016.shp',
    'TD_Admin1_2012.shp',
    'ZA_Admin1_1994.shp',
    'NE_Admin1_2012.shp',
    'ZM_Admin1_2011.shp',
    'MZ_Admin1_2013.shp',
    'AO_Admin1_2008.shp',
]
shape_container1 = [gpd.read_file(os.path.join(path_dir, fn)) for fn in adm1_shapefiles]
adm_current1 = pd.concat(shape_container1, axis=0).reset_index(drop=True)[['FNID','ADMIN0','ADMIN1','ADMIN2','geometry']]
adm_current1 = adm_current1.to_crs(epsg=4326)

# Update FNID to the latest FEWS NET's entire shapefile FNID (if different)
# For instance, MW2003A2 is the latest boundary in FDW data, but the FEWS NET's African ADMIN-2 shapefile contains MW2007A2 which is regarded as the same.
# We already use this African ADMIN shapefile for EO aggregation, so here we will just modify the FDW crop data.
adm1 = gpd.read_file(os.path.join(path_dir, 'FEWSNET_Admin1.shp')).to_crs("EPSG:4326")
adm2 = gpd.read_file(os.path.join(path_dir, 'FEWSNET_Admin2.shp')).to_crs("EPSG:4326")
# - Crop data
fnid_new = pd.concat([adm1.FNID, adm2.FNID], axis=0).reset_index(drop=True)
fnid_old = adm_current1.FNID
# FNIDs of the per-country shapefiles that are absent from the continental shapefiles.
diff = fnid_old[~fnid_old.isin(fnid_new)].values
# The character at position 7 of an FNID is compared against the admin level digit.
diff_adm1 = diff[[t[7]=='1' for t in diff]]
diff_adm2 = diff[[t[7]=='2' for t in diff]]
# NOTE(review): CreateLinkAdmin is a project helper; its first return value is used
# here as a mapping passed to Series.replace (presumably old FNID -> new FNID). Verify.
old = adm_current1[adm_current1.FNID.isin(diff_adm1)]
new = adm1[adm1.ADMIN0.isin(old.ADMIN0.unique())]
link_adm1 = CreateLinkAdmin(new, old, 'ADMIN1', 'ADMIN1')[0]
old = adm_current1[adm_current1.FNID.isin(diff_adm2)]
new = adm2[adm2.ADMIN0.isin(old.ADMIN0.unique())]
link_adm2 = CreateLinkAdmin(new, old, 'ADMIN2', 'ADMIN2')[0]
rename_adm = {**link_adm1, **link_adm2}
dsa1['fnid'] = dsa1['fnid'].replace(rename_adm)
# - Current shapefile
# Replace the per-country boundaries with the matching rows of the continental shapefiles.
adm1_countries = adm_current1[[t[7]=='1' for t in adm_current1.FNID]].ADMIN0.unique()
adm2_countries = adm_current1[[t[7]=='2' for t in adm_current1.FNID]].ADMIN0.unique()
adm_current1 = pd.concat([adm1[adm1.ADMIN0.isin(adm1_countries)],
                         adm2[adm2.ADMIN0.isin(adm2_countries)],
                        ], axis=0).sort_values(by='FNID').reset_index(drop=True)

# Reshape to the same (long) shape as the admin2 df.
# var_name is given explicitly so the variable column is always called 'indicator'
# instead of depending on whether dsa1.columns still carries that axis name.
dsa1 = pd.melt(dsa1, id_vars=['fnid','harvest_year','country','harvest_month','product','season_name'],
               value_vars=['area','production','yield'], var_name='indicator')

# Save admin1 files
save_hdf('./data/crop/adm1_crop_production_ALL.hdf', dsa1)
adm_current1.to_file('./data/shapefile/adm1_current.shp')
print('%s is saved.' % './data/shapefile/adm1_current.shp')
# Save admin0 file (not melted in this cell)
save_hdf('./data/crop/adm0_crop_production_ALL.hdf', dsa0)




./data/crop/adm1_crop_production_ALL.hdf is saved.
./data/shapefile/adm1_current.shp is saved.
./data/crop/adm0_crop_production_ALL.hdf is saved.


### Summary of "adm_crop_production_ALL.hdf"

In [5]:
# Summary table: for every (country, admin, season) row and product column, show
# "<mean number of 'area' records per unit> (<units with data>/<units in the shapefile>)".
# NOTE(review): this cell reads adm_current.shp / adm_crop_production_ALL.hdf, not the
# adm1_* files written by the cell above — confirm these are the intended inputs.
shape = gpd.read_file('./data/shapefile/adm_current.shp')
df = pd.read_hdf('./data/crop/adm_crop_production_ALL.hdf')
df = df.merge(shape[['FNID','ADMIN0','ADMIN1','ADMIN2']], left_on='fnid', right_on='FNID')
df = df.rename(columns={'ADMIN1':'admin1','ADMIN2':'admin2','season_name':'season'})
df = df[['fnid','country','admin1','admin2','product','season','harvest_month','harvest_year','indicator','value']]
cps = df[['country','product','season']].drop_duplicates().reset_index(drop=True)
# Characters 2-7 of the FNID (e.g. '1990A2' in the displayed table) label the admin boundary.
df['admin'] = df['fnid'].str.slice(2, 8)
# Every cell starts as '-' and is overwritten where records exist.
table = pd.DataFrame(
    index=pd.MultiIndex.from_frame(df[['country','admin','season']].drop_duplicates()),
    columns= cps['product'].unique(),
    data = '-'
).rename_axis(columns='product')
for _, (country_name, product_name, season_name) in cps.iterrows():
    fnids_country = shape.loc[shape['ADMIN0'] == country_name, 'FNID']
    sub = df[
        (df['country'] == country_name) &
        (df['product'] == product_name) &
        (df['season'] == season_name) &
        (df['indicator'] == 'area')
    ]
    if sub.empty:
        # No 'area' records for this combination: the mean would be NaN and
        # '%d' % NaN raises ValueError, so keep the '-' placeholder instead.
        continue
    # Rows: harvest years, columns: units, values: number of records.
    count = sub.pivot_table(index='harvest_year', columns='fnid', values='value', aggfunc=len, fill_value=0)
    mean_count = count.sum().mean()
    string = '%d (%d/%d)' %  (mean_count, count.shape[1], len(fnids_country))
    table.loc[pd.IndexSlice[country_name,:,season_name],product_name] = string
print('Mean record years (# of districts)')
table

Mean record years (# of districts)


Unnamed: 0_level_0,Unnamed: 1_level_0,product,Maize,Sorghum,Rice,Millet,Wheat,Barley,Fonio,Maize (White),Maize (Yellow)
country,admin,season,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Somalia,1990A2,Gu,23 (38/74),19 (39/74),11 (1/74),-,-,-,-,-,-
Somalia,1990A2,Deyr,22 (38/74),19 (41/74),5 (2/74),-,-,-,-,-,-
Somalia,1990A2,Gu-off,5 (26/74),2 (3/74),-,-,-,-,-,-,-
Somalia,1990A2,Deyr-off,4 (27/74),1 (2/74),-,-,-,-,-,-,-
Malawi,2007A2,Winter,12 (28/28),-,10 (11/28),1 (2/28),2 (7/28),-,-,-,-
Malawi,2007A2,Main,39 (28/28),25 (26/28),24 (28/28),24 (26/28),5 (16/28),-,-,-,-
Kenya,2013A1,Annual,21 (46/47),6 (46/47),2 (23/47),6 (41/47),23 (21/47),3 (6/47),-,-,-
Kenya,2013A1,Long,33 (47/47),1 (44/47),-,-,-,-,-,-,-
Kenya,2013A1,Short,7 (45/47),2 (35/47),-,-,-,-,-,-,-
Burkina Faso,2001A2,Main,34 (45/45),34 (45/45),33 (45/45),34 (45/45),-,-,18 (18/45),-,-
